From 4728113b3d7378aa149326088e0eeb53ced1ed96 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Thu, 10 Mar 2022 17:16:18 +0000 Subject: [PATCH 01/16] wip celery integration --- Dockerfile | 20 ---------- app/__init__.py | 11 ++++-- app/celery_utils.py | 11 ++++++ config.json => app/config.json | 0 app/config.py | 38 +++++++++++++++++++ app/requirements.txt | 15 ++++++++ app/tasks.py | 8 ++++ app/utils/.translate.py.swp | Bin 0 -> 12288 bytes app/utils/translate.py | 3 ++ app/views/v1/translate.py | 37 ++++++++++++++++++ compose/app/Dockerfile | 28 ++++++++++++++ Dockerfile-gpu => compose/app/Dockerfile-gpu | 0 compose/app/celery/worker/start.sh | 7 ++++ compose/app/start.sh | 8 ++++ docker-compose.yml | 37 ++++++++++++------ main.py | 16 ++++++++ requirements.txt | 10 ----- 17 files changed, 204 insertions(+), 45 deletions(-) delete mode 100644 Dockerfile create mode 100644 app/celery_utils.py rename config.json => app/config.json (100%) create mode 100644 app/config.py create mode 100644 app/requirements.txt create mode 100644 app/tasks.py create mode 100644 app/utils/.translate.py.swp create mode 100644 compose/app/Dockerfile rename Dockerfile-gpu => compose/app/Dockerfile-gpu (100%) create mode 100644 compose/app/celery/worker/start.sh create mode 100644 compose/app/start.sh delete mode 100644 requirements.txt diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 1275718..0000000 --- a/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM python:3.8-slim - -# Project setup - -ENV VIRTUAL_ENV=/opt/venv - -RUN apt-get update && apt-get clean - -RUN python -m venv "$VIRTUAL_ENV" -ENV PATH="$VIRTUAL_ENV/bin:$PATH" -RUN pip install --quiet --upgrade pip && \ - pip install --quiet pip-tools -COPY ./requirements.txt /app/requirements.txt -RUN pip install -r /app/requirements.txt \ - && rm -rf /root/.cache/pip - -WORKDIR /app - -COPY ./app/nltk_pkg.py /app/nltk_pkg.py -RUN python /app/nltk_pkg.py diff --git a/app/__init__.py b/app/__init__.py index 
c867cf0..0d4507b 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,17 +1,20 @@ from fastapi import FastAPI from app.helpers.config import Config +from . import tasks def create_app() -> FastAPI: app = FastAPI() - from app.views.v1.translate import translate_v1 + from app.celery_utils import create_celery + app.celery_app = create_celery() + from app.views.v1.translate import translate_v1 app.include_router(translate_v1) - @app.on_event('startup') - async def startup_event() -> None: - config = Config(load_all_models=True) + #@app.on_event('startup') + #async def startup_event() -> None: + # config = Config(load_all_models=True) return app diff --git a/app/celery_utils.py b/app/celery_utils.py new file mode 100644 index 0000000..0372256 --- /dev/null +++ b/app/celery_utils.py @@ -0,0 +1,11 @@ +from celery import current_app as current_celery_app + +from app.config import settings + + +def create_celery(): + celery_app = current_celery_app + celery_app.config_from_object(settings, namespace="CELERY") + + return celery_app + diff --git a/config.json b/app/config.json similarity index 100% rename from config.json rename to app/config.json diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..d665a28 --- /dev/null +++ b/app/config.py @@ -0,0 +1,38 @@ +import os +import pathlib +from functools import lru_cache + + +class BaseConfig: + BASE_DIR = pathlib.Path(__file__).parent.parent + + CELERY_BROKER_URL: str = os.environ.get("CELERY_BROKER_URL", "redis://127.0.0.1:6379/0") + CELERY_RESULT_BACKEND: str = os.environ.get("CELERY_RESULT_BACKEND", "redis://127.0.0.1:6379/0") + +class DevelopmentConfig(BaseConfig): + pass + + +class ProductionConfig(BaseConfig): + pass + + +class TestingConfig(BaseConfig): + pass + + +@lru_cache() +def get_settings(): + config_cls_dict = { + "development": DevelopmentConfig, + "production": ProductionConfig, + "testing": TestingConfig + } + + config_name = os.environ.get("FASTAPI_CONFIG", "development") + 
config_cls = config_cls_dict[config_name] + return config_cls() + + +settings = get_settings() + diff --git a/app/requirements.txt b/app/requirements.txt new file mode 100644 index 0000000..ebb3c26 --- /dev/null +++ b/app/requirements.txt @@ -0,0 +1,15 @@ +fastapi==0.65.2 +uvicorn==0.13.3 +httpx==0.16.1 +subword_nmt==0.3.6 +ctranslate2==1.16.2 +sacremoses==0.0.43 +nltk==3.5 +sentencepiece==0.1.94 +torch==1.7.1 +torchvision==0.8.2 +transformers==4.0.1 +celery==5.1.2 +redis==3.5.3 +flower==1.0.0 +watchgod==0.7 diff --git a/app/tasks.py b/app/tasks.py new file mode 100644 index 0000000..ba54968 --- /dev/null +++ b/app/tasks.py @@ -0,0 +1,8 @@ +from celery import shared_task + +from app.utils.translate import translate_text +from app.helpers.config import Config + +@shared_task +def translate_text_async(model_id, text): + t = translate_text(model_id, text) diff --git a/app/utils/.translate.py.swp b/app/utils/.translate.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..7666a1a911da1d19e2315593fcf5411ba05d2f2b GIT binary patch literal 12288 zcmeI2L2uJA6vsU-2*v=78wX@MK@zm4+W={r7zdDsI1GkV;80an+B$7U8e6u@R%rr> zGhYL^a|KR(1rQ$qF5E%l2G41dChJgkWawG?H;w)L-t+I-Rja1G^yuy;U3HfUw6la9 z-*DQq#3f(m3EAeVZ@SCc4wp~LY|QvGc8_;V|06Gsk0s1{h z$WPE`&?nGG&`W{4!-R&Aykjee`dm^)gR$>yW|NpnX3W@`;SP`dpy!6%>#}ZZz`HC|#^&G5tPv;J z3cAgPCAiXN6-+5EmDOA$Chz?~vWITF8NXKB)17{7y+F4nUUF2^Cw2Ev0>UZFoEJdG z#?eg%O}l8~#F5O1X;zvLmllb8;3e&Y3KZu;yn_mq!w~xj{4%qA*o4#qt*6VRF%fEa zvP3A4!clIYO{F!x(TXOA+iAIlSVFStazcT!?n!-$q0F{7r)M0pSd1e4_>85{r<|0K zwz3I|^#ufmj6MM*Cs1Uj@vyRvDl+YQph0EwsplcS_uZU&m^l@i&SZ8XZOw` z^~0g+uh->}jh74CA>x{*XXS6l9Ky%33UQu#QrTk?P7*5LbwISSF%;o;{83^%q|5xt zmX1Hz>@=4(ll&qzg^1rew0e`?7dqgP7dBuFHYfZTmwZ6s!FjjOLcyeRlNiB3aH*3( QiJ2aWAnMU%D0M~t0A36D^#A|> literal 0 HcmV?d00001 diff --git a/app/utils/translate.py b/app/utils/translate.py index a4b4b51..096563f 100644 --- a/app/utils/translate.py +++ b/app/utils/translate.py @@ -5,8 +5,11 @@ def translate_text(model_id: str, text: str) -> 
Optional[str]: config = Config() + print('***** vars(config)', str(vars(config)), flush=True) + if not model_id in config.loaded_models: + print('***** "im here"', str("im here"), flush=True) return None if config.loaded_models[model_id]['sentence_segmenter']: diff --git a/app/views/v1/translate.py b/app/views/v1/translate.py index b8d4f18..59b96e3 100644 --- a/app/views/v1/translate.py +++ b/app/views/v1/translate.py @@ -10,6 +10,7 @@ TranslationResponse, ) from app.utils.translate import translate_text +from app.tasks import translate_text_async translate_v1 = APIRouter(prefix='/api/v1/translate') @@ -38,6 +39,27 @@ async def translate_sentence( return TranslationResponse(translation=translation) +@translate_v1.post('/t', status_code=status.HTTP_200_OK) +async def translate_sentence_async(request: TranslationRequest): + config = Config() + + model_id = get_model_id( + config.map_lang_to_closest(request.src), + config.map_lang_to_closest(request.tgt), + request.alt, + ) + + if not model_id in config.loaded_models: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f'Language pair {model_id} is not supported.', + ) + + task = translate_text_async.delay(model_id, request.text) + + return {'uid': task.id} + + @translate_v1.post('/batch', status_code=status.HTTP_200_OK) async def translate_batch( request: BatchTranslationRequest, @@ -71,3 +93,18 @@ async def languages() -> LanguagesResponse: return LanguagesResponse( languages=config.language_codes, models=config.languages_list ) + + +@translate_v1.get('/hello', status_code=status.HTTP_200_OK) +async def say_hello(): + from app.tasks import hello + task = hello.delay() + return {'uid': task.id} + +@translate_v1.get('/{uid}', status_code=status.HTTP_200_OK) +async def say_hello_result(uid): + from celery.result import AsyncResult + result = AsyncResult(uid) + if result: + return result.result + diff --git a/compose/app/Dockerfile b/compose/app/Dockerfile new file mode 100644 index 0000000..78afca5 
--- /dev/null +++ b/compose/app/Dockerfile @@ -0,0 +1,28 @@ +FROM python:3.8-slim + +# Project setup + +ENV VIRTUAL_ENV=/opt/venv + +RUN apt-get update \ + && apt-get install -y telnet netcat \ + && apt-get clean + +RUN python -m venv "$VIRTUAL_ENV" +ENV PATH="$VIRTUAL_ENV/bin:$PATH" +RUN pip install --quiet --upgrade pip && \ + pip install --quiet pip-tools +COPY ./app/requirements.txt /app/requirements.txt +RUN pip install -r /app/requirements.txt \ + && rm -rf /root/.cache/pip + +COPY ./compose/app/start.sh /start.sh +RUN chmod +x /start.sh + +COPY ./compose/app/celery/worker/start.sh /start-celeryworker +RUN chmod +x /start-celeryworker + +WORKDIR /app + +COPY ./app/nltk_pkg.py /app/nltk_pkg.py +RUN python /app/nltk_pkg.py diff --git a/Dockerfile-gpu b/compose/app/Dockerfile-gpu similarity index 100% rename from Dockerfile-gpu rename to compose/app/Dockerfile-gpu diff --git a/compose/app/celery/worker/start.sh b/compose/app/celery/worker/start.sh new file mode 100644 index 0000000..d9e6e68 --- /dev/null +++ b/compose/app/celery/worker/start.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -o errexit +set -o nounset + +#celery -A main.celery worker --loglevel=info +python main.py diff --git a/compose/app/start.sh b/compose/app/start.sh new file mode 100644 index 0000000..4be1710 --- /dev/null +++ b/compose/app/start.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -o errexit +set -o pipefail +set -o nounset + +uvicorn main:app --reload --reload-dir app --host 0.0.0.0 --port 8000 +#--log-config logging.yml diff --git a/docker-compose.yml b/docker-compose.yml index 3bf01f9..5b71b1e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,22 +1,37 @@ version: '3.7' services: - mt: - build: . - command: uvicorn main:app --reload --host 0.0.0.0 --port 8000 --log-config logging.yml + build: + context: . 
+ dockerfile: ./compose/app/Dockerfile + image: twb_mt + command: /start.sh restart: unless-stopped # runtime: nvidia # Comment out in local ports: - 8001:8000 volumes: - .:/app - - ../translation-models:/app/models - environment: - - MT_API_CONFIG=/app/config.json - - MT_API_DEVICE=cpu #or gpu, if so make runtime:nvidia - - MT_API_THREADS=16 - - MODELS_ROOT=/app/models - - NVIDIA_VISIBLE_DEVICES=all - - NVIDIA_DRIVER_CAPABILITIES=all + - ../.vols/translation-models:/models + env_file: + - .env/.dev + depends_on: + - redis + + redis: + image: redis:6-alpine + celery_worker: + build: + context: . + dockerfile: ./compose/app/Dockerfile + image: celery_worker + command: /start-celeryworker + volumes: + - .:/app + - ../.vols/translation-models:/models + env_file: + - .env/.dev + depends_on: + - redis diff --git a/main.py b/main.py index 0a23b5a..053fcdd 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,19 @@ from app import create_app app = create_app() +celery = app.celery_app + +def celery_worker(): + from watchgod import run_process + import subprocess + + def run_worker(): + subprocess.call( + ["celery", "-A", "main.celery", "worker", "--loglevel=info"] + ) + + run_process("./app", run_worker) + + +if __name__ == "__main__": + celery_worker() diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index b636378..0000000 --- a/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -fastapi==0.73.0 -uvicorn==0.17.4 -subword_nmt==0.3.8 -ctranslate2==2.12.0 -sacremoses==0.0.47 -nltk==3.7 -sentencepiece==0.1.96 -torch==1.7.1 -torchvision==0.8.2 -transformers==4.16.2 From 3814162353afc9d4b22d96e14ca6775243c655af Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Thu, 10 Mar 2022 20:09:50 +0000 Subject: [PATCH 02/16] add v2 views, limit celery worker to manage memory usage, lazy load model for workers --- app/__init__.py | 3 ++ app/tasks.py | 15 ++++++- app/utils/.translate.py.swp | Bin 12288 -> 0 bytes app/utils/translate.py | 3 -- app/views/v1/translate.py | 38 
------------------ app/views/v2/__init__.py | 0 app/views/v2/translate.py | 76 ++++++++++++++++++++++++++++++++++++ main.py | 2 +- 8 files changed, 93 insertions(+), 44 deletions(-) delete mode 100644 app/utils/.translate.py.swp create mode 100644 app/views/v2/__init__.py create mode 100644 app/views/v2/translate.py diff --git a/app/__init__.py b/app/__init__.py index 0d4507b..bb513ac 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -13,6 +13,9 @@ def create_app() -> FastAPI: from app.views.v1.translate import translate_v1 app.include_router(translate_v1) + from app.views.v2.translate import translate_v2 + app.include_router(translate_v2) + #@app.on_event('startup') #async def startup_event() -> None: # config = Config(load_all_models=True) diff --git a/app/tasks.py b/app/tasks.py index ba54968..d08de9f 100644 --- a/app/tasks.py +++ b/app/tasks.py @@ -1,8 +1,19 @@ +import json + from celery import shared_task -from app.utils.translate import translate_text from app.helpers.config import Config +from app.utils.translate import translate_text +from app.settings import CONFIG_JSON_PATH @shared_task def translate_text_async(model_id, text): - t = translate_text(model_id, text) + with open(CONFIG_JSON_PATH, 'r') as f: + conf = json.loads(f.read()) + model_data = list(filter(lambda x: f'{x["src"]}-{x["tgt"]}' == model_id, conf['models'])) + config_data = { + **conf, + 'models': model_data + } + config = Config(config_data=config_data) + return translate_text(model_id, text) diff --git a/app/utils/.translate.py.swp b/app/utils/.translate.py.swp deleted file mode 100644 index 7666a1a911da1d19e2315593fcf5411ba05d2f2b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2L2uJA6vsU-2*v=78wX@MK@zm4+W={r7zdDsI1GkV;80an+B$7U8e6u@R%rr> zGhYL^a|KR(1rQ$qF5E%l2G41dChJgkWawG?H;w)L-t+I-Rja1G^yuy;U3HfUw6la9 z-*DQq#3f(m3EAeVZ@SCc4wp~LY|QvGc8_;V|06Gsk0s1{h z$WPE`&?nGG&`W{4!-R&Aykjee`dm^)gR$>yW|NpnX3W@`;SP`dpy!6%>#}ZZz`HC|#^&G5tPv;J 
z3cAgPCAiXN6-+5EmDOA$Chz?~vWITF8NXKB)17{7y+F4nUUF2^Cw2Ev0>UZFoEJdG z#?eg%O}l8~#F5O1X;zvLmllb8;3e&Y3KZu;yn_mq!w~xj{4%qA*o4#qt*6VRF%fEa zvP3A4!clIYO{F!x(TXOA+iAIlSVFStazcT!?n!-$q0F{7r)M0pSd1e4_>85{r<|0K zwz3I|^#ufmj6MM*Cs1Uj@vyRvDl+YQph0EwsplcS_uZU&m^l@i&SZ8XZOw` z^~0g+uh->}jh74CA>x{*XXS6l9Ky%33UQu#QrTk?P7*5LbwISSF%;o;{83^%q|5xt zmX1Hz>@=4(ll&qzg^1rew0e`?7dqgP7dBuFHYfZTmwZ6s!FjjOLcyeRlNiB3aH*3( QiJ2aWAnMU%D0M~t0A36D^#A|> diff --git a/app/utils/translate.py b/app/utils/translate.py index 096563f..a4b4b51 100644 --- a/app/utils/translate.py +++ b/app/utils/translate.py @@ -5,11 +5,8 @@ def translate_text(model_id: str, text: str) -> Optional[str]: config = Config() - print('***** vars(config)', str(vars(config)), flush=True) - if not model_id in config.loaded_models: - print('***** "im here"', str("im here"), flush=True) return None if config.loaded_models[model_id]['sentence_segmenter']: diff --git a/app/views/v1/translate.py b/app/views/v1/translate.py index 59b96e3..fe3f305 100644 --- a/app/views/v1/translate.py +++ b/app/views/v1/translate.py @@ -9,8 +9,6 @@ TranslationRequest, TranslationResponse, ) -from app.utils.translate import translate_text -from app.tasks import translate_text_async translate_v1 = APIRouter(prefix='/api/v1/translate') @@ -39,27 +37,6 @@ async def translate_sentence( return TranslationResponse(translation=translation) -@translate_v1.post('/t', status_code=status.HTTP_200_OK) -async def translate_sentence_async(request: TranslationRequest): - config = Config() - - model_id = get_model_id( - config.map_lang_to_closest(request.src), - config.map_lang_to_closest(request.tgt), - request.alt, - ) - - if not model_id in config.loaded_models: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f'Language pair {model_id} is not supported.', - ) - - task = translate_text_async.delay(model_id, request.text) - - return {'uid': task.id} - - @translate_v1.post('/batch', status_code=status.HTTP_200_OK) async def translate_batch( 
request: BatchTranslationRequest, @@ -93,18 +70,3 @@ async def languages() -> LanguagesResponse: return LanguagesResponse( languages=config.language_codes, models=config.languages_list ) - - -@translate_v1.get('/hello', status_code=status.HTTP_200_OK) -async def say_hello(): - from app.tasks import hello - task = hello.delay() - return {'uid': task.id} - -@translate_v1.get('/{uid}', status_code=status.HTTP_200_OK) -async def say_hello_result(uid): - from celery.result import AsyncResult - result = AsyncResult(uid) - if result: - return result.result - diff --git a/app/views/v2/__init__.py b/app/views/v2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/views/v2/translate.py b/app/views/v2/translate.py new file mode 100644 index 0000000..6c9a747 --- /dev/null +++ b/app/views/v2/translate.py @@ -0,0 +1,76 @@ +from fastapi import APIRouter, HTTPException, status + +from app.helpers.config import Config +from app.utils.utils import get_model_id +from app.models.v1.translate import ( + BatchTranslationRequest, + BatchTranslationResponse, + LanguagesResponse, + TranslationRequest, + TranslationResponse, +) +from app.utils.translate import translate_text +from app.tasks import translate_text_async + + +translate_v2 = APIRouter(prefix='/api/v2/translate') + + +@translate_v2.post('/', status_code=status.HTTP_200_OK) +async def translate_sentence_async(request: TranslationRequest): + config = Config() + + model_id = get_model_id(request.src, request.tgt) + + task = translate_text_async.delay(model_id, request.text) + + return {'uid': task.id, + 'status': task.status} + + +@translate_v2.post('/batch', status_code=status.HTTP_200_OK) +async def translate_batch( + request: BatchTranslationRequest, +) -> BatchTranslationResponse: + config = Config() + + model_id = get_model_id( + config.map_lang_to_closest(request.src), + config.map_lang_to_closest(request.tgt), + request.alt, + ) + + if not model_id in config.loaded_models: + raise HTTPException( + 
status_code=status.HTTP_404_NOT_FOUND, + detail=f'Language pair {model_id} is not supported.', + ) + + translated_batch = [] + for sentence in request.texts: + translation = translate_text(model_id, sentence) + translated_batch.append(translation) + + return BatchTranslationResponse(translation=translated_batch) + + +@translate_v2.get('/', status_code=status.HTTP_200_OK) +async def languages() -> LanguagesResponse: + config = Config() + + return LanguagesResponse( + languages=config.language_codes, models=config.languages_list + ) + + +@translate_v2.get('/{uid}', status_code=status.HTTP_200_OK) +async def translation_async_result(uid): + from celery.result import AsyncResult + result = AsyncResult(uid) + if result.successful(): + return TranslationResponse(translation=result.result) + return { + 'status': result.status, + 'info': result.info + } + diff --git a/main.py b/main.py index 053fcdd..f184469 100644 --- a/main.py +++ b/main.py @@ -9,7 +9,7 @@ def celery_worker(): def run_worker(): subprocess.call( - ["celery", "-A", "main.celery", "worker", "--loglevel=info"] + ["celery", "-A", "main.celery", "worker", "--loglevel=info", "--max-tasks-per-child=1", "--autoscale=1,2"] ) run_process("./app", run_worker) From abb51a78e3e0540e336e7c19af92a0536ee41137 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Thu, 10 Mar 2022 20:21:17 +0000 Subject: [PATCH 03/16] black formatting --- app/__init__.py | 7 +++++-- app/celery_utils.py | 1 - app/config.py | 12 ++++++++---- app/tasks.py | 10 +++++----- app/views/v2/translate.py | 10 +++------- 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index bb513ac..5823bc9 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -8,16 +8,19 @@ def create_app() -> FastAPI: app = FastAPI() from app.celery_utils import create_celery + app.celery_app = create_celery() from app.views.v1.translate import translate_v1 + app.include_router(translate_v1) from app.views.v2.translate import 
translate_v2 + app.include_router(translate_v2) - #@app.on_event('startup') - #async def startup_event() -> None: + # @app.on_event('startup') + # async def startup_event() -> None: # config = Config(load_all_models=True) return app diff --git a/app/celery_utils.py b/app/celery_utils.py index 0372256..f06d3ee 100644 --- a/app/celery_utils.py +++ b/app/celery_utils.py @@ -8,4 +8,3 @@ def create_celery(): celery_app.config_from_object(settings, namespace="CELERY") return celery_app - diff --git a/app/config.py b/app/config.py index d665a28..56267ca 100644 --- a/app/config.py +++ b/app/config.py @@ -6,8 +6,13 @@ class BaseConfig: BASE_DIR = pathlib.Path(__file__).parent.parent - CELERY_BROKER_URL: str = os.environ.get("CELERY_BROKER_URL", "redis://127.0.0.1:6379/0") - CELERY_RESULT_BACKEND: str = os.environ.get("CELERY_RESULT_BACKEND", "redis://127.0.0.1:6379/0") + CELERY_BROKER_URL: str = os.environ.get( + "CELERY_BROKER_URL", "redis://127.0.0.1:6379/0" + ) + CELERY_RESULT_BACKEND: str = os.environ.get( + "CELERY_RESULT_BACKEND", "redis://127.0.0.1:6379/0" + ) + class DevelopmentConfig(BaseConfig): pass @@ -26,7 +31,7 @@ def get_settings(): config_cls_dict = { "development": DevelopmentConfig, "production": ProductionConfig, - "testing": TestingConfig + "testing": TestingConfig, } config_name = os.environ.get("FASTAPI_CONFIG", "development") @@ -35,4 +40,3 @@ def get_settings(): settings = get_settings() - diff --git a/app/tasks.py b/app/tasks.py index d08de9f..b4a89aa 100644 --- a/app/tasks.py +++ b/app/tasks.py @@ -6,14 +6,14 @@ from app.utils.translate import translate_text from app.settings import CONFIG_JSON_PATH + @shared_task def translate_text_async(model_id, text): with open(CONFIG_JSON_PATH, 'r') as f: conf = json.loads(f.read()) - model_data = list(filter(lambda x: f'{x["src"]}-{x["tgt"]}' == model_id, conf['models'])) - config_data = { - **conf, - 'models': model_data - } + model_data = list( + filter(lambda x: f'{x["src"]}-{x["tgt"]}' == model_id, 
conf['models']) + ) + config_data = {**conf, 'models': model_data} config = Config(config_data=config_data) return translate_text(model_id, text) diff --git a/app/views/v2/translate.py b/app/views/v2/translate.py index 6c9a747..a4ef9a9 100644 --- a/app/views/v2/translate.py +++ b/app/views/v2/translate.py @@ -24,8 +24,7 @@ async def translate_sentence_async(request: TranslationRequest): task = translate_text_async.delay(model_id, request.text) - return {'uid': task.id, - 'status': task.status} + return {'uid': task.id, 'status': task.status} @translate_v2.post('/batch', status_code=status.HTTP_200_OK) @@ -66,11 +65,8 @@ async def languages() -> LanguagesResponse: @translate_v2.get('/{uid}', status_code=status.HTTP_200_OK) async def translation_async_result(uid): from celery.result import AsyncResult + result = AsyncResult(uid) if result.successful(): return TranslationResponse(translation=result.result) - return { - 'status': result.status, - 'info': result.info - } - + return {'status': result.status, 'info': result.info} From 8222ccaca812a113295d04640afc237ecd067515 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Thu, 10 Mar 2022 22:22:12 +0000 Subject: [PATCH 04/16] lazy model loading, async batch translation, clean up --- app/__init__.py | 6 +++--- app/helpers/config.py | 29 +++++++++++++++++++------- app/requirements.txt | 17 +++++++-------- app/tasks.py | 23 +++++++++++--------- app/views/v1/translate.py | 1 + app/views/v2/translate.py | 44 +++++++++++++-------------------------- compose/app/start.sh | 3 +-- docker-compose.yml | 4 ++-- 8 files changed, 65 insertions(+), 62 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index 5823bc9..2b2acd5 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -19,8 +19,8 @@ def create_app() -> FastAPI: app.include_router(translate_v2) - # @app.on_event('startup') - # async def startup_event() -> None: - # config = Config(load_all_models=True) + @app.on_event('startup') + async def startup_event() -> None: + 
config = Config(load_all_models=True) return app diff --git a/app/helpers/config.py b/app/helpers/config.py index 364c009..99400f5 100644 --- a/app/helpers/config.py +++ b/app/helpers/config.py @@ -24,7 +24,9 @@ def __init__( self, config_file: Optional[str] = None, config_data: Optional[Dict] = None, + model_id: Optional[str] = None, load_all_models: bool = False, + log_messages: bool = True, ): self.loaded_models: Dict = {} self.language_codes: Dict = {} @@ -32,6 +34,8 @@ def __init__( self.config_data: Dict = config_data or {} self.config_file: str = config_file or CONFIG_JSON_PATH self.load_all_models: bool = load_all_models + self.log_messages = log_messages + self.model_id = model_id self.warnings: List[str] = [] self.messages: List[str] = [] @@ -39,10 +43,9 @@ def __init__( if not config_data: self._validate() - if self.load_all_models or config_data: - self._load_language_codes() - self._load_all_models() - self._load_languages_list() + self._load_language_codes() + self._load_all_models() + self._load_languages_list() def map_lang_to_closest(self, lang: str) -> str: if lang in self.language_codes: @@ -95,7 +98,17 @@ def _is_valid_model_type(self, model_type: str) -> bool: return True def _load_all_models(self) -> None: - for model_config in self.config_data['models']: + models_for_loading = self.config_data['models'] + + # Filter models for lazy loading only a specific model by `model_id` + if self.model_id is not None: + models_for_loading = [ + m + for m in models_for_loading + if get_model_id(m['src'], m['tgt']) == self.model_id + ] + + for model_config in models_for_loading: if not 'load' in model_config or not model_config['load']: continue @@ -177,12 +190,14 @@ def _load_languages_list(self) -> None: self.languages_list[source][target].append(model_id) def _log_warning(self, msg: str) -> None: - logger.warning(msg) self.warnings.append(msg) + if self.log_messages: + logger.warning(msg) def _log_info(self, msg: str) -> None: - logger.info(msg) 
self.messages.append(msg) + if self.log_messages: + logger.info(msg) def _validate(self) -> None: self._validate_config_file() diff --git a/app/requirements.txt b/app/requirements.txt index ebb3c26..0915427 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,14 +1,13 @@ -fastapi==0.65.2 -uvicorn==0.13.3 -httpx==0.16.1 -subword_nmt==0.3.6 -ctranslate2==1.16.2 -sacremoses==0.0.43 -nltk==3.5 -sentencepiece==0.1.94 +fastapi==0.73.0 +uvicorn==0.17.4 +subword_nmt==0.3.8 +ctranslate2==2.12.0 +sacremoses==0.0.47 +nltk==3.7 +sentencepiece==0.1.96 torch==1.7.1 torchvision==0.8.2 -transformers==4.0.1 +transformers==4.16.2 celery==5.1.2 redis==3.5.3 flower==1.0.0 diff --git a/app/tasks.py b/app/tasks.py index b4a89aa..2a3fba2 100644 --- a/app/tasks.py +++ b/app/tasks.py @@ -1,19 +1,22 @@ -import json - from celery import shared_task from app.helpers.config import Config from app.utils.translate import translate_text -from app.settings import CONFIG_JSON_PATH @shared_task def translate_text_async(model_id, text): - with open(CONFIG_JSON_PATH, 'r') as f: - conf = json.loads(f.read()) - model_data = list( - filter(lambda x: f'{x["src"]}-{x["tgt"]}' == model_id, conf['models']) - ) - config_data = {**conf, 'models': model_data} - config = Config(config_data=config_data) + config = Config(model_id=model_id, log_messages=False) return translate_text(model_id, text) + + +@shared_task +def translate_batch_async(model_id, texts): + config = Config(model_id=model_id, log_messages=False) + + translated_batch = [] + for sentence in texts: + translation = translate_text(model_id, sentence) + translated_batch.append(translation) + + return translated_batch diff --git a/app/views/v1/translate.py b/app/views/v1/translate.py index fe3f305..b8d4f18 100644 --- a/app/views/v1/translate.py +++ b/app/views/v1/translate.py @@ -9,6 +9,7 @@ TranslationRequest, TranslationResponse, ) +from app.utils.translate import translate_text translate_v1 = APIRouter(prefix='/api/v1/translate') diff 
--git a/app/views/v2/translate.py b/app/views/v2/translate.py index a4ef9a9..4260b09 100644 --- a/app/views/v2/translate.py +++ b/app/views/v2/translate.py @@ -1,4 +1,5 @@ from fastapi import APIRouter, HTTPException, status +from celery.result import AsyncResult from app.helpers.config import Config from app.utils.utils import get_model_id @@ -10,7 +11,7 @@ TranslationResponse, ) from app.utils.translate import translate_text -from app.tasks import translate_text_async +from app.tasks import translate_text_async, translate_batch_async translate_v2 = APIRouter(prefix='/api/v2/translate') @@ -18,39 +19,18 @@ @translate_v2.post('/', status_code=status.HTTP_200_OK) async def translate_sentence_async(request: TranslationRequest): - config = Config() - model_id = get_model_id(request.src, request.tgt) - task = translate_text_async.delay(model_id, request.text) - return {'uid': task.id, 'status': task.status} @translate_v2.post('/batch', status_code=status.HTTP_200_OK) async def translate_batch( request: BatchTranslationRequest, -) -> BatchTranslationResponse: - config = Config() - - model_id = get_model_id( - config.map_lang_to_closest(request.src), - config.map_lang_to_closest(request.tgt), - request.alt, - ) - - if not model_id in config.loaded_models: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f'Language pair {model_id} is not supported.', - ) - - translated_batch = [] - for sentence in request.texts: - translation = translate_text(model_id, sentence) - translated_batch.append(translation) - - return BatchTranslationResponse(translation=translated_batch) +): + model_id = get_model_id(request.src, request.tgt) + task = translate_batch_async.delay(model_id, request.texts) + return {'uid': task.id, 'status': task.status} @translate_v2.get('/', status_code=status.HTTP_200_OK) @@ -63,10 +43,16 @@ async def languages() -> LanguagesResponse: @translate_v2.get('/{uid}', status_code=status.HTTP_200_OK) -async def translation_async_result(uid): - 
from celery.result import AsyncResult - +async def translate_sentence_async_result(uid): result = AsyncResult(uid) if result.successful(): return TranslationResponse(translation=result.result) return {'status': result.status, 'info': result.info} + + +@translate_v2.get('/batch/{uid}', status_code=status.HTTP_200_OK) +async def translate_batch_async_result(uid): + result = AsyncResult(uid) + if result.successful(): + return BatchTranslationResponse(translation=result.result) + return {'status': result.status, 'info': result.info} diff --git a/compose/app/start.sh b/compose/app/start.sh index 4be1710..8601399 100644 --- a/compose/app/start.sh +++ b/compose/app/start.sh @@ -4,5 +4,4 @@ set -o errexit set -o pipefail set -o nounset -uvicorn main:app --reload --reload-dir app --host 0.0.0.0 --port 8000 -#--log-config logging.yml +uvicorn main:app --reload --host 0.0.0.0 --port 8000 --log-config logging.yml diff --git a/docker-compose.yml b/docker-compose.yml index 5b71b1e..e3e3a08 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: - 8001:8000 volumes: - .:/app - - ../.vols/translation-models:/models + - ../translation-models:/models env_file: - .env/.dev depends_on: @@ -30,7 +30,7 @@ services: command: /start-celeryworker volumes: - .:/app - - ../.vols/translation-models:/models + - ../translation-models:/models env_file: - .env/.dev depends_on: From b2a6527b4c31cdba97477a561632d35e3a1e5771 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Wed, 23 Mar 2022 19:46:22 +0000 Subject: [PATCH 05/16] use non-root user for celery worker and get rid of celery warnings --- app/config.py | 2 +- app/nltk_pkg.py | 2 +- compose/app/Dockerfile | 9 ++++++++- compose/app/celery/worker/start.sh | 8 +++++++- main.py | 17 ++++++++++++++--- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/app/config.py b/app/config.py index 56267ca..f468ce3 100644 --- a/app/config.py +++ b/app/config.py @@ -9,7 +9,7 @@ class BaseConfig: CELERY_BROKER_URL: 
str = os.environ.get( "CELERY_BROKER_URL", "redis://127.0.0.1:6379/0" ) - CELERY_RESULT_BACKEND: str = os.environ.get( + result_backend: str = os.environ.get( "CELERY_RESULT_BACKEND", "redis://127.0.0.1:6379/0" ) diff --git a/app/nltk_pkg.py b/app/nltk_pkg.py index 7c5175f..54aa2e4 100644 --- a/app/nltk_pkg.py +++ b/app/nltk_pkg.py @@ -1,3 +1,3 @@ import nltk -nltk.download('punkt') +nltk.download('punkt', download_dir='/usr/local/share/nltk_data') diff --git a/compose/app/Dockerfile b/compose/app/Dockerfile index 78afca5..4ed47f7 100644 --- a/compose/app/Dockerfile +++ b/compose/app/Dockerfile @@ -2,12 +2,19 @@ FROM python:3.8-slim # Project setup -ENV VIRTUAL_ENV=/opt/venv +ENV VIRTUAL_ENV=/opt/venv \ + UWSGI_USER=twb \ + UWSGI_GROUP=twb RUN apt-get update \ && apt-get install -y telnet netcat \ && apt-get clean +################################# +# Create local user UWSGI_USER` # +################################# +RUN adduser --disabled-password --gecos '' "$UWSGI_USER" + RUN python -m venv "$VIRTUAL_ENV" ENV PATH="$VIRTUAL_ENV/bin:$PATH" RUN pip install --quiet --upgrade pip && \ diff --git a/compose/app/celery/worker/start.sh b/compose/app/celery/worker/start.sh index d9e6e68..f818a01 100644 --- a/compose/app/celery/worker/start.sh +++ b/compose/app/celery/worker/start.sh @@ -3,5 +3,11 @@ set -o errexit set -o nounset -#celery -A main.celery worker --loglevel=info +#celery -A main.celery worker \ +# --loglevel=info \ +# --max-tasks-per-child 1 \ +# --autoscale 1,2 \ +# --uid=${UWSGI_USER} \ +# --gid=${UWSGI_GROUP} \ + python main.py diff --git a/main.py b/main.py index f184469..9f828fe 100644 --- a/main.py +++ b/main.py @@ -3,17 +3,28 @@ app = create_app() celery = app.celery_app + def celery_worker(): from watchgod import run_process import subprocess def run_worker(): subprocess.call( - ["celery", "-A", "main.celery", "worker", "--loglevel=info", "--max-tasks-per-child=1", "--autoscale=1,2"] + [ + 'celery', + '-A', + 'main.celery', + 'worker', + 
'--loglevel=info', + '--max-tasks-per-child=1', + '--autoscale=1,2', + '--uid=twb', + '--gid=twb', + ] ) - run_process("./app", run_worker) + run_process('./app', run_worker) -if __name__ == "__main__": +if __name__ == '__main__': celery_worker() From 5209ea842ffc8ca0ceeb0385027e44cf85ae495b Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Wed, 23 Mar 2022 22:14:09 +0000 Subject: [PATCH 06/16] wip tests for async v2 endpoints --- app/config.py | 14 +++---- app/tests/api/v2/__init__.py | 0 app/tests/api/v2/test_api_translate.py | 55 ++++++++++++++++++++++++++ app/tests/conftest.py | 12 ++++++ 4 files changed, 74 insertions(+), 7 deletions(-) create mode 100644 app/tests/api/v2/__init__.py create mode 100644 app/tests/api/v2/test_api_translate.py create mode 100644 app/tests/conftest.py diff --git a/app/config.py b/app/config.py index f468ce3..d77284e 100644 --- a/app/config.py +++ b/app/config.py @@ -7,10 +7,10 @@ class BaseConfig: BASE_DIR = pathlib.Path(__file__).parent.parent CELERY_BROKER_URL: str = os.environ.get( - "CELERY_BROKER_URL", "redis://127.0.0.1:6379/0" + 'CELERY_BROKER_URL', 'redis://127.0.0.1:6379/0' ) result_backend: str = os.environ.get( - "CELERY_RESULT_BACKEND", "redis://127.0.0.1:6379/0" + 'CELERY_RESULT_BACKEND', 'redis://127.0.0.1:6379/0' ) @@ -23,18 +23,18 @@ class ProductionConfig(BaseConfig): class TestingConfig(BaseConfig): - pass + task_always_eager = True @lru_cache() def get_settings(): config_cls_dict = { - "development": DevelopmentConfig, - "production": ProductionConfig, - "testing": TestingConfig, + 'development': DevelopmentConfig, + 'production': ProductionConfig, + 'testing': TestingConfig, } - config_name = os.environ.get("FASTAPI_CONFIG", "development") + config_name = os.environ.get('FASTAPI_CONFIG', 'development') config_cls = config_cls_dict[config_name] return config_cls() diff --git a/app/tests/api/v2/__init__.py b/app/tests/api/v2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/app/tests/api/v2/test_api_translate.py b/app/tests/api/v2/test_api_translate.py new file mode 100644 index 0000000..39573a4 --- /dev/null +++ b/app/tests/api/v2/test_api_translate.py @@ -0,0 +1,55 @@ +import json + +from fastapi import status +from fastapi.testclient import TestClient + +from main import app +from app.helpers.config import Config + + +class TestTranslateApiV2: + API_VERSION = 2 + SERVICE = 'translate' + + def setup(self): + self.client = TestClient(app) + self.config_data = { + 'languages': {}, + 'models': [], + } + self.config = Config(config_data=self.config_data) + + def get_endpoint(self, endpoint: str = '/') -> str: + endpoint = f'/{endpoint}' if not endpoint.startswith('/') else endpoint + return f'/api/v{self.API_VERSION}/{self.SERVICE}{endpoint}' + + def test_async_translate_text_valid_code(self): + options = { + 'src': 'en', + 'tgt': 'fr', + 'text': 'Hello there, how are you doing?', + } + response = self.client.post( + self.get_endpoint('/'), data=json.dumps(options) + ) + assert response.status_code == status.HTTP_200_OK + task_content = response.json() + assert task_content['status'] == 'SUCCESS' + + def test_async_batch_translate_text_valid_code(self): + options = { + 'src': 'en', + 'tgt': 'fr', + 'texts': ['Hello, what is your name?', 'How are you doing?'], + } + expected_translations = [ + 'Bonjour, quel est votre nom?', + 'Comment ça va?', + ] + response = self.client.post( + url=self.get_endpoint('/batch'), data=json.dumps(options) + ) + assert response.status_code == status.HTTP_200_OK + + task_content = response.json() + assert task_content['status'] == 'SUCCESS' diff --git a/app/tests/conftest.py b/app/tests/conftest.py new file mode 100644 index 0000000..f5d301a --- /dev/null +++ b/app/tests/conftest.py @@ -0,0 +1,12 @@ +import os + +import pytest + + +os.environ['FASTAPI_CONFIG'] = 'testing' # noqa + + +@pytest.fixture +def settings(): + from app.config import settings as _settings + return _settings From 
0e9f3c5e4781869b1c834db6237a5d3d57f659f3 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Thu, 24 Mar 2022 17:27:08 +0000 Subject: [PATCH 07/16] only load all models when `load_all_models` is `True` --- app/helpers/config.py | 3 ++- app/tests/api/v2/test_api_translate.py | 6 +----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/app/helpers/config.py b/app/helpers/config.py index 99400f5..6e2962d 100644 --- a/app/helpers/config.py +++ b/app/helpers/config.py @@ -44,7 +44,8 @@ def __init__( self._validate() self._load_language_codes() - self._load_all_models() + if self.load_all_models: + self._load_all_models() self._load_languages_list() def map_lang_to_closest(self, lang: str) -> str: diff --git a/app/tests/api/v2/test_api_translate.py b/app/tests/api/v2/test_api_translate.py index 39573a4..9ecf6d9 100644 --- a/app/tests/api/v2/test_api_translate.py +++ b/app/tests/api/v2/test_api_translate.py @@ -13,11 +13,7 @@ class TestTranslateApiV2: def setup(self): self.client = TestClient(app) - self.config_data = { - 'languages': {}, - 'models': [], - } - self.config = Config(config_data=self.config_data) + self.config = Config() def get_endpoint(self, endpoint: str = '/') -> str: endpoint = f'/{endpoint}' if not endpoint.startswith('/') else endpoint From 99b590e4d90265a594f3f36da65165f2086da5a3 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Thu, 24 Mar 2022 22:16:56 +0000 Subject: [PATCH 08/16] list all potential languages that can be lazy loaded for v2 language endpoint --- .gitignore | 1 + app/helpers/config.py | 78 ++++++++++++++++++-------- app/tests/api/v2/test_api_translate.py | 8 +-- app/tests/conftest.py | 1 + app/views/v2/translate.py | 9 ++- 5 files changed, 63 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index 1bd436d..7683be6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ models/* .DS_Store app/.DS_Store +*.swp # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/app/helpers/config.py 
b/app/helpers/config.py index 6e2962d..3cd717b 100644 --- a/app/helpers/config.py +++ b/app/helpers/config.py @@ -34,18 +34,22 @@ def __init__( self.config_data: Dict = config_data or {} self.config_file: str = config_file or CONFIG_JSON_PATH self.load_all_models: bool = load_all_models - self.log_messages = log_messages - self.model_id = model_id + self.log_messages: List = log_messages + self.model_id: Optional[str] = model_id + self.models_for_loading: List = [] self.warnings: List[str] = [] self.messages: List[str] = [] if not config_data: - self._validate() + self._validate_config_file() + self._load_config_file() + self._validate_models() self._load_language_codes() - if self.load_all_models: - self._load_all_models() + if config_data or load_all_models or model_id: + self._get_models_for_loading() + self._load_available_models() self._load_languages_list() def map_lang_to_closest(self, lang: str) -> str: @@ -68,11 +72,11 @@ def _get_model_path( MODELS_ROOT_DIR, model_config['model_path'] ) if not os.path.exists(model_dir): - model_dir = None self._log_warning( f'Model path {model_dir} not found for model {model_id}. ' "Can't load custom translation model or segmenters." ) + model_dir = None else: self._log_warning( f'Model path not specified for model {model_id}. 
' @@ -98,34 +102,40 @@ def _is_valid_model_type(self, model_type: str) -> bool: return False return True - def _load_all_models(self) -> None: - models_for_loading = self.config_data['models'] + def _get_models_for_loading(self): + load = self.config_data['models'] # Filter models for lazy loading only a specific model by `model_id` if self.model_id is not None: - models_for_loading = [ + load = [ m - for m in models_for_loading + for m in load if get_model_id(m['src'], m['tgt']) == self.model_id ] + if len(load) > 1: + load = load[:1] - for model_config in models_for_loading: - if not 'load' in model_config or not model_config['load']: - continue - - # CONFIG CHECKS - if not self._is_valid_model_config(model_config): - continue + if self.load_all_models: + load = [ + model for model in load if 'load' in model and model['load'] + ] - if not self._is_valid_model_type(model_config['model_type']): - continue + self.models_for_loading = load + def _load_available_models(self) -> None: + for model_config in self.models_for_loading: try: self._load_model(model_config) except ModelLoadingException: continue def _load_model(self, model_config: Dict) -> None: + + if not self._is_valid_model_config( + model_config + ) or not self._is_valid_model_type(model_config['model_type']): + raise ModelLoadingException + src: str = model_config['src'] tgt: str = model_config['tgt'] alt_id: Optional[str] = model_config.get('alt') @@ -177,6 +187,28 @@ def _load_language_codes(self) -> None: "Language name spefication dictionary ('languages') not found in configuration." 
) + def get_all_potential_languages(self) -> Dict: + languages = {} + for model_config in self.config_data['models']: + if model_config['model_type'] == 'ctranslator2': + model_path = model_config.get('model_path') + if model_path is None: + continue + model_dir = os.path.join(MODELS_ROOT_DIR, model_path) + if not os.path.exists(model_dir): + continue + source: str = model_config['src'] + target: str = model_config['tgt'] + alt_id: Optional[str] = model_config.get('alt') + model_id: str = get_model_id(source, target, alt_id) + if source not in languages: + languages[source] = {} + if target not in languages[source]: + languages[source][target] = [] + + languages[source][target].append(model_id) + return languages + def _load_languages_list(self) -> None: for model_id in self.loaded_models.keys(): if not (parsed_id := parse_model_id(model_id)): @@ -200,9 +232,9 @@ def _log_info(self, msg: str) -> None: if self.log_messages: logger.info(msg) - def _validate(self) -> None: - self._validate_config_file() - self._validate_models() + def _load_config_file(self): + with open(self.config_file, 'r') as jsonfile: + self.config_data = json.load(jsonfile) def _validate_config_file(self) -> None: # Check if config file is there and well formatted @@ -214,7 +246,7 @@ def _validate_config_file(self) -> None: else: try: with open(self.config_file, 'r') as jsonfile: - self.config_data = json.load(jsonfile) + config_data = json.load(jsonfile) except json.decoder.JSONDecodeError: msg = 'Config file format broken. No models will be loaded.' 
logger.error(msg) diff --git a/app/tests/api/v2/test_api_translate.py b/app/tests/api/v2/test_api_translate.py index 9ecf6d9..620f741 100644 --- a/app/tests/api/v2/test_api_translate.py +++ b/app/tests/api/v2/test_api_translate.py @@ -5,20 +5,16 @@ from main import app from app.helpers.config import Config +from app.tests.base_test_case import APIBaseTestCase -class TestTranslateApiV2: +class TestTranslateApiV2(APIBaseTestCase): API_VERSION = 2 - SERVICE = 'translate' def setup(self): self.client = TestClient(app) self.config = Config() - def get_endpoint(self, endpoint: str = '/') -> str: - endpoint = f'/{endpoint}' if not endpoint.startswith('/') else endpoint - return f'/api/v{self.API_VERSION}/{self.SERVICE}{endpoint}' - def test_async_translate_text_valid_code(self): options = { 'src': 'en', diff --git a/app/tests/conftest.py b/app/tests/conftest.py index f5d301a..1b65871 100644 --- a/app/tests/conftest.py +++ b/app/tests/conftest.py @@ -9,4 +9,5 @@ @pytest.fixture def settings(): from app.config import settings as _settings + return _settings diff --git a/app/views/v2/translate.py b/app/views/v2/translate.py index 4260b09..c4cec0b 100644 --- a/app/views/v2/translate.py +++ b/app/views/v2/translate.py @@ -1,3 +1,5 @@ +from typing import Dict + from fastapi import APIRouter, HTTPException, status from celery.result import AsyncResult @@ -34,12 +36,9 @@ async def translate_batch( @translate_v2.get('/', status_code=status.HTTP_200_OK) -async def languages() -> LanguagesResponse: +async def languages() -> Dict: config = Config() - - return LanguagesResponse( - languages=config.language_codes, models=config.languages_list - ) + return {'models': config.get_all_potential_languages()} @translate_v2.get('/{uid}', status_code=status.HTTP_200_OK) From 7f543a46e46fbe90f3f0ac596af266d9d02ea690 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Thu, 24 Mar 2022 22:41:49 +0000 Subject: [PATCH 09/16] add unit tests for celery tasks --- app/tests/test_tasks.py | 24 
++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 app/tests/test_tasks.py diff --git a/app/tests/test_tasks.py b/app/tests/test_tasks.py new file mode 100644 index 0000000..2678d21 --- /dev/null +++ b/app/tests/test_tasks.py @@ -0,0 +1,24 @@ +from app.helpers.config import Config +from app.tasks import translate_batch_async, translate_text_async + + +def test_task_translate_text_async(): + options = { + 'model_id': 'en-fr', + 'text': 'Hello there, how are you doing?', + } + expected_translation = 'Bonjour, comment allez-vous?' + translation = translate_text_async(**options) + assert translation == expected_translation + +def test_task_translate_batch_async(): + options = { + 'model_id': 'en-fr', + 'texts': ['Hello, what is your name?', 'How are you doing?'], + } + expected_translations = [ + 'Bonjour, quel est votre nom?', + 'Comment ça va?', + ] + translation = translate_batch_async(**options) + assert translation == expected_translations From 35584d9b262bf3073a9e87ceb969bbeb61317ae8 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Tue, 5 Apr 2022 17:54:06 +0000 Subject: [PATCH 10/16] move logging config from yml to python dict --- app/__init__.py | 4 ++++ app/logging.py | 35 +++++++++++++++++++++++++++++++++++ app/tests/test_tasks.py | 1 + compose/app/start.sh | 2 +- logging.yml | 32 -------------------------------- 5 files changed, 41 insertions(+), 33 deletions(-) create mode 100644 app/logging.py delete mode 100644 logging.yml diff --git a/app/__init__.py b/app/__init__.py index 2b2acd5..3f0c478 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -7,6 +7,10 @@ def create_app() -> FastAPI: app = FastAPI() + from app.logging import configure_logging + + configure_logging() + from app.celery_utils import create_celery app.celery_app = create_celery() diff --git a/app/logging.py b/app/logging.py new file mode 100644 index 0000000..9727801 --- /dev/null +++ b/app/logging.py @@ -0,0 +1,35 @@ +import logging +import logging.config + + 
+def configure_logging(): + logging_dict = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'verbose': { + 'format': '[ %(asctime)s ] %(levelname)s: %(message)s', + }, + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'formatter': 'verbose', + }, + }, + 'root': { + 'handlers': ['console'], + 'level': 'INFO', + }, + 'loggers': { + 'project': { + 'handlers': ['console'], + 'propagate': False, + }, + 'uvicorn.access': { + 'propagate': True, + }, + }, + } + + logging.config.dictConfig(logging_dict) diff --git a/app/tests/test_tasks.py b/app/tests/test_tasks.py index 2678d21..72816cf 100644 --- a/app/tests/test_tasks.py +++ b/app/tests/test_tasks.py @@ -11,6 +11,7 @@ def test_task_translate_text_async(): translation = translate_text_async(**options) assert translation == expected_translation + def test_task_translate_batch_async(): options = { 'model_id': 'en-fr', diff --git a/compose/app/start.sh b/compose/app/start.sh index 8601399..b912a95 100644 --- a/compose/app/start.sh +++ b/compose/app/start.sh @@ -4,4 +4,4 @@ set -o errexit set -o pipefail set -o nounset -uvicorn main:app --reload --host 0.0.0.0 --port 8000 --log-config logging.yml +uvicorn main:app --reload --host 0.0.0.0 --port 8000 diff --git a/logging.yml b/logging.yml deleted file mode 100644 index e6a7e28..0000000 --- a/logging.yml +++ /dev/null @@ -1,32 +0,0 @@ -version: 1 -disable_existing_loggers: false - -formatters: - standard: - format: "[ %(asctime)s ] %(levelname)s: %(message)s" - simple: - format: "%(levelname)s: %(message)s" - -handlers: - console: - class: logging.StreamHandler - formatter: standard - stream: ext://sys.stdout - simple: - class: logging.StreamHandler - formatter: simple - stream: ext://sys.stdout - -loggers: - console_logger: - handlers: [simple] - level: DEBUG - propagate: false - uvicorn: - error: - propagate: true - -root: - level: INFO - handlers: [console] - propagate: no From 88adf5c556d5b0e5e6e42c7b8abd17d80eb33f33 Mon Sep 
17 00:00:00 2001 From: Joshua Beretta Date: Tue, 5 Apr 2022 18:57:06 +0000 Subject: [PATCH 11/16] wip production requirements: nginx, gunicorn --- app/asgi.py | 4 ++ app/requirements.txt | 2 +- app/views/v2/translate.py | 2 + compose/{ => dev}/app/Dockerfile | 4 +- compose/{ => dev}/app/Dockerfile-gpu | 0 compose/{ => dev}/app/celery/worker/start.sh | 0 compose/{ => dev}/app/start.sh | 0 compose/prod/app/Dockerfile | 36 +++++++++++++++++ compose/prod/app/Dockerfile-gpu | 42 ++++++++++++++++++++ compose/prod/app/celery/worker/start.sh | 11 +++++ compose/prod/app/start.sh | 10 +++++ compose/prod/nginx/Dockerfile | 4 ++ compose/prod/nginx/nginx.conf | 13 ++++++ docker-compose.prod.yml | 40 +++++++++++++++++++ docker-compose.yml | 8 ++-- 15 files changed, 169 insertions(+), 7 deletions(-) create mode 100644 app/asgi.py rename compose/{ => dev}/app/Dockerfile (87%) rename compose/{ => dev}/app/Dockerfile-gpu (100%) rename compose/{ => dev}/app/celery/worker/start.sh (100%) rename compose/{ => dev}/app/start.sh (100%) create mode 100644 compose/prod/app/Dockerfile create mode 100644 compose/prod/app/Dockerfile-gpu create mode 100644 compose/prod/app/celery/worker/start.sh create mode 100644 compose/prod/app/start.sh create mode 100644 compose/prod/nginx/Dockerfile create mode 100644 compose/prod/nginx/nginx.conf create mode 100644 docker-compose.prod.yml diff --git a/app/asgi.py b/app/asgi.py new file mode 100644 index 0000000..9d94d3e --- /dev/null +++ b/app/asgi.py @@ -0,0 +1,4 @@ +from app import create_app + +app = create_app() +celery = app.celery_app diff --git a/app/requirements.txt b/app/requirements.txt index 0915427..b819b10 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -10,5 +10,5 @@ torchvision==0.8.2 transformers==4.16.2 celery==5.1.2 redis==3.5.3 -flower==1.0.0 watchgod==0.7 +gunicorn==20.1.0 diff --git a/app/views/v2/translate.py b/app/views/v2/translate.py index c4cec0b..981d873 100644 --- a/app/views/v2/translate.py +++ 
b/app/views/v2/translate.py @@ -19,6 +19,7 @@ translate_v2 = APIRouter(prefix='/api/v2/translate') +@translate_v2.post('', status_code=status.HTTP_200_OK) @translate_v2.post('/', status_code=status.HTTP_200_OK) async def translate_sentence_async(request: TranslationRequest): model_id = get_model_id(request.src, request.tgt) @@ -35,6 +36,7 @@ async def translate_batch( return {'uid': task.id, 'status': task.status} +@translate_v2.get('', status_code=status.HTTP_200_OK) @translate_v2.get('/', status_code=status.HTTP_200_OK) async def languages() -> Dict: config = Config() diff --git a/compose/app/Dockerfile b/compose/dev/app/Dockerfile similarity index 87% rename from compose/app/Dockerfile rename to compose/dev/app/Dockerfile index 4ed47f7..ef653d3 100644 --- a/compose/app/Dockerfile +++ b/compose/dev/app/Dockerfile @@ -23,10 +23,10 @@ COPY ./app/requirements.txt /app/requirements.txt RUN pip install -r /app/requirements.txt \ && rm -rf /root/.cache/pip -COPY ./compose/app/start.sh /start.sh +COPY ./compose/dev/app/start.sh /start.sh RUN chmod +x /start.sh -COPY ./compose/app/celery/worker/start.sh /start-celeryworker +COPY ./compose/dev/app/celery/worker/start.sh /start-celeryworker RUN chmod +x /start-celeryworker WORKDIR /app diff --git a/compose/app/Dockerfile-gpu b/compose/dev/app/Dockerfile-gpu similarity index 100% rename from compose/app/Dockerfile-gpu rename to compose/dev/app/Dockerfile-gpu diff --git a/compose/app/celery/worker/start.sh b/compose/dev/app/celery/worker/start.sh similarity index 100% rename from compose/app/celery/worker/start.sh rename to compose/dev/app/celery/worker/start.sh diff --git a/compose/app/start.sh b/compose/dev/app/start.sh similarity index 100% rename from compose/app/start.sh rename to compose/dev/app/start.sh diff --git a/compose/prod/app/Dockerfile b/compose/prod/app/Dockerfile new file mode 100644 index 0000000..0906a85 --- /dev/null +++ b/compose/prod/app/Dockerfile @@ -0,0 +1,36 @@ +FROM python:3.8-slim + +# Project 
setup + +ENV VIRTUAL_ENV=/opt/venv \ + UWSGI_USER=twb \ + UWSGI_GROUP=twb + +RUN apt-get update \ + && apt-get install -y telnet netcat \ + && apt-get clean + +################################# +# Create local user UWSGI_USER` # +################################# +RUN adduser --disabled-password --gecos '' "$UWSGI_USER" + +RUN python -m venv "$VIRTUAL_ENV" +ENV PATH="$VIRTUAL_ENV/bin:$PATH" +RUN pip install --quiet --upgrade pip && \ + pip install --quiet pip-tools +COPY ./app/requirements.txt /app/requirements.txt +RUN pip install -r /app/requirements.txt \ + && rm -rf /root/.cache/pip + +COPY ./compose/prod/app/start.sh /start.sh +RUN chmod +x /start.sh + +COPY ./compose/prod/app/celery/worker/start.sh /start-celeryworker +RUN chmod +x /start-celeryworker + +COPY . /app +WORKDIR /app + +COPY ./app/nltk_pkg.py /app/nltk_pkg.py +RUN python /app/nltk_pkg.py diff --git a/compose/prod/app/Dockerfile-gpu b/compose/prod/app/Dockerfile-gpu new file mode 100644 index 0000000..6acdefa --- /dev/null +++ b/compose/prod/app/Dockerfile-gpu @@ -0,0 +1,42 @@ +# GPU setup +FROM nvidia/cuda:10.2-devel + +# Miniconda install copy-pasted from Miniconda's own Dockerfile reachable +# at: https://github.com/ContinuumIO/docker-images/blob/master/miniconda3/debian/Dockerfile + +ENV PATH /opt/conda/bin:$PATH + +RUN apt-get update --fix-missing && \ + apt-get install -y wget bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 git mercurial subversion && \ + apt-get clean + +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p /opt/conda && \ + rm ~/miniconda.sh && \ + /opt/conda/bin/conda clean -tipsy && \ + ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + echo ". 
/opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc && \ + find /opt/conda/ -follow -type f -name '*.a' -delete && \ + find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ + /opt/conda/bin/conda clean -afy + +# For CPU setup comment out above and remove comment in line below +# FROM python:3.8-slim + +# Project setup + +WORKDIR /app + +COPY ./requirements.txt /app/requirements.txt + +RUN apt-get update \ + && apt-get install gcc -y \ + && apt-get clean + +RUN pip install -r /app/requirements.txt \ + && rm -rf /root/.cache/pip + +COPY . /app/ + +RUN python /app/nltk_pkg.py diff --git a/compose/prod/app/celery/worker/start.sh b/compose/prod/app/celery/worker/start.sh new file mode 100644 index 0000000..f3f9cd1 --- /dev/null +++ b/compose/prod/app/celery/worker/start.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -o errexit +set -o nounset + +celery -A app.asgi.celery worker \ + --loglevel=info \ + --max-tasks-per-child 1 \ + --autoscale 1,2 \ + --uid=${UWSGI_USER} \ + --gid=${UWSGI_GROUP} \ diff --git a/compose/prod/app/start.sh b/compose/prod/app/start.sh new file mode 100644 index 0000000..7276992 --- /dev/null +++ b/compose/prod/app/start.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -o errexit +set -o pipefail +set -o nounset + +gunicorn app.asgi:app -w 1 \ + -k uvicorn.workers.UvicornWorker \ + --bind 0.0.0.0:8000 \ + --chdir=/app diff --git a/compose/prod/nginx/Dockerfile b/compose/prod/nginx/Dockerfile new file mode 100644 index 0000000..5f9269f --- /dev/null +++ b/compose/prod/nginx/Dockerfile @@ -0,0 +1,4 @@ +FROM nginx:1.19-alpine + +RUN rm /etc/nginx/conf.d/default.conf +COPY nginx.conf /etc/nginx/conf.d diff --git a/compose/prod/nginx/nginx.conf b/compose/prod/nginx/nginx.conf new file mode 100644 index 0000000..673a8df --- /dev/null +++ b/compose/prod/nginx/nginx.conf @@ -0,0 +1,13 @@ +upstream twb_mt { + server mt:8000; +} + +server { + listen 80; + location / { + proxy_pass http://twb_mt; + proxy_set_header 
X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header Host $host; + proxy_redirect off; + } +} diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml new file mode 100644 index 0000000..8fc56b1 --- /dev/null +++ b/docker-compose.prod.yml @@ -0,0 +1,40 @@ +version: '3.7' + +services: + nginx: + build: ./compose/prod/nginx + ports: + - 80:80 + depends_on: + - mt + + mt: + build: + context: . + dockerfile: ./compose/prod/app/Dockerfile + image: twb_mt + command: /start.sh + restart: unless-stopped + # runtime: nvidia # Comment out in local + volumes: + - /translation-models:/models + env_file: + - .env/.prod + depends_on: + - redis + + redis: + image: redis:6-alpine + + celery_worker: + build: + context: . + dockerfile: ./compose/prod/app/Dockerfile + image: celery_worker + command: /start-celeryworker + volumes: + - /translation-models:/models + env_file: + - .env/.prod + depends_on: + - redis diff --git a/docker-compose.yml b/docker-compose.yml index e3e3a08..06da2f4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,7 +4,7 @@ services: mt: build: context: . - dockerfile: ./compose/app/Dockerfile + dockerfile: ./compose/dev/app/Dockerfile image: twb_mt command: /start.sh restart: unless-stopped @@ -13,7 +13,7 @@ services: - 8001:8000 volumes: - .:/app - - ../translation-models:/models + - /translation-models:/models env_file: - .env/.dev depends_on: @@ -25,12 +25,12 @@ services: celery_worker: build: context: . 
- dockerfile: ./compose/app/Dockerfile + dockerfile: ./compose/dev/app/Dockerfile image: celery_worker command: /start-celeryworker volumes: - .:/app - - ../translation-models:/models + - /translation-models:/models env_file: - .env/.dev depends_on: From b7013411e5ef8d8a760fa6beae79276f80a254e1 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Tue, 5 Apr 2022 20:55:19 +0000 Subject: [PATCH 12/16] minor clean up --- app/tests/conftest.py | 2 +- compose/dev/app/celery/worker/start.sh | 7 ------- compose/prod/nginx/nginx.conf | 6 +----- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/app/tests/conftest.py b/app/tests/conftest.py index 1b65871..c4f4d7f 100644 --- a/app/tests/conftest.py +++ b/app/tests/conftest.py @@ -3,7 +3,7 @@ import pytest -os.environ['FASTAPI_CONFIG'] = 'testing' # noqa +os.environ['FASTAPI_CONFIG'] = 'testing' @pytest.fixture diff --git a/compose/dev/app/celery/worker/start.sh b/compose/dev/app/celery/worker/start.sh index f818a01..3a0d348 100644 --- a/compose/dev/app/celery/worker/start.sh +++ b/compose/dev/app/celery/worker/start.sh @@ -3,11 +3,4 @@ set -o errexit set -o nounset -#celery -A main.celery worker \ -# --loglevel=info \ -# --max-tasks-per-child 1 \ -# --autoscale 1,2 \ -# --uid=${UWSGI_USER} \ -# --gid=${UWSGI_GROUP} \ - python main.py diff --git a/compose/prod/nginx/nginx.conf b/compose/prod/nginx/nginx.conf index 673a8df..8dcf022 100644 --- a/compose/prod/nginx/nginx.conf +++ b/compose/prod/nginx/nginx.conf @@ -1,11 +1,7 @@ -upstream twb_mt { - server mt:8000; -} - server { listen 80; location / { - proxy_pass http://twb_mt; + proxy_pass http://mt:8000; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header Host $host; proxy_redirect off; From 05b3487b79f4491c2c622e73995afc0f70a8a829 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Tue, 5 Apr 2022 22:05:51 +0000 Subject: [PATCH 13/16] pick from latest changes in `master` --- app/requirements.txt | 3 +-- app/views/v1/translate.py | 2 ++ 
compose/dev/app/Dockerfile | 8 ++++---- compose/prod/app/Dockerfile | 12 ++++-------- compose/prod/app/celery/worker/start.sh | 4 ++-- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/app/requirements.txt b/app/requirements.txt index b819b10..30bb917 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -5,8 +5,7 @@ ctranslate2==2.12.0 sacremoses==0.0.47 nltk==3.7 sentencepiece==0.1.96 -torch==1.7.1 -torchvision==0.8.2 +torch==1.8.0 transformers==4.16.2 celery==5.1.2 redis==3.5.3 diff --git a/app/views/v1/translate.py b/app/views/v1/translate.py index b8d4f18..ce04bde 100644 --- a/app/views/v1/translate.py +++ b/app/views/v1/translate.py @@ -15,6 +15,7 @@ translate_v1 = APIRouter(prefix='/api/v1/translate') +@translate_v1.post('', status_code=status.HTTP_200_OK) @translate_v1.post('/', status_code=status.HTTP_200_OK) async def translate_sentence( request: TranslationRequest, @@ -64,6 +65,7 @@ async def translate_batch( return BatchTranslationResponse(translation=translated_batch) +@translate_v1.get('', status_code=status.HTTP_200_OK) @translate_v1.get('/', status_code=status.HTTP_200_OK) async def languages() -> LanguagesResponse: config = Config() diff --git a/compose/dev/app/Dockerfile b/compose/dev/app/Dockerfile index ef653d3..e7c2c97 100644 --- a/compose/dev/app/Dockerfile +++ b/compose/dev/app/Dockerfile @@ -3,17 +3,17 @@ FROM python:3.8-slim # Project setup ENV VIRTUAL_ENV=/opt/venv \ - UWSGI_USER=twb \ - UWSGI_GROUP=twb + TWB_USER=twb \ + TWB_GROUP=twb RUN apt-get update \ && apt-get install -y telnet netcat \ && apt-get clean ################################# -# Create local user UWSGI_USER` # +# Create local user TWB_USER` # ################################# -RUN adduser --disabled-password --gecos '' "$UWSGI_USER" +RUN adduser --disabled-password --gecos '' "$TWB_USER" RUN python -m venv "$VIRTUAL_ENV" ENV PATH="$VIRTUAL_ENV/bin:$PATH" diff --git a/compose/prod/app/Dockerfile b/compose/prod/app/Dockerfile index 0906a85..2a17f68 
100644 --- a/compose/prod/app/Dockerfile +++ b/compose/prod/app/Dockerfile @@ -3,17 +3,13 @@ FROM python:3.8-slim # Project setup ENV VIRTUAL_ENV=/opt/venv \ - UWSGI_USER=twb \ - UWSGI_GROUP=twb - -RUN apt-get update \ - && apt-get install -y telnet netcat \ - && apt-get clean + TWB_USER=twb \ + TWB_GROUP=twb ################################# -# Create local user UWSGI_USER` # +# Create local user TWB_USER` # ################################# -RUN adduser --disabled-password --gecos '' "$UWSGI_USER" +RUN adduser --disabled-password --gecos '' "$TWB_USER" RUN python -m venv "$VIRTUAL_ENV" ENV PATH="$VIRTUAL_ENV/bin:$PATH" diff --git a/compose/prod/app/celery/worker/start.sh b/compose/prod/app/celery/worker/start.sh index f3f9cd1..3fcd9d0 100644 --- a/compose/prod/app/celery/worker/start.sh +++ b/compose/prod/app/celery/worker/start.sh @@ -7,5 +7,5 @@ celery -A app.asgi.celery worker \ --loglevel=info \ --max-tasks-per-child 1 \ --autoscale 1,2 \ - --uid=${UWSGI_USER} \ - --gid=${UWSGI_GROUP} \ + --uid=${TWB_USER} \ + --gid=${TWB_GROUP} \ From b6b510b08d8f530803c0d29ccd40b2c019c183ef Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Tue, 19 Apr 2022 21:23:01 +0000 Subject: [PATCH 14/16] add .env/.dev file --- .env/.dev | 10 ++++++++++ .gitignore | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 .env/.dev diff --git a/.env/.dev b/.env/.dev new file mode 100644 index 0000000..1c6add2 --- /dev/null +++ b/.env/.dev @@ -0,0 +1,10 @@ +MT_API_CONFIG=/app/app/config.json +MT_API_DEVICE=cpu #or gpu, if so make runtime:nvidia +MT_API_THREADS=16 +MODELS_ROOT=/models +#NVIDIA_VISIBLE_DEVICES=all +#NVIDIA_DRIVER_CAPABILITIES=all + +FASTAPI_CONFIG=development +CELERY_BROKER_URL=redis://redis:6379/0 +CELERY_RESULT_BACKEND=redis://redis:6379/0 diff --git a/.gitignore b/.gitignore index 7683be6..ab4aac8 100644 --- a/.gitignore +++ b/.gitignore @@ -107,7 +107,7 @@ celerybeat.pid *.sage.py # Environments -.env +.env/*prod* .venv env/ venv/ From 
1342657a7470f737f18c4ad1e2005bf223340fb1 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Tue, 19 Apr 2022 22:03:53 +0000 Subject: [PATCH 15/16] update nginx version, use env vars for prod celery and gunicorn worker counts --- .env/.prod-sample | 13 +++++++++++++ .gitignore | 2 +- compose/prod/app/Dockerfile | 4 +--- compose/prod/app/celery/worker/start.sh | 2 +- compose/prod/app/start.sh | 8 +++++--- compose/prod/nginx/Dockerfile | 2 +- docker-compose.prod.yml | 4 ++-- 7 files changed, 24 insertions(+), 11 deletions(-) create mode 100644 .env/.prod-sample diff --git a/.env/.prod-sample b/.env/.prod-sample new file mode 100644 index 0000000..77f2dbd --- /dev/null +++ b/.env/.prod-sample @@ -0,0 +1,13 @@ +MT_API_CONFIG=/app/app/config.json +MT_API_DEVICE=cpu #or gpu, if so make runtime:nvidia +MT_API_THREADS=16 +MODELS_ROOT=/models +#NVIDIA_VISIBLE_DEVICES=all +#NVIDIA_DRIVER_CAPABILITIES=all + +FASTAPI_CONFIG=production +CELERY_BROKER_URL=redis://redis:6379/0 +CELERY_RESULT_BACKEND=redis://redis:6379/0 +CELERY_MIN_WORKERS=1 +CELERY_MAX_WORKERS=2 +GUNICORN_WORKERS=1 diff --git a/.gitignore b/.gitignore index ab4aac8..52c2358 100644 --- a/.gitignore +++ b/.gitignore @@ -107,7 +107,7 @@ celerybeat.pid *.sage.py # Environments -.env/*prod* +.env/.prod .venv env/ venv/ diff --git a/compose/prod/app/Dockerfile b/compose/prod/app/Dockerfile index 2a17f68..b3991d9 100644 --- a/compose/prod/app/Dockerfile +++ b/compose/prod/app/Dockerfile @@ -6,9 +6,7 @@ ENV VIRTUAL_ENV=/opt/venv \ TWB_USER=twb \ TWB_GROUP=twb -################################# -# Create local user TWB_USER` # -################################# +# Create local user TWB_USER RUN adduser --disabled-password --gecos '' "$TWB_USER" RUN python -m venv "$VIRTUAL_ENV" diff --git a/compose/prod/app/celery/worker/start.sh b/compose/prod/app/celery/worker/start.sh index 3fcd9d0..4cf1db4 100644 --- a/compose/prod/app/celery/worker/start.sh +++ b/compose/prod/app/celery/worker/start.sh @@ -6,6 +6,6 @@ set -o nounset 
celery -A app.asgi.celery worker \ --loglevel=info \ --max-tasks-per-child 1 \ - --autoscale 1,2 \ + --autoscale ${CELERY_MAX_WORKERS},${CELERY_MIN_WORKERS} \ --uid=${TWB_USER} \ --gid=${TWB_GROUP} \ diff --git a/compose/prod/app/start.sh b/compose/prod/app/start.sh index 7276992..a5f62b3 100644 --- a/compose/prod/app/start.sh +++ b/compose/prod/app/start.sh @@ -4,7 +4,9 @@ set -o errexit set -o pipefail set -o nounset -gunicorn app.asgi:app -w 1 \ - -k uvicorn.workers.UvicornWorker \ +gunicorn app.asgi:app -w ${GUNICORN_WORKERS} \ + --worker-class uvicorn.workers.UvicornWorker \ --bind 0.0.0.0:8000 \ - --chdir=/app + --chdir=/app \ + --user ${TWB_USER} \ + --group ${TWB_GROUP} diff --git a/compose/prod/nginx/Dockerfile b/compose/prod/nginx/Dockerfile index 5f9269f..4580e8d 100644 --- a/compose/prod/nginx/Dockerfile +++ b/compose/prod/nginx/Dockerfile @@ -1,4 +1,4 @@ -FROM nginx:1.19-alpine +FROM nginx:1.21-alpine RUN rm /etc/nginx/conf.d/default.conf COPY nginx.conf /etc/nginx/conf.d diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 8fc56b1..9779174 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -19,7 +19,7 @@ services: volumes: - /translation-models:/models env_file: - - .env/.prod + - .env/.prod-sample depends_on: - redis @@ -35,6 +35,6 @@ services: volumes: - /translation-models:/models env_file: - - .env/.prod + - .env/.prod-sample depends_on: - redis From f19d61a11536ea426897ccea5cf7c6dff19acee3 Mon Sep 17 00:00:00 2001 From: Joshua Beretta Date: Tue, 19 Apr 2022 22:52:01 +0000 Subject: [PATCH 16/16] update gpu build requirements in dockerfiles --- compose/dev/app/Dockerfile | 4 +--- compose/dev/app/Dockerfile-gpu | 14 ++++++++++++-- compose/prod/app/Dockerfile-gpu | 19 +++++++++++++++---- docker-compose.prod.yml | 6 ++++-- docker-compose.yml | 6 ++++-- 5 files changed, 36 insertions(+), 13 deletions(-) diff --git a/compose/dev/app/Dockerfile b/compose/dev/app/Dockerfile index e7c2c97..783dca9 100644 ---
a/compose/dev/app/Dockerfile +++ b/compose/dev/app/Dockerfile @@ -10,9 +10,7 @@ RUN apt-get update \ && apt-get install -y telnet netcat \ && apt-get clean -################################# -# Create local user TWB_USER` # -################################# +# Create local user TWB_USER RUN adduser --disabled-password --gecos '' "$TWB_USER" RUN python -m venv "$VIRTUAL_ENV" diff --git a/compose/dev/app/Dockerfile-gpu b/compose/dev/app/Dockerfile-gpu index 6acdefa..a5a6c27 100644 --- a/compose/dev/app/Dockerfile-gpu +++ b/compose/dev/app/Dockerfile-gpu @@ -4,7 +4,12 @@ FROM nvidia/cuda:10.2-devel # Miniconda install copy-pasted from Miniconda's own Dockerfile reachable # at: https://github.com/ContinuumIO/docker-images/blob/master/miniconda3/debian/Dockerfile -ENV PATH /opt/conda/bin:$PATH +ENV PATH=/opt/conda/bin:$PATH \ + TWB_USER=twb \ + TWB_GROUP=twb +# +# Create local user TWB_USER +RUN adduser --disabled-password --gecos '' "$TWB_USER" RUN apt-get update --fix-missing && \ apt-get install -y wget bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 git mercurial subversion && \ @@ -37,6 +42,11 @@ RUN apt-get update \ RUN pip install -r /app/requirements.txt \ && rm -rf /root/.cache/pip -COPY .
/app/ +COPY ./compose/dev/app/start.sh /start.sh +RUN chmod +x /start.sh +COPY ./compose/dev/app/celery/worker/start.sh /start-celeryworker +RUN chmod +x /start-celeryworker + +COPY ./app/nltk_pkg.py /app/nltk_pkg.py RUN python /app/nltk_pkg.py diff --git a/compose/prod/app/Dockerfile-gpu b/compose/prod/app/Dockerfile-gpu index 6acdefa..1a8bc48 100644 --- a/compose/prod/app/Dockerfile-gpu +++ b/compose/prod/app/Dockerfile-gpu @@ -4,7 +4,12 @@ FROM nvidia/cuda:10.2-devel # Miniconda install copy-pasted from Miniconda's own Dockerfile reachable # at: https://github.com/ContinuumIO/docker-images/blob/master/miniconda3/debian/Dockerfile -ENV PATH /opt/conda/bin:$PATH +ENV PATH=/opt/conda/bin:$PATH \ + TWB_USER=twb \ + TWB_GROUP=twb + +# Create local user TWB_USER +RUN adduser --disabled-password --gecos '' "$TWB_USER" RUN apt-get update --fix-missing && \ apt-get install -y wget bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 git mercurial subversion && \ @@ -26,8 +31,6 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86 # Project setup -WORKDIR /app - COPY ./requirements.txt /app/requirements.txt RUN apt-get update \ @@ -37,6 +40,14 @@ RUN apt-get update \ RUN pip install -r /app/requirements.txt \ && rm -rf /root/.cache/pip -COPY . /app/ +COPY ./compose/prod/app/start.sh /start.sh +RUN chmod +x /start.sh + +COPY ./compose/prod/app/celery/worker/start.sh /start-celeryworker +RUN chmod +x /start-celeryworker + +COPY . /app +WORKDIR /app +COPY ./app/nltk_pkg.py /app/nltk_pkg.py RUN python /app/nltk_pkg.py diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 9779174..346498d 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -12,7 +12,8 @@ services: build: context: .
dockerfile: ./compose/prod/app/Dockerfile - image: twb_mt + # dockerfile: ./compose/prod/app/Dockerfile-gpu + image: twb_mt_prod command: /start.sh restart: unless-stopped # runtime: nvidia # Comment out in local @@ -30,7 +31,8 @@ services: build: context: . dockerfile: ./compose/prod/app/Dockerfile - image: celery_worker + # dockerfile: ./compose/prod/app/Dockerfile-gpu + image: celery_worker_prod command: /start-celeryworker volumes: - /translation-models:/models diff --git a/docker-compose.yml b/docker-compose.yml index 06da2f4..3524544 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,8 @@ services: build: context: . dockerfile: ./compose/dev/app/Dockerfile - image: twb_mt + # dockerfile: ./compose/dev/app/Dockerfile-gpu + image: twb_mt_dev command: /start.sh restart: unless-stopped # runtime: nvidia # Comment out in local @@ -26,7 +27,8 @@ services: build: context: . dockerfile: ./compose/dev/app/Dockerfile - image: celery_worker + # dockerfile: ./compose/dev/app/Dockerfile-gpu + image: celery_worker_dev command: /start-celeryworker volumes: - .:/app