diff --git a/.env/.dev b/.env/.dev new file mode 100644 index 0000000..1c6add2 --- /dev/null +++ b/.env/.dev @@ -0,0 +1,10 @@ +MT_API_CONFIG=/app/app/config.json +MT_API_DEVICE=cpu #or gpu, if so make runtime:nvidia +MT_API_THREADS=16 +MODELS_ROOT=/models +#NVIDIA_VISIBLE_DEVICES=all +#NVIDIA_DRIVER_CAPABILITIES=all + +FASTAPI_CONFIG=development +CELERY_BROKER_URL=redis://redis:6379/0 +CELERY_RESULT_BACKEND=redis://redis:6379/0 diff --git a/.env/.prod-sample b/.env/.prod-sample new file mode 100644 index 0000000..77f2dbd --- /dev/null +++ b/.env/.prod-sample @@ -0,0 +1,13 @@ +MT_API_CONFIG=/app/app/config.json +MT_API_DEVICE=cpu #or gpu, if so make runtime:nvidia +MT_API_THREADS=16 +MODELS_ROOT=/models +#NVIDIA_VISIBLE_DEVICES=all +#NVIDIA_DRIVER_CAPABILITIES=all + +FASTAPI_CONFIG=production +CELERY_BROKER_URL=redis://redis:6379/0 +CELERY_RESULT_BACKEND=redis://redis:6379/0 +CELERY_MIN_WORKERS=1 +CELERY_MAX_WORKERS=2 +GUNICORN_WORKERS=1 diff --git a/.gitignore b/.gitignore index 1bd436d..52c2358 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ models/* .DS_Store app/.DS_Store +*.swp # Byte-compiled / optimized / DLL files __pycache__/ @@ -106,7 +107,7 @@ celerybeat.pid *.sage.py # Environments -.env +.env/.prod .venv env/ venv/ diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 1275718..0000000 --- a/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM python:3.8-slim - -# Project setup - -ENV VIRTUAL_ENV=/opt/venv - -RUN apt-get update && apt-get clean - -RUN python -m venv "$VIRTUAL_ENV" -ENV PATH="$VIRTUAL_ENV/bin:$PATH" -RUN pip install --quiet --upgrade pip && \ - pip install --quiet pip-tools -COPY ./requirements.txt /app/requirements.txt -RUN pip install -r /app/requirements.txt \ - && rm -rf /root/.cache/pip - -WORKDIR /app - -COPY ./app/nltk_pkg.py /app/nltk_pkg.py -RUN python /app/nltk_pkg.py diff --git a/app/__init__.py b/app/__init__.py index c867cf0..3f0c478 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,15 +1,28 @@ from 
fastapi import FastAPI from app.helpers.config import Config +from . import tasks def create_app() -> FastAPI: app = FastAPI() + from app.logging import configure_logging + + configure_logging() + + from app.celery_utils import create_celery + + app.celery_app = create_celery() + from app.views.v1.translate import translate_v1 app.include_router(translate_v1) + from app.views.v2.translate import translate_v2 + + app.include_router(translate_v2) + @app.on_event('startup') async def startup_event() -> None: config = Config(load_all_models=True) diff --git a/app/asgi.py b/app/asgi.py new file mode 100644 index 0000000..9d94d3e --- /dev/null +++ b/app/asgi.py @@ -0,0 +1,4 @@ +from app import create_app + +app = create_app() +celery = app.celery_app diff --git a/app/celery_utils.py b/app/celery_utils.py new file mode 100644 index 0000000..f06d3ee --- /dev/null +++ b/app/celery_utils.py @@ -0,0 +1,10 @@ +from celery import current_app as current_celery_app + +from app.config import settings + + +def create_celery(): + celery_app = current_celery_app + celery_app.config_from_object(settings, namespace="CELERY") + + return celery_app diff --git a/config.json b/app/config.json similarity index 100% rename from config.json rename to app/config.json diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..d77284e --- /dev/null +++ b/app/config.py @@ -0,0 +1,42 @@ +import os +import pathlib +from functools import lru_cache + + +class BaseConfig: + BASE_DIR = pathlib.Path(__file__).parent.parent + + CELERY_BROKER_URL: str = os.environ.get( + 'CELERY_BROKER_URL', 'redis://127.0.0.1:6379/0' + ) + CELERY_RESULT_BACKEND: str = os.environ.get( + 'CELERY_RESULT_BACKEND', 'redis://127.0.0.1:6379/0' + ) + + +class DevelopmentConfig(BaseConfig): + pass + + +class ProductionConfig(BaseConfig): + pass + + +class TestingConfig(BaseConfig): + CELERY_TASK_ALWAYS_EAGER = True + + +@lru_cache() +def get_settings(): + config_cls_dict = { + 'development': DevelopmentConfig, 
'production': ProductionConfig, + 'testing': TestingConfig, + } + + config_name = os.environ.get('FASTAPI_CONFIG', 'development') + config_cls = config_cls_dict[config_name] + return config_cls() + + +settings = get_settings() diff --git a/app/helpers/config.py b/app/helpers/config.py index 364c009..3cd717b 100644 --- a/app/helpers/config.py +++ b/app/helpers/config.py @@ -24,7 +24,9 @@ def __init__( self, config_file: Optional[str] = None, config_data: Optional[Dict] = None, + model_id: Optional[str] = None, load_all_models: bool = False, + log_messages: bool = True, ): self.loaded_models: Dict = {} self.language_codes: Dict = {} @@ -32,17 +34,23 @@ def __init__( self.config_data: Dict = config_data or {} self.config_file: str = config_file or CONFIG_JSON_PATH self.load_all_models: bool = load_all_models + self.log_messages: List = log_messages + self.model_id: Optional[str] = model_id + self.models_for_loading: List = [] self.warnings: List[str] = [] self.messages: List[str] = [] if not config_data: - self._validate() + self._validate_config_file() + self._load_config_file() - if self.load_all_models or config_data: - self._load_language_codes() - self._load_all_models() - self._load_languages_list() + self._validate_models() + self._load_language_codes() + if config_data or load_all_models or model_id: + self._get_models_for_loading() + self._load_available_models() + self._load_languages_list() def map_lang_to_closest(self, lang: str) -> str: if lang in self.language_codes: @@ -64,11 +72,11 @@ def _get_model_path( MODELS_ROOT_DIR, model_config['model_path'] ) if not os.path.exists(model_dir): - model_dir = None self._log_warning( f'Model path {model_dir} not found for model {model_id}. ' "Can't load custom translation model or segmenters." ) + model_dir = None else: self._log_warning( f'Model path not specified for model {model_id}. 
' @@ -94,24 +102,40 @@ def _is_valid_model_type(self, model_type: str) -> bool: return False return True - def _load_all_models(self) -> None: - for model_config in self.config_data['models']: - if not 'load' in model_config or not model_config['load']: - continue + def _get_models_for_loading(self): + load = self.config_data['models'] - # CONFIG CHECKS - if not self._is_valid_model_config(model_config): - continue + # Filter models for lazy loading only a specific model by `model_id` + if self.model_id is not None: + load = [ + m + for m in load + if get_model_id(m['src'], m['tgt']) == self.model_id + ] + if len(load) > 1: + load = load[:1] - if not self._is_valid_model_type(model_config['model_type']): - continue + if self.load_all_models: + load = [ + model for model in load if 'load' in model and model['load'] + ] + self.models_for_loading = load + + def _load_available_models(self) -> None: + for model_config in self.models_for_loading: try: self._load_model(model_config) except ModelLoadingException: continue def _load_model(self, model_config: Dict) -> None: + + if not self._is_valid_model_config( + model_config + ) or not self._is_valid_model_type(model_config['model_type']): + raise ModelLoadingException + src: str = model_config['src'] tgt: str = model_config['tgt'] alt_id: Optional[str] = model_config.get('alt') @@ -163,6 +187,28 @@ def _load_language_codes(self) -> None: "Language name spefication dictionary ('languages') not found in configuration." 
) + def get_all_potential_languages(self) -> Dict: + languages = {} + for model_config in self.config_data['models']: + if model_config['model_type'] == 'ctranslator2': + model_path = model_config.get('model_path') + if model_path is None: + continue + model_dir = os.path.join(MODELS_ROOT_DIR, model_path) + if not os.path.exists(model_dir): + continue + source: str = model_config['src'] + target: str = model_config['tgt'] + alt_id: Optional[str] = model_config.get('alt') + model_id: str = get_model_id(source, target, alt_id) + if source not in languages: + languages[source] = {} + if target not in languages[source]: + languages[source][target] = [] + + languages[source][target].append(model_id) + return languages + def _load_languages_list(self) -> None: for model_id in self.loaded_models.keys(): if not (parsed_id := parse_model_id(model_id)): @@ -177,16 +223,18 @@ def _load_languages_list(self) -> None: self.languages_list[source][target].append(model_id) def _log_warning(self, msg: str) -> None: - logger.warning(msg) self.warnings.append(msg) + if self.log_messages: + logger.warning(msg) def _log_info(self, msg: str) -> None: - logger.info(msg) self.messages.append(msg) + if self.log_messages: + logger.info(msg) - def _validate(self) -> None: - self._validate_config_file() - self._validate_models() + def _load_config_file(self): + with open(self.config_file, 'r') as jsonfile: + self.config_data = json.load(jsonfile) def _validate_config_file(self) -> None: # Check if config file is there and well formatted @@ -198,7 +246,7 @@ def _validate_config_file(self) -> None: else: try: with open(self.config_file, 'r') as jsonfile: - self.config_data = json.load(jsonfile) + config_data = json.load(jsonfile) except json.decoder.JSONDecodeError: msg = 'Config file format broken. No models will be loaded.' 
logger.error(msg) diff --git a/app/logging.py b/app/logging.py new file mode 100644 index 0000000..9727801 --- /dev/null +++ b/app/logging.py @@ -0,0 +1,35 @@ +import logging +import logging.config + + +def configure_logging(): + logging_dict = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'verbose': { + 'format': '[ %(asctime)s ] %(levelname)s: %(message)s', + }, + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'formatter': 'verbose', + }, + }, + 'root': { + 'handlers': ['console'], + 'level': 'INFO', + }, + 'loggers': { + 'project': { + 'handlers': ['console'], + 'propagate': False, + }, + 'uvicorn.access': { + 'propagate': True, + }, + }, + } + + logging.config.dictConfig(logging_dict) diff --git a/app/nltk_pkg.py b/app/nltk_pkg.py index 7c5175f..54aa2e4 100644 --- a/app/nltk_pkg.py +++ b/app/nltk_pkg.py @@ -1,3 +1,3 @@ import nltk -nltk.download('punkt') +nltk.download('punkt', download_dir='/usr/local/share/nltk_data') diff --git a/requirements.txt b/app/requirements.txt similarity index 66% rename from requirements.txt rename to app/requirements.txt index b636378..30bb917 100644 --- a/requirements.txt +++ b/app/requirements.txt @@ -5,6 +5,9 @@ ctranslate2==2.12.0 sacremoses==0.0.47 nltk==3.7 sentencepiece==0.1.96 -torch==1.7.1 -torchvision==0.8.2 +torch==1.8.0 transformers==4.16.2 +celery==5.1.2 +redis==3.5.3 +watchgod==0.7 +gunicorn==20.1.0 diff --git a/app/tasks.py b/app/tasks.py new file mode 100644 index 0000000..2a3fba2 --- /dev/null +++ b/app/tasks.py @@ -0,0 +1,22 @@ +from celery import shared_task + +from app.helpers.config import Config +from app.utils.translate import translate_text + + +@shared_task +def translate_text_async(model_id, text): + config = Config(model_id=model_id, log_messages=False) + return translate_text(model_id, text) + + +@shared_task +def translate_batch_async(model_id, texts): + config = Config(model_id=model_id, log_messages=False) + + translated_batch = [] + for sentence 
in texts: + translation = translate_text(model_id, sentence) + translated_batch.append(translation) + + return translated_batch diff --git a/app/tests/api/v2/__init__.py b/app/tests/api/v2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/tests/api/v2/test_api_translate.py b/app/tests/api/v2/test_api_translate.py new file mode 100644 index 0000000..620f741 --- /dev/null +++ b/app/tests/api/v2/test_api_translate.py @@ -0,0 +1,47 @@ +import json + +from fastapi import status +from fastapi.testclient import TestClient + +from main import app +from app.helpers.config import Config +from app.tests.base_test_case import APIBaseTestCase + + +class TestTranslateApiV2(APIBaseTestCase): + API_VERSION = 2 + + def setup(self): + self.client = TestClient(app) + self.config = Config() + + def test_async_translate_text_valid_code(self): + options = { + 'src': 'en', + 'tgt': 'fr', + 'text': 'Hello there, how are you doing?', + } + response = self.client.post( + self.get_endpoint('/'), data=json.dumps(options) + ) + assert response.status_code == status.HTTP_200_OK + task_content = response.json() + assert task_content['status'] == 'SUCCESS' + + def test_async_batch_translate_text_valid_code(self): + options = { + 'src': 'en', + 'tgt': 'fr', + 'texts': ['Hello, what is your name?', 'How are you doing?'], + } + expected_translations = [ + 'Bonjour, quel est votre nom?', + 'Comment ça va?', + ] + response = self.client.post( + url=self.get_endpoint('/batch'), data=json.dumps(options) + ) + assert response.status_code == status.HTTP_200_OK + + task_content = response.json() + assert task_content['status'] == 'SUCCESS' diff --git a/app/tests/conftest.py b/app/tests/conftest.py new file mode 100644 index 0000000..c4f4d7f --- /dev/null +++ b/app/tests/conftest.py @@ -0,0 +1,13 @@ +import os + +import pytest + + +os.environ['FASTAPI_CONFIG'] = 'testing' + + +@pytest.fixture +def settings(): + from app.config import settings as _settings + + return _settings diff 
--git a/app/tests/test_tasks.py b/app/tests/test_tasks.py new file mode 100644 index 0000000..72816cf --- /dev/null +++ b/app/tests/test_tasks.py @@ -0,0 +1,25 @@ +from app.helpers.config import Config +from app.tasks import translate_batch_async, translate_text_async + + +def test_task_translate_text_async(): + options = { + 'model_id': 'en-fr', + 'text': 'Hello there, how are you doing?', + } + expected_translation = 'Bonjour, comment allez-vous?' + translation = translate_text_async(**options) + assert translation == expected_translation + + +def test_task_translate_batch_async(): + options = { + 'model_id': 'en-fr', + 'texts': ['Hello, what is your name?', 'How are you doing?'], + } + expected_translations = [ + 'Bonjour, quel est votre nom?', + 'Comment ça va?', + ] + translation = translate_batch_async(**options) + assert translation == expected_translations diff --git a/app/views/v1/translate.py b/app/views/v1/translate.py index b8d4f18..ce04bde 100644 --- a/app/views/v1/translate.py +++ b/app/views/v1/translate.py @@ -15,6 +15,7 @@ translate_v1 = APIRouter(prefix='/api/v1/translate') +@translate_v1.post('', status_code=status.HTTP_200_OK) @translate_v1.post('/', status_code=status.HTTP_200_OK) async def translate_sentence( request: TranslationRequest, @@ -64,6 +65,7 @@ async def translate_batch( return BatchTranslationResponse(translation=translated_batch) +@translate_v1.get('', status_code=status.HTTP_200_OK) @translate_v1.get('/', status_code=status.HTTP_200_OK) async def languages() -> LanguagesResponse: config = Config() diff --git a/app/views/v2/__init__.py b/app/views/v2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/views/v2/translate.py b/app/views/v2/translate.py new file mode 100644 index 0000000..981d873 --- /dev/null +++ b/app/views/v2/translate.py @@ -0,0 +1,59 @@ +from typing import Dict + +from fastapi import APIRouter, HTTPException, status +from celery.result import AsyncResult + +from app.helpers.config import 
Config +from app.utils.utils import get_model_id +from app.models.v1.translate import ( + BatchTranslationRequest, + BatchTranslationResponse, + LanguagesResponse, + TranslationRequest, + TranslationResponse, +) +from app.utils.translate import translate_text +from app.tasks import translate_text_async, translate_batch_async + + +translate_v2 = APIRouter(prefix='/api/v2/translate') + + +@translate_v2.post('', status_code=status.HTTP_200_OK) +@translate_v2.post('/', status_code=status.HTTP_200_OK) +async def translate_sentence_async(request: TranslationRequest): + model_id = get_model_id(request.src, request.tgt) + task = translate_text_async.delay(model_id, request.text) + return {'uid': task.id, 'status': task.status} + + +@translate_v2.post('/batch', status_code=status.HTTP_200_OK) +async def translate_batch( + request: BatchTranslationRequest, +): + model_id = get_model_id(request.src, request.tgt) + task = translate_batch_async.delay(model_id, request.texts) + return {'uid': task.id, 'status': task.status} + + +@translate_v2.get('', status_code=status.HTTP_200_OK) +@translate_v2.get('/', status_code=status.HTTP_200_OK) +async def languages() -> Dict: + config = Config() + return {'models': config.get_all_potential_languages()} + + +@translate_v2.get('/{uid}', status_code=status.HTTP_200_OK) +async def translate_sentence_async_result(uid): + result = AsyncResult(uid) + if result.successful(): + return TranslationResponse(translation=result.result) + return {'status': result.status, 'info': result.info} + + +@translate_v2.get('/batch/{uid}', status_code=status.HTTP_200_OK) +async def translate_batch_async_result(uid): + result = AsyncResult(uid) + if result.successful(): + return BatchTranslationResponse(translation=result.result) + return {'status': result.status, 'info': result.info} diff --git a/compose/dev/app/Dockerfile b/compose/dev/app/Dockerfile new file mode 100644 index 0000000..783dca9 --- /dev/null +++ b/compose/dev/app/Dockerfile @@ -0,0 +1,33 @@ 
+FROM python:3.8-slim + +# Project setup + +ENV VIRTUAL_ENV=/opt/venv \ + TWB_USER=twb \ + TWB_GROUP=twb + +RUN apt-get update \ + && apt-get install -y telnet netcat \ + && apt-get clean + +# Create local user TWB_USER +RUN adduser --disabled-password --gecos '' "$TWB_USER" + +RUN python -m venv "$VIRTUAL_ENV" +ENV PATH="$VIRTUAL_ENV/bin:$PATH" +RUN pip install --quiet --upgrade pip && \ + pip install --quiet pip-tools +COPY ./app/requirements.txt /app/requirements.txt +RUN pip install -r /app/requirements.txt \ + && rm -rf /root/.cache/pip + +COPY ./compose/dev/app/start.sh /start.sh +RUN chmod +x /start.sh + +COPY ./compose/dev/app/celery/worker/start.sh /start-celeryworker +RUN chmod +x /start-celeryworker + +WORKDIR /app + +COPY ./app/nltk_pkg.py /app/nltk_pkg.py +RUN python /app/nltk_pkg.py diff --git a/Dockerfile-gpu b/compose/dev/app/Dockerfile-gpu similarity index 78% rename from Dockerfile-gpu rename to compose/dev/app/Dockerfile-gpu index 6acdefa..a5a6c27 100644 --- a/Dockerfile-gpu +++ b/compose/dev/app/Dockerfile-gpu @@ -4,7 +4,12 @@ FROM nvidia/cuda:10.2-devel # Miniconda install copy-pasted from Miniconda's own Dockerfile reachable # at: https://github.com/ContinuumIO/docker-images/blob/master/miniconda3/debian/Dockerfile -ENV PATH /opt/conda/bin:$PATH +ENV PATH /opt/conda/bin:$PATH \ + TWB_USER=twb \ + TWB_GROUP=twb +# +# Create local user TWB_USER +RUN adduser --disabled-password --gecos '' "$TWB_USER" RUN apt-get update --fix-missing && \ apt-get install -y wget bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 git mercurial subversion && \ @@ -37,6 +42,11 @@ RUN apt-get update \ RUN pip install -r /app/requirements.txt \ && rm -rf /root/.cache/pip -COPY . 
/app/ +COPY ./compose/dev/app/start.sh /start.sh +RUN chmod +x /start.sh +COPY ./compose/dev/app/celery/worker/start.sh /start-celeryworker +RUN chmod +x /start-celeryworker + +COPY ./app/nltk_pkg.py /app/nltk_pkg.py RUN python /app/nltk_pkg.py diff --git a/compose/dev/app/celery/worker/start.sh b/compose/dev/app/celery/worker/start.sh new file mode 100644 index 0000000..3a0d348 --- /dev/null +++ b/compose/dev/app/celery/worker/start.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -o errexit +set -o nounset + +python main.py diff --git a/compose/dev/app/start.sh b/compose/dev/app/start.sh new file mode 100644 index 0000000..b912a95 --- /dev/null +++ b/compose/dev/app/start.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -o errexit +set -o pipefail +set -o nounset + +uvicorn main:app --reload --host 0.0.0.0 --port 8000 diff --git a/compose/prod/app/Dockerfile b/compose/prod/app/Dockerfile new file mode 100644 index 0000000..b3991d9 --- /dev/null +++ b/compose/prod/app/Dockerfile @@ -0,0 +1,30 @@ +FROM python:3.8-slim + +# Project setup + +ENV VIRTUAL_ENV=/opt/venv \ + TWB_USER=twb \ + TWB_GROUP=twb + +# Create local user TWB_USER +RUN adduser --disabled-password --gecos '' "$TWB_USER" + +RUN python -m venv "$VIRTUAL_ENV" +ENV PATH="$VIRTUAL_ENV/bin:$PATH" +RUN pip install --quiet --upgrade pip && \ + pip install --quiet pip-tools +COPY ./app/requirements.txt /app/requirements.txt +RUN pip install -r /app/requirements.txt \ + && rm -rf /root/.cache/pip + +COPY ./compose/prod/app/start.sh /start.sh +RUN chmod +x /start.sh + +COPY ./compose/prod/app/celery/worker/start.sh /start-celeryworker +RUN chmod +x /start-celeryworker + +COPY . 
/app +WORKDIR /app + +COPY ./app/nltk_pkg.py /app/nltk_pkg.py +RUN python /app/nltk_pkg.py diff --git a/compose/prod/app/Dockerfile-gpu b/compose/prod/app/Dockerfile-gpu new file mode 100644 index 0000000..1a8bc48 --- /dev/null +++ b/compose/prod/app/Dockerfile-gpu @@ -0,0 +1,53 @@ +# GPU setup +FROM nvidia/cuda:10.2-devel + +# Miniconda install copy-pasted from Miniconda's own Dockerfile reachable +# at: https://github.com/ContinuumIO/docker-images/blob/master/miniconda3/debian/Dockerfile + +ENV PATH /opt/conda/bin:$PATH \ + TWB_USER=twb \ + TWB_GROUP=twb + +# Create local user TWB_USER +RUN adduser --disabled-password --gecos '' "$TWB_USER" + +RUN apt-get update --fix-missing && \ + apt-get install -y wget bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 git mercurial subversion && \ + apt-get clean + +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p /opt/conda && \ + rm ~/miniconda.sh && \ + /opt/conda/bin/conda clean -tipsy && \ + ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc && \ + find /opt/conda/ -follow -type f -name '*.a' -delete && \ + find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ + /opt/conda/bin/conda clean -afy + +# For CPU setup comment out above and remove comment in line below +# FROM python:3.8-slim + +# Project setup + +COPY ./requirements.txt /app/requirements.txt + +RUN apt-get update \ + && apt-get install gcc -y \ + && apt-get clean + +RUN pip install -r /app/requirements.txt \ + && rm -rf /root/.cache/pip + +COPY ./compose/prod/app/start.sh /start.sh +RUN chmod +x /start.sh + +COPY ./compose/prod/app/celery/worker/start.sh /start-celeryworker +RUN chmod +x /start-celeryworker + +COPY . 
/app +WORKDIR /app + +COPY ./app/nltk_pkg.py /app/nltk_pkg.py +RUN python /app/nltk_pkg.py diff --git a/compose/prod/app/celery/worker/start.sh b/compose/prod/app/celery/worker/start.sh new file mode 100644 index 0000000..4cf1db4 --- /dev/null +++ b/compose/prod/app/celery/worker/start.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -o errexit +set -o nounset + +celery -A app.asgi.celery worker \ + --loglevel=info \ + --max-tasks-per-child 1 \ + --autoscale ${CELERY_MIN_WORKERS},${CELERY_MAX_WORKERS} \ + --uid=${TWB_USER} \ + --gid=${TWB_GROUP} \ diff --git a/compose/prod/app/start.sh b/compose/prod/app/start.sh new file mode 100644 index 0000000..a5f62b3 --- /dev/null +++ b/compose/prod/app/start.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -o errexit +set -o pipefail +set -o nounset + +gunicorn app.asgi:app -w ${GUNICORN_WORKERS} \ + --worker-class uvicorn.workers.UvicornWorker \ + --bind 0.0.0.0:8000 \ + --chdir=/app \ + --user ${TWB_USER} \ + --group ${TWB_GROUP} diff --git a/compose/prod/nginx/Dockerfile b/compose/prod/nginx/Dockerfile new file mode 100644 index 0000000..4580e8d --- /dev/null +++ b/compose/prod/nginx/Dockerfile @@ -0,0 +1,4 @@ +FROM nginx:1.21-alpine + +RUN rm /etc/nginx/conf.d/default.conf +COPY nginx.conf /etc/nginx/conf.d diff --git a/compose/prod/nginx/nginx.conf b/compose/prod/nginx/nginx.conf new file mode 100644 index 0000000..8dcf022 --- /dev/null +++ b/compose/prod/nginx/nginx.conf @@ -0,0 +1,9 @@ +server { + listen 80; + location / { + proxy_pass http://mt:8000; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header Host $host; + proxy_redirect off; + } +} diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml new file mode 100644 index 0000000..346498d --- /dev/null +++ b/docker-compose.prod.yml @@ -0,0 +1,42 @@ +version: '3.7' + +services: + nginx: + build: ./compose/prod/nginx + ports: + - 80:80 + depends_on: + - mt + + mt: + build: + context: . 
+ dockerfile: ./compose/prod/app/Dockerfile + # dockerfile: ./compose/prod/app/Dockerfile-gpu + image: twb_mt_prod + command: /start.sh + restart: unless-stopped + # runtime: nvidia # Comment out in local + volumes: + - /translation-models:/models + env_file: + - .env/.prod-sample + depends_on: + - redis + + redis: + image: redis:6-alpine + + celery_worker: + build: + context: . + dockerfile: ./compose/prod/app/Dockerfile + # dockerfile: ./compose/prod/app/Dockerfile-gpu + image: celery_worker_prod + command: /start-celeryworker + volumes: + - /translation-models:/models + env_file: + - .env/.prod-sample + depends_on: + - redis diff --git a/docker-compose.yml b/docker-compose.yml index 3bf01f9..3524544 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,22 +1,39 @@ version: '3.7' services: - mt: - build: . - command: uvicorn main:app --reload --host 0.0.0.0 --port 8000 --log-config logging.yml + build: + context: . + dockerfile: ./compose/dev/app/Dockerfile + # dockerfile: ./compose/dev/app/Dockerfile-gpu + image: twb_mt_dev + command: /start.sh restart: unless-stopped # runtime: nvidia # Comment out in local ports: - 8001:8000 volumes: - .:/app - - ../translation-models:/app/models - environment: - - MT_API_CONFIG=/app/config.json - - MT_API_DEVICE=cpu #or gpu, if so make runtime:nvidia - - MT_API_THREADS=16 - - MODELS_ROOT=/app/models - - NVIDIA_VISIBLE_DEVICES=all - - NVIDIA_DRIVER_CAPABILITIES=all + - /translation-models:/models + env_file: + - .env/.dev + depends_on: + - redis + + redis: + image: redis:6-alpine + celery_worker: + build: + context: . 
+ dockerfile: ./compose/dev/app/Dockerfile + # dockerfile: ./compose/dev/app/Dockerfile-gpu + image: celery_worker_dev + command: /start-celeryworker + volumes: + - .:/app + - /translation-models:/models + env_file: + - .env/.dev + depends_on: + - redis diff --git a/logging.yml b/logging.yml deleted file mode 100644 index e6a7e28..0000000 --- a/logging.yml +++ /dev/null @@ -1,32 +0,0 @@ -version: 1 -disable_existing_loggers: false - -formatters: - standard: - format: "[ %(asctime)s ] %(levelname)s: %(message)s" - simple: - format: "%(levelname)s: %(message)s" - -handlers: - console: - class: logging.StreamHandler - formatter: standard - stream: ext://sys.stdout - simple: - class: logging.StreamHandler - formatter: simple - stream: ext://sys.stdout - -loggers: - console_logger: - handlers: [simple] - level: DEBUG - propagate: false - uvicorn: - error: - propagate: true - -root: - level: INFO - handlers: [console] - propagate: no diff --git a/main.py b/main.py index 0a23b5a..9f828fe 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,30 @@ from app import create_app app = create_app() +celery = app.celery_app + + +def celery_worker(): + from watchgod import run_process + import subprocess + + def run_worker(): + subprocess.call( + [ + 'celery', + '-A', + 'main.celery', + 'worker', + '--loglevel=info', + '--max-tasks-per-child=1', + '--autoscale=1,2', + '--uid=twb', + '--gid=twb', + ] + ) + + run_process('./app', run_worker) + + +if __name__ == '__main__': + celery_worker()