Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .env/.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
MT_API_CONFIG=/app/app/config.json
# Set to "gpu" to run on GPU; if so, also set runtime: nvidia.
MT_API_DEVICE=cpu
MT_API_THREADS=16
MODELS_ROOT=/models
#NVIDIA_VISIBLE_DEVICES=all
#NVIDIA_DRIVER_CAPABILITIES=all

FASTAPI_CONFIG=development
CELERY_BROKER_URL=redis://redis:6379/0
CELERY_RESULT_BACKEND=redis://redis:6379/0
13 changes: 13 additions & 0 deletions .env/.prod-sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
MT_API_CONFIG=/app/app/config.json
# Set to "gpu" to run on GPU; if so, also set runtime: nvidia.
MT_API_DEVICE=cpu
MT_API_THREADS=16
MODELS_ROOT=/models
#NVIDIA_VISIBLE_DEVICES=all
#NVIDIA_DRIVER_CAPABILITIES=all

FASTAPI_CONFIG=production
CELERY_BROKER_URL=redis://redis:6379/0
CELERY_RESULT_BACKEND=redis://redis:6379/0
CELERY_MIN_WORKERS=1
CELERY_MAX_WORKERS=2
GUNICORN_WORKERS=1
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
models/*
.DS_Store
app/.DS_Store
*.swp

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down Expand Up @@ -106,7 +107,7 @@ celerybeat.pid
*.sage.py

# Environments
.env
.env/.prod
.venv
env/
venv/
Expand Down
20 changes: 0 additions & 20 deletions Dockerfile

This file was deleted.

13 changes: 13 additions & 0 deletions app/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
from fastapi import FastAPI

from app.helpers.config import Config
from . import tasks


def create_app() -> FastAPI:
app = FastAPI()

from app.logging import configure_logging

configure_logging()

from app.celery_utils import create_celery

app.celery_app = create_celery()

from app.views.v1.translate import translate_v1

app.include_router(translate_v1)

from app.views.v2.translate import translate_v2

app.include_router(translate_v2)

@app.on_event('startup')
async def startup_event() -> None:
config = Config(load_all_models=True)
Expand Down
4 changes: 4 additions & 0 deletions app/asgi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from app import create_app

# ASGI application object, built by the application factory.
app = create_app()
# Celery application that create_app() attached to the FastAPI instance.
celery = app.celery_app
10 changes: 10 additions & 0 deletions app/celery_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from celery import current_app as current_celery_app

from app.config import settings


def create_celery():
    """Configure Celery's current app from the project settings and return it.

    Configuration is read from ``settings`` using the ``CELERY`` namespace.
    """
    current_celery_app.config_from_object(settings, namespace="CELERY")
    return current_celery_app
File renamed without changes.
42 changes: 42 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
import pathlib
from functools import lru_cache


class BaseConfig:
    """Settings shared by every environment, read from environment variables."""

    # Directory two levels above this module.
    BASE_DIR = pathlib.Path(__file__).parent.parent

    # Celery broker URL; falls back to a local Redis instance.
    CELERY_BROKER_URL: str = os.getenv(
        'CELERY_BROKER_URL',
        'redis://127.0.0.1:6379/0',
    )
    # Celery result backend URL; falls back to a local Redis instance.
    result_backend: str = os.getenv(
        'CELERY_RESULT_BACKEND',
        'redis://127.0.0.1:6379/0',
    )


class DevelopmentConfig(BaseConfig):
    """Development settings; currently identical to ``BaseConfig``."""


class ProductionConfig(BaseConfig):
    """Production settings; currently identical to ``BaseConfig``."""


class TestingConfig(BaseConfig):
    """Testing settings: ``BaseConfig`` plus eager task execution."""

    # Celery setting: run tasks locally and synchronously instead of
    # sending them to a worker.
    task_always_eager = True


@lru_cache()
def get_settings():
    """Return the settings object selected by ``FASTAPI_CONFIG``.

    The environment variable picks one of ``development`` (the default),
    ``production`` or ``testing``. The result is cached, so repeated calls
    return the same instance.

    Raises:
        KeyError: if ``FASTAPI_CONFIG`` names an unknown configuration.
    """
    selected = os.environ.get('FASTAPI_CONFIG', 'development')
    registry = {
        'development': DevelopmentConfig,
        'production': ProductionConfig,
        'testing': TestingConfig,
    }
    return registry[selected]()


# Module-level settings instance, resolved once at import time.
settings = get_settings()
90 changes: 69 additions & 21 deletions app/helpers/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,33 @@ def __init__(
self,
config_file: Optional[str] = None,
config_data: Optional[Dict] = None,
model_id: Optional[str] = None,
load_all_models: bool = False,
log_messages: bool = True,
):
self.loaded_models: Dict = {}
self.language_codes: Dict = {}
self.languages_list: Dict = {}
self.config_data: Dict = config_data or {}
self.config_file: str = config_file or CONFIG_JSON_PATH
self.load_all_models: bool = load_all_models
self.log_messages: List = log_messages
self.model_id: Optional[str] = model_id
self.models_for_loading: List = []

self.warnings: List[str] = []
self.messages: List[str] = []

if not config_data:
self._validate()
self._validate_config_file()
self._load_config_file()

if self.load_all_models or config_data:
self._load_language_codes()
self._load_all_models()
self._load_languages_list()
self._validate_models()
self._load_language_codes()
if config_data or load_all_models or model_id:
self._get_models_for_loading()
self._load_available_models()
self._load_languages_list()

def map_lang_to_closest(self, lang: str) -> str:
if lang in self.language_codes:
Expand All @@ -64,11 +72,11 @@ def _get_model_path(
MODELS_ROOT_DIR, model_config['model_path']
)
if not os.path.exists(model_dir):
model_dir = None
self._log_warning(
f'Model path {model_dir} not found for model {model_id}. '
"Can't load custom translation model or segmenters."
)
model_dir = None
else:
self._log_warning(
f'Model path not specified for model {model_id}. '
Expand All @@ -94,24 +102,40 @@ def _is_valid_model_type(self, model_type: str) -> bool:
return False
return True

def _load_all_models(self) -> None:
for model_config in self.config_data['models']:
if not 'load' in model_config or not model_config['load']:
continue
def _get_models_for_loading(self):
    """Select the model configs to load into ``self.models_for_loading``.

    Starts from every entry in ``self.config_data['models']``. When
    ``self.model_id`` is set, at most the first entry whose model ID matches
    is kept. When ``self.load_all_models`` is set, only entries with a truthy
    ``load`` flag are kept.
    """
    candidates = self.config_data['models']

    # Lazy loading: narrow down to the single model requested by `model_id`.
    if self.model_id is not None:
        candidates = [
            m
            for m in candidates
            # Build the ID with the optional `alt` part, as the other model-ID
            # call sites do, so alternative models match on their full ID.
            if get_model_id(m['src'], m['tgt'], m.get('alt')) == self.model_id
        ][:1]

    if self.load_all_models:
        candidates = [m for m in candidates if m.get('load')]

    self.models_for_loading = candidates

def _load_available_models(self) -> None:
    """Attempt to load every config in ``self.models_for_loading``.

    A model whose loading raises ``ModelLoadingException`` is skipped so the
    remaining models are still attempted.
    """
    for candidate in self.models_for_loading:
        try:
            self._load_model(candidate)
        except ModelLoadingException:
            # Skip this model and move on to the next one.
            pass

def _load_model(self, model_config: Dict) -> None:

if not self._is_valid_model_config(
model_config
) or not self._is_valid_model_type(model_config['model_type']):
raise ModelLoadingException

src: str = model_config['src']
tgt: str = model_config['tgt']
alt_id: Optional[str] = model_config.get('alt')
Expand Down Expand Up @@ -163,6 +187,28 @@ def _load_language_codes(self) -> None:
"Language name spefication dictionary ('languages') not found in configuration."
)

def get_all_potential_languages(self) -> Dict:
    """Map each configured source language to its targets and model IDs.

    Walks ``self.config_data['models']`` rather than the loaded models and
    returns ``{source: {target: [model_id, ...]}}``. ``ctranslator2`` entries
    are skipped when they have no ``model_path`` or when that path does not
    exist under ``MODELS_ROOT_DIR``.
    """
    potential = {}
    for entry in self.config_data['models']:
        # NOTE(review): nesting reconstructed from a view without indentation;
        # the path checks are assumed to apply to 'ctranslator2' only - confirm.
        if entry['model_type'] == 'ctranslator2':
            rel_path = entry.get('model_path')
            if rel_path is None:
                continue
            if not os.path.exists(os.path.join(MODELS_ROOT_DIR, rel_path)):
                continue

        src_lang: str = entry['src']
        tgt_lang: str = entry['tgt']
        full_id: str = get_model_id(src_lang, tgt_lang, entry.get('alt'))

        # Create the nested containers on first use, then record the model.
        potential.setdefault(src_lang, {}).setdefault(tgt_lang, []).append(
            full_id
        )
    return potential

def _load_languages_list(self) -> None:
for model_id in self.loaded_models.keys():
if not (parsed_id := parse_model_id(model_id)):
Expand All @@ -177,16 +223,18 @@ def _load_languages_list(self) -> None:
self.languages_list[source][target].append(model_id)

def _log_warning(self, msg: str) -> None:
    """Record a warning and emit it only when logging is enabled.

    The message is always appended to ``self.warnings``. It is sent to the
    module logger exactly once, and only when ``self.log_messages`` is truthy.
    """
    self.warnings.append(msg)
    if self.log_messages:
        logger.warning(msg)

def _log_info(self, msg: str) -> None:
    """Record an informational message and emit it only when logging is enabled.

    The message is always appended to ``self.messages``. It is sent to the
    module logger exactly once, and only when ``self.log_messages`` is truthy.
    """
    self.messages.append(msg)
    if self.log_messages:
        logger.info(msg)

def _validate(self) -> None:
self._validate_config_file()
self._validate_models()
def _load_config_file(self) -> None:
    """Parse ``self.config_file`` as JSON into ``self.config_data``.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the host locale.
    with open(self.config_file, 'r', encoding='utf-8') as jsonfile:
        self.config_data = json.load(jsonfile)

def _validate_config_file(self) -> None:
# Check if config file is there and well formatted
Expand All @@ -198,7 +246,7 @@ def _validate_config_file(self) -> None:
else:
try:
with open(self.config_file, 'r') as jsonfile:
self.config_data = json.load(jsonfile)
config_data = json.load(jsonfile)
except json.decoder.JSONDecodeError:
msg = 'Config file format broken. No models will be loaded.'
logger.error(msg)
Expand Down
35 changes: 35 additions & 0 deletions app/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import logging
import logging.config


def configure_logging():
    """Install the logging configuration via ``logging.config.dictConfig``.

    The root logger writes INFO and above to a console stream handler using
    the ``verbose`` format. The ``project`` logger gets the same handler and
    does not propagate; ``uvicorn.access`` propagates to the root logger.
    Loggers that already exist are left enabled.
    """
    verbose_formatter = {
        'format': '[ %(asctime)s ] %(levelname)s: %(message)s',
    }
    console_handler = {
        'class': 'logging.StreamHandler',
        'formatter': 'verbose',
    }
    named_loggers = {
        'project': {'handlers': ['console'], 'propagate': False},
        'uvicorn.access': {'propagate': True},
    }

    logging.config.dictConfig(
        {
            'version': 1,
            'disable_existing_loggers': False,
            'formatters': {'verbose': verbose_formatter},
            'handlers': {'console': console_handler},
            'root': {'handlers': ['console'], 'level': 'INFO'},
            'loggers': named_loggers,
        }
    )
2 changes: 1 addition & 1 deletion app/nltk_pkg.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import nltk

nltk.download('punkt')
nltk.download('punkt', download_dir='/usr/local/share/nltk_data')
7 changes: 5 additions & 2 deletions requirements.txt → app/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ ctranslate2==2.12.0
sacremoses==0.0.47
nltk==3.7
sentencepiece==0.1.96
torch==1.7.1
torchvision==0.8.2
torch==1.8.0
transformers==4.16.2
celery==5.1.2
redis==3.5.3
watchgod==0.7
gunicorn==20.1.0
22 changes: 22 additions & 0 deletions app/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from celery import shared_task

from app.helpers.config import Config
from app.utils.translate import translate_text


@shared_task
def translate_text_async(model_id, text):
    """Celery task: translate ``text`` with the model identified by ``model_id``.

    Returns whatever ``translate_text`` returns for the given text.
    """
    # NOTE(review): the Config instance is never read here; it is presumably
    # created for the side effect of loading the requested model - confirm.
    model_config = Config(model_id=model_id, log_messages=False)
    translated = translate_text(model_id, text)
    return translated


@shared_task
def translate_batch_async(model_id, texts):
    """Celery task: translate each item of ``texts`` with model ``model_id``.

    Returns a list of translations in the same order as ``texts``.
    """
    # NOTE(review): the Config instance is never read here; it is presumably
    # created for the side effect of loading the requested model - confirm.
    model_config = Config(model_id=model_id, log_messages=False)
    return [translate_text(model_id, item) for item in texts]
Empty file added app/tests/api/v2/__init__.py
Empty file.
Loading