From 1392dee36252dfff8e134c78e55bd1fbfc062edf Mon Sep 17 00:00:00 2001
From: yarnaid
Date: Fri, 17 Feb 2023 13:05:04 +0400
Subject: [PATCH 01/22] feat: rename app for annotation

---
 .gitignore | 410 ++++++++++++++++++
 .idea/.gitignore | 3 +
 annotation/Dockerfile | 4 +-
 annotation/alembic/env.py | 6 +-
 ...8e6343_add_extensive_coverage_parameter.py | 2 +-
 annotation/{app => annotation}/__init__.py | 0
 .../annotations/__init__.py | 2 +-
 .../{app => annotation}/annotations/main.py | 10 +-
 .../annotations/resources.py | 14 +-
 .../categories/__init__.py | 0
 .../categories/resources.py | 12 +-
 .../categories/services.py | 10 +-
 annotation/{app => annotation}/database.py | 0
 .../distribution/__init__.py | 2 +-
 .../{app => annotation}/distribution/main.py | 10 +-
 .../distribution/resources.py | 20 +-
 annotation/{app => annotation}/errors.py | 2 +-
 annotation/{app => annotation}/filters.py | 2 +-
 .../{app => annotation}/jobs/__init__.py | 2 +-
 .../{app => annotation}/jobs/resources.py | 26 +-
 .../{app => annotation}/jobs/services.py | 16 +-
 .../{app => annotation}/kafka_client.py | 0
 annotation/{app => annotation}/logger.py | 0
 annotation/{app => annotation}/main.py | 22 +-
 .../{app => annotation}/metadata/__init__.py | 0
 .../{app => annotation}/metadata/resources.py | 6 +-
 .../microservice_communication/__init__.py | 0
 .../assets_communication.py | 2 +-
 .../jobs_communication.py | 2 +-
 .../microservice_communication/search.py | 6 +-
 .../microservice_communication/task.py | 6 +-
 .../microservice_communication/user.py | 2 +-
 annotation/{app => annotation}/models.py | 6 +-
 .../{app => annotation}/revisions/__init__.py | 0
 .../revisions/resources.py | 10 +-
 .../{app => annotation}/schemas/__init__.py | 12 +-
 .../schemas/annotations.py | 0
 .../{app => annotation}/schemas/categories.py | 2 +-
 .../{app => annotation}/schemas/errors.py | 0
 .../{app => annotation}/schemas/jobs.py | 0
 .../{app => annotation}/schemas/metadata.py | 2 +-
 .../{app => annotation}/schemas/tasks.py | 0
 annotation/{app => annotation}/tags.py | 0
 .../{app => annotation}/tasks/__init__.py | 0
 .../{app => annotation}/tasks/resources.py | 26 +-
 .../{app => annotation}/tasks/services.py | 14 +-
 .../{app => annotation}/tasks/validation.py | 8 +-
 .../{app => annotation}/token_dependency.py | 0
 annotation/{app => annotation}/utils.py | 0
 annotation/tests/conftest.py | 24 +-
 annotation/tests/override_app_dependency.py | 12 +-
 .../tests/test_annotators_overall_load.py | 16 +-
 annotation/tests/test_assets_communication.py | 6 +-
 annotation/tests/test_category_crud.py | 12 +-
 annotation/tests/test_compare_scores.py | 4 +-
 annotation/tests/test_cross_validation.py | 8 +-
 annotation/tests/test_delete_batch_tasks.py | 6 +-
 annotation/tests/test_distribution.py | 10 +-
 annotation/tests/test_finish_task.py | 40 +-
 .../tests/test_get_accumulated_revisions.py | 8 +-
 ..._get_annotation_for_particular_revision.py | 6 +-
 annotation/tests/test_get_child_categories.py | 8 +-
 annotation/tests/test_get_entities_status.py | 2 +-
 annotation/tests/test_get_job.py | 10 +-
 annotation/tests/test_get_job_files.py | 6 +-
 annotation/tests/test_get_job_progress.py | 6 +-
 .../tests/test_get_jobs_info_by_files.py | 12 +-
 annotation/tests/test_get_pages_info.py | 10 +-
 annotation/tests/test_get_revisions.py | 10 +-
 .../test_get_revisions_without_annotation.py | 6 +-
 annotation/tests/test_get_unassigned_files.py | 4 +-
 annotation/tests/test_get_users_for_job.py | 4 +-
 annotation/tests/test_job_categories.py | 6 +-
 annotation/tests/test_microservices_search.py | 10 +-
 annotation/tests/test_post.py | 20 +-
 annotation/tests/test_post_annotation.py | 32 +-
 annotation/tests/test_post_job.py | 10 +-
 annotation/tests/test_post_next_task.py | 10 +-
 .../tests/test_post_unassgined_files.py | 8 +-
 annotation/tests/test_search_kafka.py | 24 +-
 annotation/tests/test_start_job.py | 8 +-
 annotation/tests/test_tasks_crud_cr.py | 12 +-
 annotation/tests/test_tasks_crud_ud.py | 6 +-
 annotation/tests/test_update_job.py | 18 +-
 annotation/tests/test_validation.py | 8 +-
 dev_runner/README.md | 0
 dev_runner/collect_requirements.sh | 51 +++
 dev_runner/conf/shared.env | 85 ++++
 dev_runner/dev_runner/__init__.py | 0
 dev_runner/dev_runner/conf.py | 22 +
 dev_runner/dev_runner/runners/__init__.py | 0
 .../dev_runner/runners/annotation_runner.py | 11 +
 .../dev_runner/runners/assets_runner.py | 23 +
 dev_runner/dev_runner/runners/base_runner.py | 120 +++++
 .../dev_runner/runners/convert_runner.py | 11 +
 dev_runner/dev_runner/runners/jobs_runner.py | 11 +
 .../dev_runner/runners/models_runner.py | 19 +
 .../dev_runner/runners/pipelines_runner.py | 21 +
 .../dev_runner/runners/processing_runner.py | 21 +
 .../dev_runner/runners/scheduler_runner.py | 26 ++
 .../dev_runner/runners/search_runner.py | 24 +
 .../dev_runner/runners/taxonomy_runner.py | 15 +
 dev_runner/dev_runner/runners/users_runner.py | 25 ++
 dev_runner/docker-compose.yml | 117 +++++
 dev_runner/goten.env | 33 ++
 .../create-multiple-postgresql-databases.sh | 22 +
 dev_runner/pyproject.toml | 106 +++++
 dev_runner/start.py | 38 ++
 108 files changed, 1548 insertions(+), 334 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 .idea/.gitignore
 rename annotation/{app => annotation}/__init__.py (100%)
 rename annotation/{app => annotation}/annotations/__init__.py (91%)
 rename annotation/{app => annotation}/annotations/main.py (99%)
 rename annotation/{app => annotation}/annotations/resources.py (97%)
 rename annotation/{app => annotation}/categories/__init__.py (100%)
 rename annotation/{app => annotation}/categories/resources.py (94%)
 rename annotation/{app => annotation}/categories/services.py (98%)
 rename annotation/{app => annotation}/database.py (100%)
 rename annotation/{app => annotation}/distribution/__init__.py (93%)
 rename annotation/{app => annotation}/distribution/main.py (99%)
 rename annotation/{app => annotation}/distribution/resources.py (90%)
 rename annotation/{app => annotation}/errors.py (98%)
 rename annotation/{app => annotation}/filters.py (88%)
 rename annotation/{app => annotation}/jobs/__init__.py (94%)
 rename annotation/{app => annotation}/jobs/resources.py (96%)
 rename annotation/{app => annotation}/jobs/services.py (97%)
 rename annotation/{app => annotation}/kafka_client.py (100%)
 rename annotation/{app => annotation}/logger.py (100%)
 rename annotation/{app => annotation}/main.py (84%)
 rename annotation/{app => annotation}/metadata/__init__.py (100%)
 rename annotation/{app => annotation}/metadata/resources.py (68%)
 rename annotation/{app => annotation}/microservice_communication/__init__.py (100%)
 rename annotation/{app => annotation}/microservice_communication/assets_communication.py (98%)
 rename annotation/{app => annotation}/microservice_communication/jobs_communication.py (95%)
 rename annotation/{app => annotation}/microservice_communication/search.py (97%)
 rename annotation/{app => annotation}/microservice_communication/task.py (88%)
 rename annotation/{app => annotation}/microservice_communication/user.py (96%)
 rename annotation/{app => annotation}/models.py (99%)
 rename annotation/{app =>
annotation}/revisions/__init__.py (100%) rename annotation/{app => annotation}/revisions/resources.py (77%) rename annotation/{app => annotation}/schemas/__init__.py (90%) rename annotation/{app => annotation}/schemas/annotations.py (100%) rename annotation/{app => annotation}/schemas/categories.py (97%) rename annotation/{app => annotation}/schemas/errors.py (100%) rename annotation/{app => annotation}/schemas/jobs.py (100%) rename annotation/{app => annotation}/schemas/metadata.py (78%) rename annotation/{app => annotation}/schemas/tasks.py (100%) rename annotation/{app => annotation}/tags.py (100%) rename annotation/{app => annotation}/tasks/__init__.py (100%) rename annotation/{app => annotation}/tasks/resources.py (97%) rename annotation/{app => annotation}/tasks/services.py (98%) rename annotation/{app => annotation}/tasks/validation.py (98%) rename annotation/{app => annotation}/token_dependency.py (100%) rename annotation/{app => annotation}/utils.py (100%) create mode 100644 dev_runner/README.md create mode 100755 dev_runner/collect_requirements.sh create mode 100644 dev_runner/conf/shared.env create mode 100644 dev_runner/dev_runner/__init__.py create mode 100644 dev_runner/dev_runner/conf.py create mode 100644 dev_runner/dev_runner/runners/__init__.py create mode 100644 dev_runner/dev_runner/runners/annotation_runner.py create mode 100644 dev_runner/dev_runner/runners/assets_runner.py create mode 100644 dev_runner/dev_runner/runners/base_runner.py create mode 100644 dev_runner/dev_runner/runners/convert_runner.py create mode 100644 dev_runner/dev_runner/runners/jobs_runner.py create mode 100644 dev_runner/dev_runner/runners/models_runner.py create mode 100644 dev_runner/dev_runner/runners/pipelines_runner.py create mode 100644 dev_runner/dev_runner/runners/processing_runner.py create mode 100644 dev_runner/dev_runner/runners/scheduler_runner.py create mode 100644 dev_runner/dev_runner/runners/search_runner.py create mode 100644 dev_runner/dev_runner/runners/taxonomy_runner.py create mode 100644 dev_runner/dev_runner/runners/users_runner.py create mode 100644 dev_runner/docker-compose.yml create mode 100644 dev_runner/goten.env create mode 100755 dev_runner/pg-init-scripts/create-multiple-postgresql-databases.sh create mode 100644 dev_runner/pyproject.toml create mode 100644 dev_runner/start.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..aecde0525 --- /dev/null +++ b/.gitignore @@ -0,0 +1,410 @@ +# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,pycharm,vim,macos,windows,linux +# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,pycharm,vim,macos,windows,linux + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated 
files +*.icloud + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +.idea/artifacts +.idea/compiler.xml +.idea/jarRepositories.xml +.idea/modules.xml +.idea/*.iml +.idea/modules +*.iml +*.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +modules.xml +.idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +# Azure Toolkit for IntelliJ plugin +# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij +.idea/**/azureSettings.xml + +.idea/badgerdoc.iml +.idea/vcs.xml +.idea/inspectionProfiles/profiles_settings.xml + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +### Vim ### +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +### VisualStudioCode ### +.vscode/* +#!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,pycharm,vim,macos,windows,linux + +# Custom + +infra/docker/python_base/tenant_dependency +infra/docker/python_base/filter_lib +.trunk +**/.pytest_cache/** +**/*.egg-info \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 000000000..26d33521a --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/annotation/Dockerfile b/annotation/Dockerfile index 4ffba7513..4201b63f3 100644 --- a/annotation/Dockerfile +++ b/annotation/Dockerfile @@ -14,7 +14,7 @@ RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/inst # Copy using poetry.lock in case it doesn't exist yet COPY pyproject.toml poetry.lock .env alembic.ini version.txt ./ COPY ./alembic ./alembic -COPY ./app ./app +COPY annotation ./annotation FROM base as build @@ -28,7 +28,7 @@ RUN apt-get install -y make RUN poetry install --no-root COPY Makefile pytest.ini setup.cfg ./ COPY ./tests ./tests -COPY app app +COPY annotation annotation COPY alembic alembic RUN make test_checks diff --git a/annotation/alembic/env.py b/annotation/alembic/env.py index 77c270f2f..79acffc7c 100644 --- a/annotation/alembic/env.py +++ b/annotation/alembic/env.py @@ -4,8 +4,8 @@ from sqlalchemy import engine_from_config, pool from alembic import context # type: ignore -from app.database import SQLALCHEMY_DATABASE_URL -from app.utils import get_test_db_url +from annotation.database import SQLALCHEMY_DATABASE_URL +from annotation.utils import get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. 
@@ -19,7 +19,7 @@ # for 'autogenerate' support # from myapp import mymodel # target_metadata = mymodel.Base.metadata -from app.models import Base # noqa E402 +from annotation.models import Base # noqa E402 target_metadata = Base.metadata diff --git a/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py b/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py index 0698ce063..08d6dc0be 100644 --- a/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py +++ b/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py @@ -8,7 +8,7 @@ import sqlalchemy as sa from alembic import op -from app.models import ValidationSchema +from annotation.models import ValidationSchema # revision identifiers, used by Alembic. revision = "71095b8e6343" diff --git a/annotation/app/__init__.py b/annotation/annotation/__init__.py similarity index 100% rename from annotation/app/__init__.py rename to annotation/annotation/__init__.py diff --git a/annotation/app/annotations/__init__.py b/annotation/annotation/annotations/__init__.py similarity index 91% rename from annotation/app/annotations/__init__.py rename to annotation/annotation/annotations/__init__.py index a810a92d7..738e7380e 100644 --- a/annotation/app/annotations/__init__.py +++ b/annotation/annotation/annotations/__init__.py @@ -1,4 +1,4 @@ -from app.annotations.main import ( +from annotation.annotations.main import ( LATEST, MANIFEST, S3_START_PATH, diff --git a/annotation/app/annotations/main.py b/annotation/annotation/annotations/main.py similarity index 99% rename from annotation/app/annotations/main.py rename to annotation/annotation/annotations/main.py index 02cca1c17..20e067f03 100644 --- a/annotation/app/annotations/main.py +++ b/annotation/annotation/annotations/main.py @@ -14,11 +14,11 @@ from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session -from app import logger -from app.kafka_client import KAFKA_BOOTSTRAP_SERVER, KAFKA_SEARCH_TOPIC -from app.kafka_client import producers as kafka_producers -from app.models import AnnotatedDoc, DocumentLinks -from app.schemas import ( +from annotation import logger +from annotation.kafka_client import KAFKA_BOOTSTRAP_SERVER, KAFKA_SEARCH_TOPIC +from annotation.kafka_client import producers as kafka_producers +from annotation.models import AnnotatedDoc, DocumentLinks +from annotation.schemas import ( AnnotatedDocSchema, DocForSaveSchema, PageSchema, diff --git a/annotation/app/annotations/resources.py b/annotation/annotation/annotations/resources.py similarity index 97% rename from annotation/app/annotations/resources.py rename to annotation/annotation/annotations/resources.py index 9f1809e0b..be016ca11 100644 --- a/annotation/app/annotations/resources.py +++ b/annotation/annotation/annotations/resources.py @@ -6,13 +6,13 @@ from sqlalchemy.orm import Session from tenant_dependency import TenantData -from app.database import get_db -from app.errors import NoSuchRevisionsError -from app.microservice_communication.assets_communication import ( +from annotation.database import get_db +from annotation.errors import NoSuchRevisionsError +from annotation.microservice_communication.assets_communication import ( get_file_path_and_bucket, ) -from app.microservice_communication.search import X_CURRENT_TENANT_HEADER -from app.schemas import ( +from annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.schemas import ( AnnotatedDocSchema, BadRequestErrorSchema, 
ConnectionErrorSchema, @@ -23,8 +23,8 @@ ParticularRevisionSchema, ValidationSchema, ) -from app.tags import ANNOTATION_TAG, JOBS_TAG, REVISION_TAG -from app.tasks import update_task_status +from annotation.tags import ANNOTATION_TAG, JOBS_TAG, REVISION_TAG +from annotation.tasks import update_task_status from ..models import AnnotatedDoc, File, Job, ManualAnnotationTask from ..token_dependency import TOKEN diff --git a/annotation/app/categories/__init__.py b/annotation/annotation/categories/__init__.py similarity index 100% rename from annotation/app/categories/__init__.py rename to annotation/annotation/categories/__init__.py diff --git a/annotation/app/categories/resources.py b/annotation/annotation/categories/resources.py similarity index 94% rename from annotation/app/categories/resources.py rename to annotation/annotation/categories/resources.py index 75a8cc60a..40f411677 100644 --- a/annotation/app/categories/resources.py +++ b/annotation/annotation/categories/resources.py @@ -5,11 +5,11 @@ from sqlalchemy.orm import Session from sqlalchemy_filters.exceptions import BadFilterFormat -from app.database import get_db -from app.errors import NoSuchCategoryError -from app.filters import CategoryFilter -from app.microservice_communication.search import X_CURRENT_TENANT_HEADER -from app.schemas import ( +from annotation.database import get_db +from annotation.errors import NoSuchCategoryError +from annotation.filters import CategoryFilter +from annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.schemas import ( BadRequestErrorSchema, CategoryBaseSchema, CategoryInputSchema, @@ -18,7 +18,7 @@ NotFoundErrorSchema, SubCategoriesOutSchema, ) -from app.tags import CATEGORIES_TAG +from annotation.tags import CATEGORIES_TAG from .services import ( add_category_db, diff --git a/annotation/app/categories/services.py b/annotation/annotation/categories/services.py similarity index 98% rename from annotation/app/categories/services.py rename to annotation/annotation/categories/services.py index 6c25fcc46..c81adbb2b 100644 --- a/annotation/app/categories/services.py +++ b/annotation/annotation/categories/services.py @@ -9,16 +9,16 @@ from sqlalchemy.sql.expression import func from sqlalchemy_utils import Ltree -from app import logger as app_logger -from app.errors import ( +from annotation import logger as app_logger +from annotation.errors import ( CheckFieldError, ForeignKeyError, NoSuchCategoryError, SelfParentError, ) -from app.filters import CategoryFilter -from app.models import Category, Job -from app.schemas import ( +from annotation.filters import CategoryFilter +from annotation.models import Category, Job +from annotation.schemas import ( CategoryInputSchema, CategoryORMSchema, CategoryResponseSchema, diff --git a/annotation/app/database.py b/annotation/annotation/database.py similarity index 100% rename from annotation/app/database.py rename to annotation/annotation/database.py diff --git a/annotation/app/distribution/__init__.py b/annotation/annotation/distribution/__init__.py similarity index 93% rename from annotation/app/distribution/__init__.py rename to annotation/annotation/distribution/__init__.py index 0eb12a693..2807c7548 100644 --- a/annotation/app/distribution/__init__.py +++ b/annotation/annotation/distribution/__init__.py @@ -1,4 +1,4 @@ -from app.distribution.main import ( +from annotation.distribution.main import ( add_unassigned_file, calculate_users_load, distribute, diff --git a/annotation/app/distribution/main.py 
b/annotation/annotation/distribution/main.py similarity index 99% rename from annotation/app/distribution/main.py rename to annotation/annotation/distribution/main.py index 7c58cda5d..e94fd17b3 100644 --- a/annotation/app/distribution/main.py +++ b/annotation/annotation/distribution/main.py @@ -47,13 +47,13 @@ from sqlalchemy.orm import Session -from app.jobs import create_user, read_user -from app.microservice_communication.assets_communication import ( +from annotation.jobs import create_user, read_user +from annotation.microservice_communication.assets_communication import ( FilesForDistribution, ) -from app.models import File, User -from app.schemas import TaskStatusEnumSchema, ValidationSchema -from app.tasks import create_tasks as create_db_tasks +from annotation.models import File, User +from annotation.schemas import TaskStatusEnumSchema, ValidationSchema +from annotation.tasks import create_tasks as create_db_tasks MAX_PAGES = 50 diff --git a/annotation/app/distribution/resources.py b/annotation/annotation/distribution/resources.py similarity index 90% rename from annotation/app/distribution/resources.py rename to annotation/annotation/distribution/resources.py index 8896007b3..141b40680 100644 --- a/annotation/app/distribution/resources.py +++ b/annotation/annotation/distribution/resources.py @@ -5,32 +5,32 @@ from sqlalchemy.orm import Session from tenant_dependency import TenantData -from app.database import get_db -from app.distribution import ( +from annotation.database import get_db +from annotation.distribution import ( distribute, find_unassigned_files, prepare_response, ) -from app.errors import FieldConstraintError -from app.jobs import ( +from annotation.errors import FieldConstraintError +from annotation.jobs import ( check_annotators, check_validators, get_job_attributes_for_post, ) -from app.microservice_communication.assets_communication import ( +from annotation.microservice_communication.assets_communication import ( get_files_info, prepare_files_for_distribution, ) -from app.microservice_communication.search import X_CURRENT_TENANT_HEADER -from app.models import File, Job, User -from app.schemas import ( +from annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.models import File, Job, User +from annotation.schemas import ( BadRequestErrorSchema, ConnectionErrorSchema, ManualAnnotationTaskSchema, TaskInfoSchema, ) -from app.tags import TASKS_TAG -from app.token_dependency import TOKEN +from annotation.tags import TASKS_TAG +from annotation.token_dependency import TOKEN router = APIRouter( prefix="/distribution", diff --git a/annotation/app/errors.py b/annotation/annotation/errors.py similarity index 98% rename from annotation/app/errors.py rename to annotation/annotation/errors.py index 8625bd4e6..9b9cc839e 100644 --- a/annotation/app/errors.py +++ b/annotation/annotation/errors.py @@ -3,7 +3,7 @@ from fastapi.responses import JSONResponse from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from app import logger as app_logger +from annotation import logger as app_logger logger = app_logger.Logger diff --git a/annotation/app/filters.py b/annotation/annotation/filters.py similarity index 88% rename from annotation/app/filters.py rename to annotation/annotation/filters.py index 5d3b2d4dc..859a21179 100644 --- a/annotation/app/filters.py +++ b/annotation/annotation/filters.py @@ -1,6 +1,6 @@ from filter_lib import create_filter_model -from app.models import AnnotatedDoc, Category, Job, ManualAnnotationTask, User +from 
annotation.models import AnnotatedDoc, Category, Job, ManualAnnotationTask, User CategoryFilter = create_filter_model( Category, diff --git a/annotation/app/jobs/__init__.py b/annotation/annotation/jobs/__init__.py similarity index 94% rename from annotation/app/jobs/__init__.py rename to annotation/annotation/jobs/__init__.py index 007b5e949..8ba05bfbd 100644 --- a/annotation/app/jobs/__init__.py +++ b/annotation/annotation/jobs/__init__.py @@ -1,4 +1,4 @@ -from app.jobs.services import ( +from annotation.jobs.services import ( check_annotators, check_validators, clean_tasks_before_jobs_update, diff --git a/annotation/app/jobs/resources.py b/annotation/annotation/jobs/resources.py similarity index 96% rename from annotation/app/jobs/resources.py rename to annotation/annotation/jobs/resources.py index dce3346b5..dd93c36dc 100644 --- a/annotation/app/jobs/resources.py +++ b/annotation/annotation/jobs/resources.py @@ -17,19 +17,19 @@ from sqlalchemy_filters.exceptions import BadFilterFormat from tenant_dependency import TenantData -import app.categories.services -from app import logger as app_logger -from app.categories import fetch_bunch_categories_db -from app.database import get_db -from app.distribution import distribute -from app.filters import CategoryFilter -from app.microservice_communication.assets_communication import get_files_info -from app.microservice_communication.jobs_communication import ( +import annotation.categories.services +from annotation import logger as app_logger +from annotation.categories import fetch_bunch_categories_db +from annotation.database import get_db +from annotation.distribution import distribute +from annotation.filters import CategoryFilter +from annotation.microservice_communication.assets_communication import get_files_info +from annotation.microservice_communication.jobs_communication import ( JobUpdateException, update_job_status, ) -from app.microservice_communication.search import X_CURRENT_TENANT_HEADER -from app.schemas import ( +from annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.schemas import ( BadRequestErrorSchema, CategoryResponseSchema, ConnectionErrorSchema, @@ -46,8 +46,8 @@ UnassignedFilesInfoSchema, ValidationSchema, ) -from app.tags import FILES_TAG, JOBS_TAG -from app.token_dependency import TOKEN +from annotation.tags import FILES_TAG, JOBS_TAG +from annotation.token_dependency import TOKEN from ..models import ( AnnotatedDoc, @@ -555,7 +555,7 @@ def search_job_categories( """ get_job(db, job_id, x_current_tenant) try: - task_response = app.categories.services.filter_category_db( + task_response = annotation.categories.services.filter_category_db( db, request, x_current_tenant, diff --git a/annotation/app/jobs/services.py b/annotation/annotation/jobs/services.py similarity index 97% rename from annotation/app/jobs/services.py rename to annotation/annotation/jobs/services.py index 70534575b..def19d8d4 100644 --- a/annotation/app/jobs/services.py +++ b/annotation/annotation/jobs/services.py @@ -8,13 +8,13 @@ from sqlalchemy.orm import Session, query from sqlalchemy.orm.attributes import InstrumentedAttribute -from app.categories import fetch_bunch_categories_db -from app.categories.services import response_object_from_db -from app.database import Base -from app.errors import EnumValidationError, FieldConstraintError, WrongJobError -from app.microservice_communication.assets_communication import get_files_info -from app.microservice_communication.jobs_communication import get_job_names 
-from app.models import ( +from annotation.categories import fetch_bunch_categories_db +from annotation.categories.services import response_object_from_db +from annotation.database import Base +from annotation.errors import EnumValidationError, FieldConstraintError, WrongJobError +from annotation.microservice_communication.assets_communication import get_files_info +from annotation.microservice_communication.jobs_communication import get_job_names +from annotation.models import ( Category, File, Job, @@ -24,7 +24,7 @@ association_job_owner, association_job_validator, ) -from app.schemas import ( +from annotation.schemas import ( CROSS_MIN_ANNOTATORS_NUMBER, CategoryResponseSchema, FileStatusEnumSchema, diff --git a/annotation/app/kafka_client.py b/annotation/annotation/kafka_client.py similarity index 100% rename from annotation/app/kafka_client.py rename to annotation/annotation/kafka_client.py diff --git a/annotation/app/logger.py b/annotation/annotation/logger.py similarity index 100% rename from annotation/app/logger.py rename to annotation/annotation/logger.py diff --git a/annotation/app/main.py b/annotation/annotation/main.py similarity index 84% rename from annotation/app/main.py rename to annotation/annotation/main.py index 0ebf0ff14..dc2db2d2b 100644 --- a/annotation/app/main.py +++ b/annotation/annotation/main.py @@ -7,10 +7,10 @@ from sqlalchemy.exc import DBAPIError, SQLAlchemyError from starlette.requests import Request -from app.annotations import resources as annotations_resources -from app.categories import resources as categories_resources -from app.distribution import resources as distribution_resources -from app.errors import ( +from annotation.annotations import resources as annotations_resources +from annotation.categories import resources as categories_resources +from annotation.distribution import resources as distribution_resources +from annotation.errors import ( AgreementScoreServiceException, CheckFieldError, EnumValidationError, @@ -35,13 +35,13 @@ no_such_revisions_error_handler, wrong_job_error_handler, ) -from app.jobs import resources as jobs_resources -from app import logger as app_logger -from app.metadata import resources as metadata_resources -from app.revisions import resources as revision_resources -from app.tags import TAGS -from app.tasks import resources as task_resources -from app.token_dependency import TOKEN +from annotation.jobs import resources as jobs_resources +from annotation import logger as app_logger +from annotation.metadata import resources as metadata_resources +from annotation.revisions import resources as revision_resources +from annotation.tags import TAGS +from annotation.tasks import resources as task_resources +from annotation.token_dependency import TOKEN load_dotenv(find_dotenv()) diff --git a/annotation/app/metadata/__init__.py b/annotation/annotation/metadata/__init__.py similarity index 100% rename from annotation/app/metadata/__init__.py rename to annotation/annotation/metadata/__init__.py diff --git a/annotation/app/metadata/resources.py b/annotation/annotation/metadata/resources.py similarity index 68% rename from annotation/app/metadata/resources.py rename to annotation/annotation/metadata/resources.py index a90e2173e..be013589f 100644 --- a/annotation/app/metadata/resources.py +++ b/annotation/annotation/metadata/resources.py @@ -1,8 +1,8 @@ from fastapi import APIRouter, status -from app.microservice_communication.search import X_CURRENT_TENANT_HEADER -from app.schemas import EntitiesStatusesSchema -from app.tags 
import METADATA_TAG, TASKS_TAG +from annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.schemas import EntitiesStatusesSchema +from annotation.tags import METADATA_TAG, TASKS_TAG router = APIRouter( prefix="/metadata", diff --git a/annotation/app/microservice_communication/__init__.py b/annotation/annotation/microservice_communication/__init__.py similarity index 100% rename from annotation/app/microservice_communication/__init__.py rename to annotation/annotation/microservice_communication/__init__.py diff --git a/annotation/app/microservice_communication/assets_communication.py b/annotation/annotation/microservice_communication/assets_communication.py similarity index 98% rename from annotation/app/microservice_communication/assets_communication.py rename to annotation/annotation/microservice_communication/assets_communication.py index 715ae95b7..bf0b52eb1 100644 --- a/annotation/app/microservice_communication/assets_communication.py +++ b/annotation/annotation/microservice_communication/assets_communication.py @@ -5,7 +5,7 @@ from dotenv import find_dotenv, load_dotenv from requests import ConnectionError, RequestException, Timeout -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, diff --git a/annotation/app/microservice_communication/jobs_communication.py b/annotation/annotation/microservice_communication/jobs_communication.py similarity index 95% rename from annotation/app/microservice_communication/jobs_communication.py rename to annotation/annotation/microservice_communication/jobs_communication.py index 711f6b09b..2c031ddb6 100644 --- a/annotation/app/microservice_communication/jobs_communication.py +++ b/annotation/annotation/microservice_communication/jobs_communication.py @@ -5,7 +5,7 @@ from dotenv import find_dotenv, load_dotenv from requests import RequestException -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, diff --git a/annotation/app/microservice_communication/search.py b/annotation/annotation/microservice_communication/search.py similarity index 97% rename from annotation/app/microservice_communication/search.py rename to annotation/annotation/microservice_communication/search.py index a0fa8e699..6d942f44c 100644 --- a/annotation/app/microservice_communication/search.py +++ b/annotation/annotation/microservice_communication/search.py @@ -90,9 +90,9 @@ from typing import Dict, List import requests -from app.annotations import row_to_dict -from app.models import ManualAnnotationTask -from app.schemas import ExpandedManualAnnotationTaskSchema +from annotation.annotations import row_to_dict +from annotation.models import ManualAnnotationTask +from annotation.schemas import ExpandedManualAnnotationTaskSchema from fastapi import Header, HTTPException from requests.exceptions import ConnectionError, RequestException, Timeout diff --git a/annotation/app/microservice_communication/task.py b/annotation/annotation/microservice_communication/task.py similarity index 88% rename from annotation/app/microservice_communication/task.py rename to annotation/annotation/microservice_communication/task.py index f15a68d2d..88ec5af6b 100644 --- a/annotation/app/microservice_communication/task.py +++ b/annotation/annotation/microservice_communication/task.py @@ -5,13 +5,13 @@ from dotenv import find_dotenv, load_dotenv from requests import 
RequestException -from app.errors import AgreementScoreServiceException -from app.microservice_communication.search import ( +from annotation.errors import AgreementScoreServiceException +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.schemas import ( +from annotation.schemas import ( AgreementScoreServiceInput, AgreementScoreServiceResponse, ) diff --git a/annotation/app/microservice_communication/user.py b/annotation/annotation/microservice_communication/user.py similarity index 96% rename from annotation/app/microservice_communication/user.py rename to annotation/annotation/microservice_communication/user.py index 3b1a5382a..5b621f4f4 100644 --- a/annotation/app/microservice_communication/user.py +++ b/annotation/annotation/microservice_communication/user.py @@ -5,7 +5,7 @@ from dotenv import find_dotenv, load_dotenv from requests import RequestException -from app.models import ManualAnnotationTask +from annotation.models import ManualAnnotationTask load_dotenv(find_dotenv()) USERS_SEARCH_URL = os.environ.get("USERS_SEARCH_URL") diff --git a/annotation/app/models.py b/annotation/annotation/models.py similarity index 99% rename from annotation/app/models.py rename to annotation/annotation/models.py index 96870a333..5f8f40536 100644 --- a/annotation/app/models.py +++ b/annotation/annotation/models.py @@ -21,9 +21,9 @@ from sqlalchemy.orm import relationship, validates from sqlalchemy_utils import Ltree, LtreeType -from app.database import Base -from app.errors import CheckFieldError -from app.schemas import ( +from annotation.database import Base +from annotation.errors import CheckFieldError +from annotation.schemas import ( DEFAULT_LOAD, AnnotationStatisticsEventEnumSchema, CategoryTypeSchema, diff --git a/annotation/app/revisions/__init__.py b/annotation/annotation/revisions/__init__.py similarity index 100% rename from annotation/app/revisions/__init__.py rename to annotation/annotation/revisions/__init__.py diff --git a/annotation/app/revisions/resources.py b/annotation/annotation/revisions/resources.py similarity index 77% rename from annotation/app/revisions/resources.py rename to annotation/annotation/revisions/resources.py index c02c4fee0..6739fb119 100644 --- a/annotation/app/revisions/resources.py +++ b/annotation/annotation/revisions/resources.py @@ -4,11 +4,11 @@ from sqlalchemy.orm import Session from starlette import status -from app.database import get_db -from app.microservice_communication.search import X_CURRENT_TENANT_HEADER -from app.models import AnnotatedDoc -from app.schemas import AnnotatedDocSchema, ConnectionErrorSchema -from app.tags import ANNOTATION_TAG, REVISION_TAG +from annotation.database import get_db +from annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.models import AnnotatedDoc +from annotation.schemas import AnnotatedDocSchema, ConnectionErrorSchema +from annotation.tags import ANNOTATION_TAG, REVISION_TAG router = APIRouter( prefix="/revisions", diff --git a/annotation/app/schemas/__init__.py b/annotation/annotation/schemas/__init__.py similarity index 90% rename from annotation/app/schemas/__init__.py rename to annotation/annotation/schemas/__init__.py index 892124fb5..db9dd1960 100644 --- a/annotation/app/schemas/__init__.py +++ b/annotation/annotation/schemas/__init__.py @@ -1,4 +1,4 @@ -from app.schemas.annotations import ( +from annotation.schemas.annotations import ( AnnotatedDocSchema, DocForSaveSchema, PageOutSchema, @@ -6,7 +6,7 @@ 
ParticularRevisionSchema, RevisionLink, ) -from app.schemas.categories import ( +from annotation.schemas.categories import ( CategoryBaseSchema, CategoryDataAttributeNames, CategoryInputSchema, @@ -15,12 +15,12 @@ CategoryTypeSchema, SubCategoriesOutSchema, ) -from app.schemas.errors import ( +from annotation.schemas.errors import ( BadRequestErrorSchema, ConnectionErrorSchema, NotFoundErrorSchema, ) -from app.schemas.jobs import ( +from annotation.schemas.jobs import ( CROSS_MIN_ANNOTATORS_NUMBER, DEFAULT_LOAD, FileInfoSchema, @@ -36,8 +36,8 @@ UnassignedFilesInfoSchema, ValidationSchema, ) -from app.schemas.metadata import EntitiesStatusesSchema -from app.schemas.tasks import ( +from annotation.schemas.metadata import EntitiesStatusesSchema +from annotation.schemas.tasks import ( AgreementScoreComparingResult, AgreementScoreServiceInput, AgreementScoreServiceResponse, diff --git a/annotation/app/schemas/annotations.py b/annotation/annotation/schemas/annotations.py similarity index 100% rename from annotation/app/schemas/annotations.py rename to annotation/annotation/schemas/annotations.py diff --git a/annotation/app/schemas/categories.py b/annotation/annotation/schemas/categories.py similarity index 97% rename from annotation/app/schemas/categories.py rename to annotation/annotation/schemas/categories.py index da49230cf..edd68ea39 100644 --- a/annotation/app/schemas/categories.py +++ b/annotation/annotation/schemas/categories.py @@ -3,7 +3,7 @@ from pydantic import BaseModel, Field, validator -from app.errors import CheckFieldError +from annotation.errors import CheckFieldError class CategoryTypeSchema(str, Enum): diff --git a/annotation/app/schemas/errors.py b/annotation/annotation/schemas/errors.py similarity index 100% rename from annotation/app/schemas/errors.py rename to annotation/annotation/schemas/errors.py diff --git a/annotation/app/schemas/jobs.py b/annotation/annotation/schemas/jobs.py similarity index 100% rename from annotation/app/schemas/jobs.py rename to annotation/annotation/schemas/jobs.py diff --git a/annotation/app/schemas/metadata.py b/annotation/annotation/schemas/metadata.py similarity index 78% rename from annotation/app/schemas/metadata.py rename to annotation/annotation/schemas/metadata.py index f69488de2..0395189fa 100644 --- a/annotation/app/schemas/metadata.py +++ b/annotation/annotation/schemas/metadata.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from app.schemas.tasks import TaskStatusEnumSchema +from annotation.schemas.tasks import TaskStatusEnumSchema class EntitiesStatusesSchema(BaseModel): diff --git a/annotation/app/schemas/tasks.py b/annotation/annotation/schemas/tasks.py similarity index 100% rename from annotation/app/schemas/tasks.py rename to annotation/annotation/schemas/tasks.py diff --git a/annotation/app/tags.py b/annotation/annotation/tags.py similarity index 100% rename from annotation/app/tags.py rename to annotation/annotation/tags.py diff --git a/annotation/app/tasks/__init__.py b/annotation/annotation/tasks/__init__.py similarity index 100% rename from annotation/app/tasks/__init__.py rename to annotation/annotation/tasks/__init__.py diff --git a/annotation/app/tasks/resources.py b/annotation/annotation/tasks/resources.py similarity index 97% rename from annotation/app/tasks/resources.py rename to annotation/annotation/tasks/resources.py index 686d7c87b..497fdf1f7 100644 --- a/annotation/app/tasks/resources.py +++ b/annotation/annotation/tasks/resources.py @@ -24,10 +24,10 @@ from sqlalchemy_filters.exceptions import 
BadFilterFormat from tenant_dependency import TenantData -from app.annotations import accumulate_pages_info, row_to_dict -from app.database import get_db -from app.filters import TaskFilter -from app.jobs import ( +from annotation.annotations import accumulate_pages_info, row_to_dict +from annotation.database import get_db +from annotation.filters import TaskFilter +from annotation.jobs import ( collect_job_names, delete_tasks, get_job, @@ -37,21 +37,21 @@ update_inner_job_status, update_user_overall_load, ) -from app.logger import Logger -from app.microservice_communication.assets_communication import get_file_names -from app.microservice_communication.jobs_communication import ( +from annotation.logger import Logger +from annotation.microservice_communication.assets_communication import get_file_names +from annotation.microservice_communication.jobs_communication import ( JobUpdateException, update_job_status, ) -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( X_CURRENT_TENANT_HEADER, expand_response, ) -from app.microservice_communication.user import ( +from annotation.microservice_communication.user import ( GetUserInfoAccessDenied, get_user_logins, ) -from app.schemas import ( +from annotation.schemas import ( AnnotationStatisticsInputSchema, AnnotationStatisticsResponseSchema, BadRequestErrorSchema, @@ -69,12 +69,12 @@ ValidationEndSchema, ValidationSchema, ) -from app.tags import REVISION_TAG, TASKS_TAG -from app.tasks.validation import ( +from annotation.tags import REVISION_TAG, TASKS_TAG +from annotation.tasks.validation import ( create_annotation_tasks, create_validation_tasks, ) -from app.token_dependency import TOKEN +from annotation.token_dependency import TOKEN from ..models import File, Job, ManualAnnotationTask from .services import ( diff --git a/annotation/app/tasks/services.py b/annotation/annotation/tasks/services.py similarity index 98% rename from annotation/app/tasks/services.py rename to annotation/annotation/tasks/services.py index 65976e12c..8b0744ff2 100644 --- a/annotation/app/tasks/services.py +++ b/annotation/annotation/tasks/services.py @@ -12,14 +12,14 @@ from sqlalchemy.orm import Session from tenant_dependency import TenantData -from app.errors import CheckFieldError, FieldConstraintError -from app.filters import TaskFilter -from app.jobs import update_files, update_user_overall_load -from app.microservice_communication.assets_communication import ( +from annotation.errors import CheckFieldError, FieldConstraintError +from annotation.filters import TaskFilter +from annotation.jobs import update_files, update_user_overall_load +from annotation.microservice_communication.assets_communication import ( get_file_path_and_bucket, ) -from app.microservice_communication.task import get_agreement_score -from app.models import ( +from annotation.microservice_communication.task import get_agreement_score +from annotation.models import ( AgreementMetrics, AnnotatedDoc, AnnotationStatistics, @@ -28,7 +28,7 @@ association_job_annotator, association_job_validator, ) -from app.schemas import ( +from annotation.schemas import ( AgreementScoreComparingResult, AgreementScoreServiceInput, AgreementScoreServiceResponse, diff --git a/annotation/app/tasks/validation.py b/annotation/annotation/tasks/validation.py similarity index 98% rename from annotation/app/tasks/validation.py rename to annotation/annotation/tasks/validation.py index f4fc5dcbd..c04082c53 100644 --- a/annotation/app/tasks/validation.py +++ 
b/annotation/annotation/tasks/validation.py @@ -7,12 +7,12 @@ from sqlalchemy import and_, asc, null, or_ from sqlalchemy.orm import Session -from app.distribution import prepare_response -from app.microservice_communication.assets_communication import ( +from annotation.distribution import prepare_response +from annotation.microservice_communication.assets_communication import ( FilesForDistribution, ) -from app.models import AnnotatedDoc, Job, User -from app.schemas import ( +from annotation.models import AnnotatedDoc, Job, User +from annotation.schemas import ( AnnotationAndValidationActionsSchema, TaskStatusEnumSchema, ValidationSchema, diff --git a/annotation/app/token_dependency.py b/annotation/annotation/token_dependency.py similarity index 100% rename from annotation/app/token_dependency.py rename to annotation/annotation/token_dependency.py diff --git a/annotation/app/utils.py b/annotation/annotation/utils.py similarity index 100% rename from annotation/app/utils.py rename to annotation/annotation/utils.py diff --git a/annotation/tests/conftest.py b/annotation/tests/conftest.py index 2c32badb7..4e171e140 100644 --- a/annotation/tests/conftest.py +++ b/annotation/tests/conftest.py @@ -20,11 +20,11 @@ import tests.test_validation as validation from alembic import command from alembic.config import Config -from app.annotations import MANIFEST, S3_START_PATH -from app.categories import cache -from app.database import SQLALCHEMY_DATABASE_URL, Base -from app.jobs import update_user_overall_load -from app.models import ( +from annotation.annotations import MANIFEST, S3_START_PATH +from annotation.categories import cache +from annotation.database import SQLALCHEMY_DATABASE_URL, Base +from annotation.jobs import update_user_overall_load +from annotation.models import ( AnnotatedDoc, Category, DocumentLinks, @@ -33,7 +33,7 @@ ManualAnnotationTask, User, ) -from app.schemas import ( +from annotation.schemas import ( AnnotationStatisticsInputSchema, CategoryTypeSchema, FileStatusEnumSchema, @@ -41,8 +41,8 @@ TaskStatusEnumSchema, ValidationSchema, ) -from app.tasks import add_task_stats_record -from app.utils import get_test_db_url +from annotation.tasks import add_task_stats_record +from annotation.utils import get_test_db_url from tests.override_app_dependency import TEST_TENANT from tests.test_annotators_overall_load import ( OVERALL_LOAD_CREATED_TASKS, @@ -958,7 +958,7 @@ def mock_assets_communication( monkeypatch, prepare_db_categories_for_filtration ) -> Session: monkeypatch.setattr( - "app.jobs.resources.get_files_info", + "annotation.jobs.resources.get_files_info", Mock(return_value=[{"file_id": MOCK_ID, "pages_number": 2}]), ) return prepare_db_categories_for_filtration @@ -969,7 +969,7 @@ def mock_db_error_for_job_categories( monkeypatch, prepare_db_categories_for_filtration ) -> Session: monkeypatch.setattr( - "app.jobs.resources.fetch_bunch_categories_db", + "annotation.jobs.resources.fetch_bunch_categories_db", Mock(side_effect=SQLAlchemyError), ) return prepare_db_categories_for_filtration @@ -980,7 +980,7 @@ def mock_db_error_get_job_categories( monkeypatch, prepare_db_categories_for_filtration ) -> Session: monkeypatch.setattr( - "app.main.filter_job_categories", + "annotation.main.filter_job_categories", Mock(side_effect=SQLAlchemyError), ) return prepare_db_categories_for_filtration @@ -1273,7 +1273,7 @@ def mock_exception(*args, **kwargs): @pytest.fixture def mock_minio_empty_bucket(monkeypatch, empty_bucket): monkeypatch.setattr( - "app.annotations.main.connect_s3", + 
"annotation.annotations.main.connect_s3", Mock(return_value=empty_bucket), ) yield empty_bucket diff --git a/annotation/tests/override_app_dependency.py b/annotation/tests/override_app_dependency.py index 1d4880204..0e143a6f4 100644 --- a/annotation/tests/override_app_dependency.py +++ b/annotation/tests/override_app_dependency.py @@ -13,16 +13,16 @@ from sqlalchemy.orm import sessionmaker from tenant_dependency import TenantData -from app.database import get_db -from app.main import app -from app.microservice_communication.search import ( +from annotation.database import get_db +from annotation.main import app +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.token_dependency import TOKEN -from app.utils import get_test_db_url -from app.database import SQLALCHEMY_DATABASE_URL +from annotation.token_dependency import TOKEN +from annotation.utils import get_test_db_url +from annotation.database import SQLALCHEMY_DATABASE_URL TEST_TOKEN = "token" diff --git a/annotation/tests/test_annotators_overall_load.py b/annotation/tests/test_annotators_overall_load.py index 40e91533b..c10a891ee 100644 --- a/annotation/tests/test_annotators_overall_load.py +++ b/annotation/tests/test_annotators_overall_load.py @@ -6,12 +6,12 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from app.jobs import update_user_overall_load -from app.main import app -from app.microservice_communication.assets_communication import ( +from annotation.jobs import update_user_overall_load +from annotation.main import app +from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ) -from app.models import ( +from annotation.models import ( AnnotatedDoc, Category, File, @@ -19,7 +19,7 @@ ManualAnnotationTask, User, ) -from app.schemas import ( +from annotation.schemas import ( CategoryTypeSchema, FileStatusEnumSchema, JobStatusEnumSchema, @@ -465,7 +465,7 @@ def test_overall_load_after_distribution( monkeypatch, prepare_db_for_overall_load ): monkeypatch.setattr( - "app.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=[{"id": 3, "pages": 4}]), ) response = client.post( @@ -617,7 +617,7 @@ def test_overall_load_recalculation_when_add_users( when adding or deleting users""" session = prepare_db_for_overall_load monkeypatch.setattr( - "app.jobs.services.get_job_names", + "annotation.jobs.services.get_job_names", Mock(return_value={job_id: "JobName"}), ) response = client.patch( @@ -669,7 +669,7 @@ def test_overall_load_recalculation_when_delete_users( ): session = prepare_db_for_overall_load monkeypatch.setattr( - "app.jobs.services.get_job_names", + "annotation.jobs.services.get_job_names", Mock(return_value={job_id: "JobName"}), ) response = client.patch( diff --git a/annotation/tests/test_assets_communication.py b/annotation/tests/test_assets_communication.py index 4dc82edcb..1cb4e8dd4 100644 --- a/annotation/tests/test_assets_communication.py +++ b/annotation/tests/test_assets_communication.py @@ -2,7 +2,7 @@ import pytest import responses -from app.microservice_communication.assets_communication import ( +from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ASSETS_URL, get_dataset_info, @@ -126,7 +126,7 @@ def test_get_file_names( monkeypatch, file_ids, parsed_response, expected_result ): monkeypatch.setattr( - 
"app.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=parsed_response), ) @@ -215,7 +215,7 @@ def test_get_files_info( expected_result, ): monkeypatch.setattr( - "app.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=mocked_files), ) for i, dataset_id in enumerate(dataset_ids): diff --git a/annotation/tests/test_category_crud.py b/annotation/tests/test_category_crud.py index 5f38f3d4b..b47c98bdb 100644 --- a/annotation/tests/test_category_crud.py +++ b/annotation/tests/test_category_crud.py @@ -9,7 +9,7 @@ from pytest import fixture, mark from sqlalchemy.exc import IntegrityError, SQLAlchemyError -from app.models import Category +from annotation.models import Category from tests.consts import CATEGORIES_PATH from tests.override_app_dependency import TEST_HEADERS, app @@ -177,7 +177,7 @@ def add_for_cascade_delete( @mark.integration -@patch("app.categories.resources.add_category_db", side_effect=SQLAlchemyError) +@patch("annotation.categories.resources.add_category_db", side_effect=SQLAlchemyError) def test_add_db_connection_error(prepare_db_categories_different_names): data = prepare_category_body() response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) @@ -373,7 +373,7 @@ def test_add_self_parent(prepare_db_categories_different_names): @mark.integration @patch( - "app.categories.resources.fetch_category_db", side_effect=SQLAlchemyError + "annotation.categories.resources.fetch_category_db", side_effect=SQLAlchemyError ) def test_get_db_connection_error(prepare_db_categories_same_names): cat_id = 1 @@ -426,7 +426,7 @@ def test_get_no_tenant_specified(prepare_db_categories_same_names): @mark.integration @patch( - "app.categories.resources.filter_category_db", side_effect=SQLAlchemyError + "annotation.categories.resources.filter_category_db", side_effect=SQLAlchemyError ) def test_search_db_connection_error(prepare_db_categories_for_filtration): data = prepare_filtration_body() @@ -653,7 +653,7 @@ def test_search_wrong_parameters( @mark.integration @patch( - "app.categories.resources.update_category_db", side_effect=SQLAlchemyError + "annotation.categories.resources.update_category_db", side_effect=SQLAlchemyError ) def test_update_db_connection_error(prepare_db_categories_different_names): cat_id = 1 @@ -815,7 +815,7 @@ def test_update_allowed_parent( @mark.integration @patch( - "app.categories.resources.delete_category_db", side_effect=SQLAlchemyError + "annotation.categories.resources.delete_category_db", side_effect=SQLAlchemyError ) def test_delete_db_connection_error(prepare_db_categories_same_names): cat_id = "1" diff --git a/annotation/tests/test_compare_scores.py b/annotation/tests/test_compare_scores.py index 7c164854b..11a0642a5 100644 --- a/annotation/tests/test_compare_scores.py +++ b/annotation/tests/test_compare_scores.py @@ -2,12 +2,12 @@ import pytest -from app.schemas.tasks import ( +from annotation.schemas.tasks import ( AgreementScoreComparingResult, AgreementScoreServiceResponse, TaskMetric, ) -from app.tasks.services import compare_agreement_scores +from annotation.tasks.services import compare_agreement_scores min_match_1 = 0.8 case_1 = [ diff --git a/annotation/tests/test_cross_validation.py b/annotation/tests/test_cross_validation.py index 00de50d37..e52b85cce 100644 --- a/annotation/tests/test_cross_validation.py +++ 
b/annotation/tests/test_cross_validation.py @@ -3,13 +3,13 @@ import pytest -from app.distribution import ( +from annotation.distribution import ( distribute_validation_partial_files, distribute_whole_files, ) -from app.errors import FieldConstraintError -from app.jobs import check_annotators, check_validators -from app.schemas import TaskStatusEnumSchema, ValidationSchema +from annotation.errors import FieldConstraintError +from annotation.jobs import check_annotators, check_validators +from annotation.schemas import TaskStatusEnumSchema, ValidationSchema from tests.test_distribution import JOB_ID TASKS_STATUS = TaskStatusEnumSchema.pending diff --git a/annotation/tests/test_delete_batch_tasks.py b/annotation/tests/test_delete_batch_tasks.py index b184ed175..3a321ab10 100644 --- a/annotation/tests/test_delete_batch_tasks.py +++ b/annotation/tests/test_delete_batch_tasks.py @@ -4,9 +4,9 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from app.annotations import row_to_dict -from app.models import Category, File, Job, ManualAnnotationTask, User -from app.schemas import ( +from annotation.annotations import row_to_dict +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import ( CategoryTypeSchema, TaskStatusEnumSchema, ValidationSchema, diff --git a/annotation/tests/test_distribution.py b/annotation/tests/test_distribution.py index 4bce1113d..1043f6d87 100644 --- a/annotation/tests/test_distribution.py +++ b/annotation/tests/test_distribution.py @@ -3,7 +3,7 @@ import pytest -from app.distribution import ( +from annotation.distribution import ( add_unassigned_file, calculate_users_load, distribute_annotation_partial_files, @@ -14,12 +14,12 @@ find_unassigned_files, find_unassigned_pages, ) -from app.distribution.main import distribute_tasks_extensively -from app.microservice_communication.assets_communication import ( +from annotation.distribution.main import distribute_tasks_extensively +from annotation.microservice_communication.assets_communication import ( prepare_files_for_distribution, ) -from app.models import File -from app.schemas import FileStatusEnumSchema, TaskStatusEnumSchema +from annotation.models import File +from annotation.schemas import FileStatusEnumSchema, TaskStatusEnumSchema from tests.override_app_dependency import TEST_TENANT JOB_ID = 1 diff --git a/annotation/tests/test_finish_task.py b/annotation/tests/test_finish_task.py index 14183a68f..9b13b6836 100644 --- a/annotation/tests/test_finish_task.py +++ b/annotation/tests/test_finish_task.py @@ -11,8 +11,8 @@ from sqlalchemy.exc import DBAPIError, SQLAlchemyError from sqlalchemy.orm import Session -from app.annotations import accumulate_pages_info, row_to_dict -from app.models import ( +from annotation.annotations import accumulate_pages_info, row_to_dict +from annotation.models import ( AgreementMetrics, AnnotatedDoc, Category, @@ -21,7 +21,7 @@ ManualAnnotationTask, User, ) -from app.schemas import ( +from annotation.schemas import ( AgreementScoreServiceResponse, CategoryTypeSchema, FileStatusEnumSchema, @@ -29,7 +29,7 @@ TaskStatusEnumSchema, ValidationSchema, ) -from app.tasks import get_task_revisions +from annotation.tasks import get_task_revisions from tests.consts import FINISH_TASK_PATH from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app @@ -870,7 +870,7 @@ def test_finish_tasks_failed_validation_statuses( } accumulate_pages = set(), failed, set(), set(), None monkeypatch.setattr( - 
"app.annotations.main.accumulate_pages_info", + "annotation.annotations.main.accumulate_pages_info", Mock(return_value=accumulate_pages), ) responses.add( @@ -917,7 +917,7 @@ def test_finish_tasks_reannotation_statuses( } accumulate_pages = set(), set(), annotated, set(), None monkeypatch.setattr( - "app.annotations.main.accumulate_pages_info", + "annotation.annotations.main.accumulate_pages_info", Mock(return_value=accumulate_pages), ) responses.add( @@ -1095,8 +1095,8 @@ def test_finish_task_should_work_with_all_pages_covered_extensively_twice( assert validation_task.status == TaskStatusEnumSchema.ready -@patch("app.tasks.services.AGREEMENT_SCORE_MIN_MATCH", 0.7) -@patch("app.tasks.resources.AGREEMENT_SCORE_ENABLED", "true") +@patch("annotation.tasks.services.AGREEMENT_SCORE_MIN_MATCH", 0.7) +@patch("annotation.tasks.resources.AGREEMENT_SCORE_ENABLED", "true") def test_finish_task_with_agreement_score_enabled_score_matched( prepare_db_with_extensive_coverage_annotations, ): @@ -1112,14 +1112,14 @@ def test_finish_task_with_agreement_score_enabled_score_matched( db.commit() with patch( - "app.tasks.services.get_agreement_score", + "annotation.tasks.services.get_agreement_score", return_value=AGREEMENT_SCORE_RESPONSE, ) as mock1: with patch( - "app.tasks.services.get_file_path_and_bucket", + "annotation.tasks.services.get_file_path_and_bucket", return_value=("", ""), ) as mock2: - with patch("app.tasks.resources.update_job_status") as mock4: + with patch("annotation.tasks.resources.update_job_status") as mock4: response = client.post( FINISH_TASK_PATH.format(task_id=annotation_tasks[2]["id"]), headers=TEST_HEADERS, @@ -1145,8 +1145,8 @@ def test_finish_task_with_agreement_score_enabled_score_matched( assert db.query(AgreementMetrics).count() == 6 -@patch("app.tasks.services.AGREEMENT_SCORE_MIN_MATCH", 0.99) -@patch("app.tasks.resources.AGREEMENT_SCORE_ENABLED", "true") +@patch("annotation.tasks.services.AGREEMENT_SCORE_MIN_MATCH", 0.99) +@patch("annotation.tasks.resources.AGREEMENT_SCORE_ENABLED", "true") def test_finish_task_with_agreement_score_enabled_score_not_matched( prepare_db_with_extensive_coverage_annotations, ): @@ -1162,14 +1162,14 @@ def test_finish_task_with_agreement_score_enabled_score_not_matched( db.commit() with patch( - "app.tasks.services.get_agreement_score", + "annotation.tasks.services.get_agreement_score", return_value=AGREEMENT_SCORE_RESPONSE, ) as mock1: with patch( - "app.tasks.services.get_file_path_and_bucket", + "annotation.tasks.services.get_file_path_and_bucket", return_value=("", ""), ) as mock2: - with patch("app.tasks.resources.update_job_status") as mock4: + with patch("annotation.tasks.resources.update_job_status") as mock4: response = client.post( FINISH_TASK_PATH.format(task_id=annotation_tasks[2]["id"]), headers=TEST_HEADERS, @@ -1194,7 +1194,7 @@ def test_finish_task_with_agreement_score_enabled_score_not_matched( assert job.status == JobStatusEnumSchema.in_progress -@patch("app.tasks.services.AGREEMENT_SCORE_MIN_MATCH", 0.5) +@patch("annotation.tasks.services.AGREEMENT_SCORE_MIN_MATCH", 0.5) @patch.dict(os.environ, {"AGREEMENT_SCORE_ENABLED": "true"}) def test_finish_task_with_agreement_score_enabled_annotation_not_finished( prepare_db_with_extensive_coverage_annotations_same_pages, @@ -1211,14 +1211,14 @@ def test_finish_task_with_agreement_score_enabled_annotation_not_finished( db.commit() with patch( - "app.tasks.services.get_agreement_score", + "annotation.tasks.services.get_agreement_score", return_value=AGREEMENT_SCORE_RESPONSE, ) as mock1: 
with patch( - "app.tasks.services.get_file_path_and_bucket", + "annotation.tasks.services.get_file_path_and_bucket", return_value=("", ""), ) as mock2: - with patch("app.tasks.resources.update_job_status") as mock4: + with patch("annotation.tasks.resources.update_job_status") as mock4: response = client.post( FINISH_TASK_PATH.format(task_id=annotation_tasks[2]["id"]), headers=TEST_HEADERS, diff --git a/annotation/tests/test_get_accumulated_revisions.py b/annotation/tests/test_get_accumulated_revisions.py index 03371b581..840593605 100644 --- a/annotation/tests/test_get_accumulated_revisions.py +++ b/annotation/tests/test_get_accumulated_revisions.py @@ -4,13 +4,13 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from app.annotations import LATEST -from app.microservice_communication.search import ( +from annotation.annotations import LATEST +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import AnnotatedDoc, User +from annotation.models import AnnotatedDoc, User from tests.consts import ANNOTATION_PATH from tests.override_app_dependency import TEST_TOKEN, app from tests.test_post_annotation import POST_ANNOTATION_PG_DOC @@ -341,7 +341,7 @@ def test_get_annotation_for_latest_revision_status_codes( expected_response, ): monkeypatch.setattr( - "app.annotations.main.connect_s3", + "annotation.annotations.main.connect_s3", Mock(return_value=minio_accumulate_revisions), ) params = {"page_numbers": page_numbers} diff --git a/annotation/tests/test_get_annotation_for_particular_revision.py b/annotation/tests/test_get_annotation_for_particular_revision.py index a377d6939..f05b2defe 100644 --- a/annotation/tests/test_get_annotation_for_particular_revision.py +++ b/annotation/tests/test_get_annotation_for_particular_revision.py @@ -4,12 +4,12 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import AnnotatedDoc, User +from annotation.models import AnnotatedDoc, User from tests.consts import ANNOTATION_PATH from tests.override_app_dependency import TEST_TENANT, TEST_TOKEN, app @@ -128,7 +128,7 @@ def test_get_annotation_for_particular_revision_status_codes( expected_response, ): monkeypatch.setattr( - "app.annotations.main.connect_s3", + "annotation.annotations.main.connect_s3", Mock(return_value=minio_particular_revision), ) response = client.get( diff --git a/annotation/tests/test_get_child_categories.py b/annotation/tests/test_get_child_categories.py index 9724d8612..f8da46ccc 100644 --- a/annotation/tests/test_get_child_categories.py +++ b/annotation/tests/test_get_child_categories.py @@ -7,16 +7,16 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from app.microservice_communication.assets_communication import ( +from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ) -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import Category -from app.schemas import CategoryTypeSchema +from annotation.models import Category +from annotation.schemas import CategoryTypeSchema from tests.consts import CATEGORIES_PATH, POST_JOBS_PATH from tests.override_app_dependency import ( TEST_HEADERS, 
diff --git a/annotation/tests/test_get_entities_status.py b/annotation/tests/test_get_entities_status.py index 779d74f47..dd96fd74e 100644 --- a/annotation/tests/test_get_entities_status.py +++ b/annotation/tests/test_get_entities_status.py @@ -1,7 +1,7 @@ import pytest from fastapi.testclient import TestClient -from app.schemas import EntitiesStatusesSchema, TaskStatusEnumSchema +from annotation.schemas import EntitiesStatusesSchema, TaskStatusEnumSchema from tests.override_app_dependency import TEST_HEADERS, app client = TestClient(app) diff --git a/annotation/tests/test_get_job.py b/annotation/tests/test_get_job.py index 7c3bf0fcd..785ab0c80 100644 --- a/annotation/tests/test_get_job.py +++ b/annotation/tests/test_get_job.py @@ -5,14 +5,14 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from app.jobs import collect_job_names -from app.microservice_communication.search import ( +from annotation.jobs import collect_job_names +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import Category, File, Job, User -from app.schemas import FileStatusEnumSchema, ValidationSchema +from annotation.models import Category, File, Job, User +from annotation.schemas import FileStatusEnumSchema, ValidationSchema from tests.consts import ANNOTATION_PATH from tests.override_app_dependency import TEST_TOKEN, app @@ -252,7 +252,7 @@ def test_get_jobs_name(monkeypatch, prepare_db_for_get_job): session = prepare_db_for_get_job job_ids = [1, 2, 3] monkeypatch.setattr( - "app.jobs.services.get_job_names", + "annotation.jobs.services.get_job_names", Mock(return_value={3: "JobNameFromJobsMicroservice"}), ) expected_result = { diff --git a/annotation/tests/test_get_job_files.py b/annotation/tests/test_get_job_files.py index eec7228c4..ceb4788bc 100644 --- a/annotation/tests/test_get_job_files.py +++ b/annotation/tests/test_get_job_files.py @@ -5,13 +5,13 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import Category, File, Job, User -from app.schemas import ( +from annotation.models import Category, File, Job, User +from annotation.schemas import ( CategoryTypeSchema, FileStatusEnumSchema, ValidationSchema, diff --git a/annotation/tests/test_get_job_progress.py b/annotation/tests/test_get_job_progress.py index 4e6153bb6..6a8cd27da 100644 --- a/annotation/tests/test_get_job_progress.py +++ b/annotation/tests/test_get_job_progress.py @@ -1,13 +1,13 @@ import pytest from fastapi.testclient import TestClient -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import Category, File, Job, User -from app.schemas import ( +from annotation.models import Category, File, Job, User +from annotation.schemas import ( CategoryTypeSchema, FileStatusEnumSchema, TaskStatusEnumSchema, diff --git a/annotation/tests/test_get_jobs_info_by_files.py b/annotation/tests/test_get_jobs_info_by_files.py index 16c2ba8c6..791f1dd97 100644 --- a/annotation/tests/test_get_jobs_info_by_files.py +++ b/annotation/tests/test_get_jobs_info_by_files.py @@ -4,9 +4,9 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from app.jobs.services import get_jobs_by_files -from app.models import File, 
Job, User -from app.schemas import JobStatusEnumSchema, ValidationSchema +from annotation.jobs.services import get_jobs_by_files +from annotation.models import File, Job, User +from annotation.schemas import JobStatusEnumSchema, ValidationSchema from tests.consts import POST_JOBS_PATH from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app @@ -209,7 +209,7 @@ def test_get_jobs_by_file( ): db = db_get_jobs_info_by_files monkeypatch.setattr( - "app.jobs.services.get_job_names", + "annotation.jobs.services.get_job_names", Mock(return_value=JOB_NAMES), ) @@ -286,7 +286,7 @@ def test_get_jobs_info_by_files( monkeypatch, db_get_jobs_info_by_files, file_ids, expected_result ): monkeypatch.setattr( - "app.jobs.services.get_job_names", + "annotation.jobs.services.get_job_names", Mock(return_value=JOB_NAMES), ) response = client.get( @@ -309,7 +309,7 @@ def test_get_jobs_info_by_files( ) def test_get_jobs_info_by_files_db_errors(db_errors, monkeypatch): monkeypatch.setattr( - "app.jobs.services.get_job_names", + "annotation.jobs.services.get_job_names", Mock(return_value=JOB_NAMES), ) diff --git a/annotation/tests/test_get_pages_info.py b/annotation/tests/test_get_pages_info.py index a5836d506..5aa92c869 100644 --- a/annotation/tests/test_get_pages_info.py +++ b/annotation/tests/test_get_pages_info.py @@ -5,15 +5,15 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from app.annotations import accumulate_pages_info -from app.microservice_communication.search import ( +from annotation.annotations import accumulate_pages_info +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import AnnotatedDoc, File, Job, ManualAnnotationTask, User -from app.schemas import TaskStatusEnumSchema, ValidationSchema -from app.tasks import get_task_revisions +from annotation.models import AnnotatedDoc, File, Job, ManualAnnotationTask, User +from annotation.schemas import TaskStatusEnumSchema, ValidationSchema +from annotation.tasks import get_task_revisions from tests.consts import CRUD_TASKS_PATH from tests.override_app_dependency import TEST_TENANT, TEST_TOKEN, app diff --git a/annotation/tests/test_get_revisions.py b/annotation/tests/test_get_revisions.py index 7a3e686c7..8834f8b82 100644 --- a/annotation/tests/test_get_revisions.py +++ b/annotation/tests/test_get_revisions.py @@ -7,8 +7,8 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from app.annotations import S3_START_PATH -from app.models import DocumentLinks +from annotation.annotations import S3_START_PATH +from annotation.models import DocumentLinks from tests.consts import ANNOTATION_PATH from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app @@ -612,7 +612,7 @@ def test_get_latest_revision_by_user( expected_response_key, ): monkeypatch.setattr( - "app.annotations.main.connect_s3", + "annotation.annotations.main.connect_s3", Mock(return_value=prepare_moto_s3_for_get_revisions), ) response = client.get( @@ -732,7 +732,7 @@ def test_get_all_revisions( expected_response_key, ): monkeypatch.setattr( - "app.annotations.main.connect_s3", + "annotation.annotations.main.connect_s3", Mock(return_value=prepare_moto_s3_for_get_revisions), ) response = client.get( @@ -752,7 +752,7 @@ def test_get_annotation_with_similarity( prepare_db_for_get_revisions_similar: DocumentLinks, ) -> None: monkeypatch.setattr( - "app.annotations.main.connect_s3", + "annotation.annotations.main.connect_s3", 
Mock(return_value=prepare_moto_s3_for_get_revisions), ) response = client.get( diff --git a/annotation/tests/test_get_revisions_without_annotation.py b/annotation/tests/test_get_revisions_without_annotation.py index d411a17f5..206b0fdce 100644 --- a/annotation/tests/test_get_revisions_without_annotation.py +++ b/annotation/tests/test_get_revisions_without_annotation.py @@ -2,13 +2,13 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import Category, File, Job, ManualAnnotationTask, User -from app.schemas import ( +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import ( CategoryTypeSchema, TaskStatusEnumSchema, ValidationSchema, diff --git a/annotation/tests/test_get_unassigned_files.py b/annotation/tests/test_get_unassigned_files.py index 6a2612b7b..0ee707bf3 100644 --- a/annotation/tests/test_get_unassigned_files.py +++ b/annotation/tests/test_get_unassigned_files.py @@ -5,8 +5,8 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from app.models import Category, File, Job, User -from app.schemas import CategoryTypeSchema, ValidationSchema +from annotation.models import Category, File, Job, User +from annotation.schemas import CategoryTypeSchema, ValidationSchema from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app client = TestClient(app) diff --git a/annotation/tests/test_get_users_for_job.py b/annotation/tests/test_get_users_for_job.py index 876fac740..e44473316 100644 --- a/annotation/tests/test_get_users_for_job.py +++ b/annotation/tests/test_get_users_for_job.py @@ -2,8 +2,8 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from app.models import Job, User -from app.schemas import ValidationSchema +from annotation.models import Job, User +from annotation.schemas import ValidationSchema from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app client = TestClient(app) diff --git a/annotation/tests/test_job_categories.py b/annotation/tests/test_job_categories.py index 10333f24c..d7522ccda 100644 --- a/annotation/tests/test_job_categories.py +++ b/annotation/tests/test_job_categories.py @@ -6,13 +6,13 @@ from pytest import mark from sqlalchemy.orm import Session -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import Category, Job -from app.schemas import JobTypeEnumSchema, ValidationSchema +from annotation.models import Category, Job +from annotation.schemas import JobTypeEnumSchema, ValidationSchema from tests.consts import POST_JOBS_PATH from tests.override_app_dependency import ( TEST_HEADERS, diff --git a/annotation/tests/test_microservices_search.py b/annotation/tests/test_microservices_search.py index aa651fcbb..c476d6f0f 100644 --- a/annotation/tests/test_microservices_search.py +++ b/annotation/tests/test_microservices_search.py @@ -1,18 +1,18 @@ import pytest import responses -from app.microservice_communication.assets_communication import ( +from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ) -from app.microservice_communication.jobs_communication import JOBS_SEARCH_URL -from app.microservice_communication.search import ( +from 
annotation.microservice_communication.jobs_communication import JOBS_SEARCH_URL +from annotation.microservice_communication.search import ( PAGE_SIZE, calculate_amount_of_pagination_pages, construct_search_params, expand_response, get_response, ) -from app.models import ManualAnnotationTask -from app.schemas import ( +from annotation.models import ManualAnnotationTask +from annotation.schemas import ( ExpandedManualAnnotationTaskSchema, TaskStatusEnumSchema, ) diff --git a/annotation/tests/test_post.py b/annotation/tests/test_post.py index 447bac2a7..9f2b2edf8 100644 --- a/annotation/tests/test_post.py +++ b/annotation/tests/test_post.py @@ -7,9 +7,9 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from app.microservice_communication.assets_communication import ASSETS_URL -from app.models import Category, File, Job, ManualAnnotationTask, User -from app.schemas import CategoryTypeSchema, ValidationSchema +from annotation.microservice_communication.assets_communication import ASSETS_URL +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import CategoryTypeSchema, ValidationSchema from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app client = TestClient(app) @@ -443,7 +443,7 @@ def test_post_tasks_empty_files_and_datasets_error( @patch.object(Session, "query") def test_post_tasks_exception(Session, monkeypatch, prepare_db_for_post): monkeypatch.setattr( - "app.jobs.resources.get_files_info", + "annotation.jobs.resources.get_files_info", Mock(return_value=FILES_FROM_ASSETS_FOR_TASK_INFO[0]), ) Session.side_effect = Mock(side_effect=SQLAlchemyError()) @@ -472,7 +472,7 @@ def test_post_tasks_only_files( expected_tasks_number, ): monkeypatch.setattr( - "app.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=returned_files), ) response = client.post( @@ -519,7 +519,7 @@ def test_post_tasks_new_user(monkeypatch, prepare_db_for_post): TASK_INFO_NEW_USER["user_ids"][1] ) monkeypatch.setattr( - "app.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=FILES_FROM_ASSETS_FOR_TASK_INFO_NEW_USER), ) response = client.post( @@ -581,7 +581,7 @@ def test_post_tasks_deadline( assets_files, ): monkeypatch.setattr( - "app.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=assets_files), ) response = client.post( @@ -596,7 +596,7 @@ def test_post_tasks_deadline( @pytest.mark.integration def test_post_tasks_validation_only(monkeypatch, prepare_db_for_post): monkeypatch.setattr( - "app.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=[FILES_FROM_ASSETS_FOR_TASK_INFO[2][0]]), ) tasks_info = { @@ -629,7 +629,7 @@ def test_post_tasks_wrong_files( returned_files, ): monkeypatch.setattr( - "app.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=returned_files), ) response = client.post( @@ -686,7 +686,7 @@ def test_post_tasks_users_validation_error( assets_files, ): monkeypatch.setattr( - "app.microservice_communication.assets_communication.get_response", + 
"annotation.microservice_communication.assets_communication.get_response", Mock(return_value=assets_files), ) response = client.post( diff --git a/annotation/tests/test_post_annotation.py b/annotation/tests/test_post_annotation.py index 2b2df2c73..a9714671d 100644 --- a/annotation/tests/test_post_annotation.py +++ b/annotation/tests/test_post_annotation.py @@ -12,7 +12,7 @@ from sqlalchemy.exc import DBAPIError, SQLAlchemyError from sqlalchemy.orm import Session -from app.annotations import ( +from annotation.annotations import ( MANIFEST, check_task_pages, construct_annotated_doc, @@ -20,21 +20,21 @@ get_pages_sha, row_to_dict, ) -from app.annotations.main import ( +from annotation.annotations.main import ( check_docs_identity, upload_json_to_minio, upload_pages_to_minio, ) -from app.kafka_client import producers -from app.microservice_communication.assets_communication import ( +from annotation.kafka_client import producers +from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ) -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import ( +from annotation.models import ( AnnotatedDoc, Category, File, @@ -42,7 +42,7 @@ ManualAnnotationTask, User, ) -from app.schemas import ( +from annotation.schemas import ( CategoryTypeSchema, DocForSaveSchema, JobTypeEnumSchema, @@ -1066,7 +1066,7 @@ def delete_date_fields(annotated_docs: List[dict]) -> None: # in annotation task ], ) -@patch("app.annotations.main.KafkaProducer", Mock) +@patch("annotation.annotations.main.KafkaProducer", Mock) @responses.activate def test_post_annotation_by_user_status_codes( mock_minio_empty_bucket, @@ -1181,7 +1181,7 @@ def test_post_annotation_by_user_status_codes( ), # if something wrong with assets ], ) -@patch("app.annotations.main.KafkaProducer", Mock) +@patch("annotation.annotations.main.KafkaProducer", Mock) @responses.activate def test_post_annotation_by_pipeline_status_codes( mock_minio_empty_bucket, @@ -1240,7 +1240,7 @@ def test_post_annotation_by_pipeline_status_codes( ), # if pages, failed and validated not provided ], ) -@patch("app.annotations.main.KafkaProducer", Mock) +@patch("annotation.annotations.main.KafkaProducer", Mock) @responses.activate def test_post_annotation_by_user_status_codes_with_existing_doc( mock_minio_empty_bucket, @@ -2190,7 +2190,7 @@ def test_construct_annotated_doc_different_jobs_and_files( ], ) @pytest.mark.skip -@patch("app.annotations.main.KafkaProducer", Mock) +@patch("annotation.annotations.main.KafkaProducer", Mock) @responses.activate def test_post_annotation_by_user( mock_minio_empty_bucket, @@ -2223,7 +2223,7 @@ def test_post_annotation_by_user( @pytest.mark.skip @pytest.mark.integration -@patch("app.annotations.main.KafkaProducer", Mock) +@patch("annotation.annotations.main.KafkaProducer", Mock) @responses.activate def test_post_annotation_by_pipeline( mock_minio_empty_bucket, @@ -2260,7 +2260,7 @@ def test_post_annotation_by_pipeline( @pytest.mark.integration -@patch("app.annotations.main.KafkaProducer", Mock) +@patch("annotation.annotations.main.KafkaProducer", Mock) @responses.activate def test_post_annotation_by_pipeline_two_eq_revs_in_a_row( mock_minio_empty_bucket, prepare_db_for_post_annotation @@ -2342,7 +2342,7 @@ def test_check_task_pages(pages, validated, failed, task_pages): @pytest.mark.skip @pytest.mark.integration -@patch("app.annotations.main.KafkaProducer", Mock) 
+@patch("annotation.annotations.main.KafkaProducer", Mock) @responses.activate def test_post_annotation_by_user_assign_similar_doc( mock_minio_empty_bucket, @@ -2399,7 +2399,7 @@ def test_post_annotation_by_user_assign_similar_doc( @pytest.mark.skip @pytest.mark.integration -@patch("app.annotations.main.KafkaProducer", Mock) +@patch("annotation.annotations.main.KafkaProducer", Mock) @responses.activate @pytest.mark.parametrize( ("revision", "label"), @@ -2474,7 +2474,7 @@ def test_post_annotation_by_user_similar_doc_no_category( (ANNOTATION_VALIDATION_TASKS[5], DOC_FOR_SAVE_USER_ONLY_VALIDATED), ], ) -@patch("app.annotations.main.KafkaProducer", Mock) +@patch("annotation.annotations.main.KafkaProducer", Mock) @responses.activate def test_post_user_annotation_change_task_statuses( mock_minio_empty_bucket, diff --git a/annotation/tests/test_post_job.py b/annotation/tests/test_post_job.py index fad55527e..812824d2c 100644 --- a/annotation/tests/test_post_job.py +++ b/annotation/tests/test_post_job.py @@ -7,13 +7,13 @@ from sqlalchemy.ext.declarative.api import DeclarativeMeta from sqlalchemy.orm import Session -from app.annotations import row_to_dict -from app.jobs import get_job_attributes_for_post -from app.microservice_communication.assets_communication import ( +from annotation.annotations import row_to_dict +from annotation.jobs import get_job_attributes_for_post +from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ASSETS_URL, ) -from app.models import ( +from annotation.models import ( Category, File, Job, @@ -21,7 +21,7 @@ User, association_job_annotator, ) -from app.schemas import ( +from annotation.schemas import ( CategoryTypeSchema, JobStatusEnumSchema, JobTypeEnumSchema, diff --git a/annotation/tests/test_post_next_task.py b/annotation/tests/test_post_next_task.py index f332a8552..6592d5244 100644 --- a/annotation/tests/test_post_next_task.py +++ b/annotation/tests/test_post_next_task.py @@ -3,17 +3,17 @@ import pytest import responses -from app.microservice_communication.assets_communication import ( +from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ) -from app.microservice_communication.search import ( +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.microservice_communication.user import USERS_SEARCH_URL -from app.models import Category, File, Job, ManualAnnotationTask, User -from app.schemas import ( +from annotation.microservice_communication.user import USERS_SEARCH_URL +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import ( CategoryTypeSchema, TaskStatusEnumSchema, ValidationSchema, diff --git a/annotation/tests/test_post_unassgined_files.py b/annotation/tests/test_post_unassgined_files.py index 667859890..01c15e343 100644 --- a/annotation/tests/test_post_unassgined_files.py +++ b/annotation/tests/test_post_unassgined_files.py @@ -3,14 +3,14 @@ from sqlalchemy.exc import DBAPIError, SQLAlchemyError from sqlalchemy.sql.elements import not_ -from app.annotations import row_to_dict -from app.microservice_communication.search import ( +from annotation.annotations import row_to_dict +from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, ) -from app.models import File, Job, ManualAnnotationTask, User -from app.schemas import ( +from annotation.models import File, Job, ManualAnnotationTask, User +from annotation.schemas import ( 
FileStatusEnumSchema, TaskStatusEnumSchema, ValidationSchema, diff --git a/annotation/tests/test_search_kafka.py b/annotation/tests/test_search_kafka.py index e5744a356..6d7c56f89 100644 --- a/annotation/tests/test_search_kafka.py +++ b/annotation/tests/test_search_kafka.py @@ -1,13 +1,13 @@ from unittest import mock import responses -from app.annotations import add_search_annotation_producer -from app.kafka_client import producers -from app.microservice_communication.assets_communication import ( +from annotation.annotations import add_search_annotation_producer +from annotation.kafka_client import producers +from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ) -from app.models import Category, File, Job, ManualAnnotationTask, User -from app.schemas import ( +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import ( CategoryTypeSchema, JobStatusEnumSchema, TaskStatusEnumSchema, @@ -115,7 +115,7 @@ def test_kafka_connection_error(monkeypatch): is correctly handled and no producers added to KAFKA_PRODUCERS. """ monkeypatch.setattr( - "app.annotations.main.KafkaProducer", + "annotation.annotations.main.KafkaProducer", mock.Mock(side_effect=NoBrokersAvailable()), ) add_search_annotation_producer() @@ -130,8 +130,8 @@ def __init__(self, bootstrap_servers, client_id, value_serializer): @mark.unittest -@mock.patch(target="app.annotations.main.KAFKA_BOOTSTRAP_SERVER", new="url_1") -@mock.patch(target="app.annotations.main.KafkaProducer", new=MockProducer) +@mock.patch(target="annotation.annotations.main.KAFKA_BOOTSTRAP_SERVER", new="url_1") +@mock.patch(target="annotation.annotations.main.KafkaProducer", new=MockProducer) def test_add_search_annotation_producer(monkeypatch): """Checks that "add_search_annotation_producer" function calls "_init_search_annotation_producer" which creates KafkaProducer with @@ -150,7 +150,7 @@ def test_producer_startup_creation(monkeypatch): """Checks that producer creation automatically called on app startup.""" mock_startup = mock.Mock() monkeypatch.setattr( - "app.annotations.main._init_search_annotation_producer", mock_startup + "annotation.annotations.main._init_search_annotation_producer", mock_startup ) with TestClient(app): mock_startup.assert_called_once() @@ -168,8 +168,8 @@ def test_producer_startup_creation(monkeypatch): (f"{ANNOTATION_KAFKA_TASK_ID}", DOC_FOR_SAVE_BY_USER), ], ) -@mock.patch(target="app.annotations.main.KAFKA_SEARCH_TOPIC", new="test") -@mock.patch(target="app.annotations.main.KafkaProducer", new=mock.Mock()) +@mock.patch(target="annotation.annotations.main.KAFKA_SEARCH_TOPIC", new="test") +@mock.patch(target="annotation.annotations.main.KafkaProducer", new=mock.Mock()) def test_post_annotation_send_message( monkeypatch, empty_bucket, @@ -180,7 +180,7 @@ def test_post_annotation_send_message( """Tests that producer sent correct message when pipeline or user posts new annotation.""" monkeypatch.setattr( - "app.annotations.main.connect_s3", + "annotation.annotations.main.connect_s3", mock.Mock(return_value=empty_bucket), ) responses.add( diff --git a/annotation/tests/test_start_job.py b/annotation/tests/test_start_job.py index 038d13e5a..3da8691bb 100644 --- a/annotation/tests/test_start_job.py +++ b/annotation/tests/test_start_job.py @@ -7,10 +7,10 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from app.annotations import row_to_dict -from app.jobs import update_inner_job_status -from app.models import Category, 
File, Job, ManualAnnotationTask, User -from app.schemas import ( +from annotation.annotations import row_to_dict +from annotation.jobs import update_inner_job_status +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import ( CategoryTypeSchema, JobStatusEnumSchema, TaskStatusEnumSchema, diff --git a/annotation/tests/test_tasks_crud_cr.py b/annotation/tests/test_tasks_crud_cr.py index fde08c995..9e2353c68 100644 --- a/annotation/tests/test_tasks_crud_cr.py +++ b/annotation/tests/test_tasks_crud_cr.py @@ -10,13 +10,13 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from app.microservice_communication.assets_communication import ( +from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ) -from app.microservice_communication.jobs_communication import JOBS_SEARCH_URL -from app.microservice_communication.user import USERS_SEARCH_URL -from app.models import Category, File, Job, ManualAnnotationTask, User -from app.schemas import CategoryTypeSchema, ValidationSchema +from annotation.microservice_communication.jobs_communication import JOBS_SEARCH_URL +from annotation.microservice_communication.user import USERS_SEARCH_URL +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import CategoryTypeSchema, ValidationSchema from tests.consts import CRUD_TASKS_PATH from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app from tests.test_post import check_files_distributed_pages @@ -1302,7 +1302,7 @@ def prepare_filtration_body_double_filter( @pytest.mark.integration -@patch("app.tasks.resources.filter_tasks_db", side_effect=SQLAlchemyError) +@patch("annotation.tasks.resources.filter_tasks_db", side_effect=SQLAlchemyError) def test_search_tasks_500_error(prepare_db_for_cr_task): data = prepare_filtration_body() response = client.post(SEARCH_TASKS_PATH, json=data, headers=TEST_HEADERS) diff --git a/annotation/tests/test_tasks_crud_ud.py b/annotation/tests/test_tasks_crud_ud.py index fb811bfd6..e94c40465 100644 --- a/annotation/tests/test_tasks_crud_ud.py +++ b/annotation/tests/test_tasks_crud_ud.py @@ -2,9 +2,9 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from app.annotations import row_to_dict -from app.models import Category, File, Job, ManualAnnotationTask, User -from app.schemas import ( +from annotation.annotations import row_to_dict +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import ( CategoryTypeSchema, TaskStatusEnumSchema, ValidationSchema, diff --git a/annotation/tests/test_update_job.py b/annotation/tests/test_update_job.py index 27ebb8e66..2761f3284 100644 --- a/annotation/tests/test_update_job.py +++ b/annotation/tests/test_update_job.py @@ -7,8 +7,8 @@ from sqlalchemy import asc from sqlalchemy.exc import SQLAlchemyError -from app.annotations import row_to_dict -from app.models import ( +from annotation.annotations import row_to_dict +from annotation.models import ( Category, File, Job, @@ -18,7 +18,7 @@ association_job_owner, association_job_validator, ) -from app.schemas import ( +from annotation.schemas import ( CategoryTypeSchema, FileStatusEnumSchema, JobStatusEnumSchema, @@ -243,7 +243,7 @@ @mark.integration -@patch("app.jobs.resources.get_job", side_effect=SQLAlchemyError) +@patch("annotation.jobs.resources.get_job", side_effect=SQLAlchemyError) def 
test_update_job_connection_exception(prepare_db_for_update_job): """Tests error handling for SQLAlchemy errors.""" response = client.patch( @@ -405,7 +405,7 @@ def test_update_files( expected_result = new_files new_ids = [new_id["file_id"] for new_id in new_files] monkeypatch.setattr( - "app.jobs.services.get_files_info", + "annotation.jobs.services.get_files_info", Mock(return_value=new_files), ) response = client.patch( @@ -564,7 +564,7 @@ def test_update_user_constraints( was added into database). """ monkeypatch.setattr( - "app.jobs.services.get_job_names", + "annotation.jobs.services.get_job_names", Mock(return_value={job_id: "JobName"}), ) response = client.patch( @@ -588,7 +588,7 @@ def test_update_files_and_datasets_for_already_started_job( "files and datasets can't be updated for already started job" ) monkeypatch.setattr( - "app.jobs.services.get_job_names", + "annotation.jobs.services.get_job_names", Mock(return_value={UPDATE_JOB_IDS[5]: "JobName"}), ) response = client.patch( @@ -639,7 +639,7 @@ def test_update_extraction_job_new_user( ) assert existing_users_count == 1 monkeypatch.setattr( - "app.jobs.services.get_job_names", + "annotation.jobs.services.get_job_names", Mock(return_value={job_id: "JobName"}), ) response = client.patch( @@ -725,7 +725,7 @@ def test_update_jobs_name_from_db_or_microservice( """ session = prepare_db_for_update_job monkeypatch.setattr( - "app.jobs.services.get_job_names", + "annotation.jobs.services.get_job_names", Mock(return_value={3: "JobName3"}), ) response = client.patch( diff --git a/annotation/tests/test_validation.py b/annotation/tests/test_validation.py index 610dc7746..0340b0df5 100644 --- a/annotation/tests/test_validation.py +++ b/annotation/tests/test_validation.py @@ -6,15 +6,15 @@ from fastapi.testclient import TestClient from sqlalchemy import or_ -from app.annotations import row_to_dict -from app.models import AnnotatedDoc, File, Job, ManualAnnotationTask, User -from app.schemas import ( +from annotation.annotations import row_to_dict +from annotation.models import AnnotatedDoc, File, Job, ManualAnnotationTask, User +from annotation.schemas import ( AnnotationAndValidationActionsSchema, FileStatusEnumSchema, TaskStatusEnumSchema, ValidationSchema, ) -from app.tasks.validation import ( +from annotation.tasks.validation import ( _find_annotators_for_failed_pages, check_user_job_action, check_user_job_belonging, diff --git a/dev_runner/README.md b/dev_runner/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/dev_runner/collect_requirements.sh b/dev_runner/collect_requirements.sh new file mode 100755 index 000000000..e46894716 --- /dev/null +++ b/dev_runner/collect_requirements.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +set -e -x + +TMP_REQUIREMENTS_FILE=$(mktemp) +ROOT_DIR=$(git rev-parse --show-toplevel) +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +POETRY_SERVICES=(search annotation convert models processing taxonomy) +PIPENV_SERVICES=(assets) +PIP_SERVICES=(jobs pipelines scheduler users) +DUPLICATED_DEPENDENCIES=(starlette fastapi aiohttp sqlalchemy_utils sqlalchemy aiosignal alembic anyio asgiref attrs boto3 botocore cachetools certifi cffi charset-normalizer click colorama cryptography h11 idna importlib-metadata importlib-resources jmespath mako markupsafe pillow psycopg2-binary pycparser pydantic pyjwt python-dotenv pyyaml requests s3transfer setuptools typing-extensions urllib3 uvicorn yarl zipp frozenlist minio multidict sniffio aiokafka websocket-client) + 
+collect_poetry_dependencies() { + for poetry_service in "${POETRY_SERVICES[@]}"; do + cd "$ROOT_DIR/$poetry_service" || exit + # ensure that you have https://pypi.org/project/poetry-plugin-export/ installed + poetry export -f requirements.txt --without-hashes | cut -d \; -f 1 >> "$TMP_REQUIREMENTS_FILE" + done +} + +collect_pipenv_dependencies() { + for pipenv_service in "${PIPENV_SERVICES[@]}"; do + cd "$ROOT_DIR/$pipenv_service" || exit + pipenv requirements | tail -n +2 | cut -d \; -f 1 >> "$TMP_REQUIREMENTS_FILE" + done +} + +collect_pip_dependencies() { + for pip_service in "${PIP_SERVICES[@]}"; do + cd "$ROOT_DIR/$pip_service" || exit + if [ -f requirements.txt ]; then + cat requirements.txt | cut -d \; -f 1 >> "$TMP_REQUIREMENTS_FILE" + fi + done +} + +collect_poetry_dependencies +collect_pipenv_dependencies +collect_pip_dependencies +cd "$SCRIPT_DIR" || exit +requirements=$(cat "$TMP_REQUIREMENTS_FILE") +for dependency in "${DUPLICATED_DEPENDENCIES[@]}"; do + requirements=$(echo "$requirements" | grep -v "$dependency") +done +echo $requirements | xargs poetry -v add + +for dependency in "${DUPLICATED_DEPENDENCIES[@]}"; do + poetry add "$dependency"=="*" +done + +poetry add ../lib/tenants ../lib/filter_lib python-magic-bin diff --git a/dev_runner/conf/shared.env b/dev_runner/conf/shared.env new file mode 100644 index 000000000..e3c7cb01f --- /dev/null +++ b/dev_runner/conf/shared.env @@ -0,0 +1,85 @@ +POSTGRES_HOST=postgres +DB_HOST=${POSTGRES_HOST} +POSTGRES_PORT=5432 +DB_PORT=${POSTGRES_PORT} +POSTGRES_USER=postgres +DB_USERNAME=${POSTGRES_USER} +POSTGRES_PASSWORD=postgres +DB_PASSWORD=${POSTGRES_PASSWORD} + +S3_ENDPOINT_URL=http://localhost +MINIO_HOST=${S3_ENDPOINT_URL} +MINIO_PUBLIC_HOST=${MINIO_HOST} +MINIO_URI=localhost:9000 +MINIO_SERVER=${MINIO_URI} +S3_LOGIN=minioadmin +S3_PASS=minioadmin +MINIO_ACCESS_KEY=${S3_LOGIN} +MINIO_SECRET_KEY=${S3_PASS} +MINIO_ROOT_USER=${MINIO_ACCESS_KEY} +MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY} + +DB_URL=http://localhost + +ANNOTATION_PORT=8000 +ASSETS_PORT=8001 +CONVERT_PORT=8002 +JOBS_PORT=8003 +MODELS_PORT=8004 +PIPELINES_PORT=8005 +PROCESSING_PORT=8006 +SCHEDULER_PORT=8007 +SEARCH_PORT=8008 +TAXONOMY_PORT=8009 +USERS_PORT=8010 + +ASSETS_URL=http://localhost/datasets +ASSETS_URI=${ASSETS_URL} +HOST_ASSETS=${ASSETS_URL} +ASSETS_FILES_URL=http://localhost/files/search +ASSETS_SERVICE_URL=${DB_URL}:${ASSETS_PORT}/files/ +ANNOTATION_URL=${DB_URL}:${ANNOTATION_PORT} +ANNOTATION_SERVICE_URL=${DB_URL}:${ANNOTATION_PORT}
+ANNOTATION_MICROSERVICE_URI=${ANNOTATION_SERVICE_URL} +CATEGORY_SERVICE_URL=${ANNOTATION_SERVICE_URL}/categories/ + +CONVERT_URL=${DB_URL}:${CONVERT_PORT} +CONVERT_EXPORT_URL=${CONVERT_URL}/export + +JOBS_URL=${DB_URL}:${JOBS_PORT} +JOB_SERVICE_URL=${DB_URL}:${JOBS_PORT}/jobs/ + +MODELS_URL=${DB_URL}:${MODELS_PORT} +MODELS_URI=${MODELS_URL} +HOST_MODELS=${MODELS_URL} +MODELS_SEARCH_ENDPOINT=${MODELS_URL}/search + +PIPELINES_URL=${DB_URL}:${PIPELINES_PORT} +PIPELINES_URI=${PIPELINES_URL} +HOST_PIPELINES=${PIPELINES_URL} + +PROCESSING_URL=${DB_URL}:${PROCESSING_PORT} +PROCESSING_URI=${PROCESSING_URL} +TAXONOMY_URL=${DB_URL}:${TAXONOMY_PORT} + +JOBS_SEARCH_URL=${JOB_SERVICE_URL}/search + +KAFKA_BOOTSTRAP_SERVER=localhost:9092 +KAFKA_BOOTSTRAP_SERVERS=${KAFKA_BOOTSTRAP_SERVER} +KAFKA_SEARCH_TOPIC=search + +KEYCLOAK_URL=http://localhost +KEYCLOAK_URI=${KEYCLOAK_URL} + +GOTENBERG=gotenberg:3000 +GOTENBERG_LIBRE_OFFICE_ENDPOINT="http://${GOTENBERG}/forms/libreoffice/convert" +GOTENBERG_FORMATS=[".txt",".docx",".doc",".bib",".xml",".fodt",".html",".ltx",".odt",".ott",".pdb",".psw",".rtf",".sdw",".stw",".sxw",".uot",".vor",".wps",".epub",".emf",".fodg",".met",".odd",".otg",".std",".svg",".svm",".swf",".sxd",".sxw",".tiff",".xhtml",".xpm",".fodp",".potm",".pot",".pptx",".pps",".ppt",".pwp",".sda",".sdd",".sti",".sxi",".uop",".wmf",".odp"] +IMAGE_FORMATS=[".png",".bmp", ".pbm", ".pct", ".pgm", ".ppm", ".ras", ".tiff"] + +ROOT_PATH= +LOG_LEVEL=DEBUG + +ES_HOST_TEST=localhost +ES_HOST=${ES_HOST_TEST} +ES_PORT_TEST=9200 +ES_PORT=${ES_PORT_TEST} diff --git a/dev_runner/dev_runner/__init__.py b/dev_runner/dev_runner/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dev_runner/dev_runner/conf.py b/dev_runner/dev_runner/conf.py new file mode 100644 index 000000000..7db17fc22 --- /dev/null +++ b/dev_runner/dev_runner/conf.py @@ -0,0 +1,22 @@ +import os +from pydantic import BaseSettings + + +BASE_PORT = int(os.environ.get("BD_BASE_PORT", 8000)) + + +class RunnersSettings(BaseSettings): + ANNOTATION_PORT: int = BASE_PORT + 0 + ASSETS_PORT: int = BASE_PORT + 1 + CONVERT_PORT: int = BASE_PORT + 2 + JOBS_PORT: int = BASE_PORT + 3 + MODELS_PORT: int = BASE_PORT + 4 + PIPELINES_PORT: int = BASE_PORT + 5 + PROCESSING_PORT: int = BASE_PORT + 6 + SCHEDULER_PORT: int = BASE_PORT + 7 + SEARCH_PORT: int = BASE_PORT + 8 + TAXONOMY_PORT: int = BASE_PORT + 9 + USERS_PORT: int = BASE_PORT + 10 + + +settings = RunnersSettings() diff --git a/dev_runner/dev_runner/runners/__init__.py b/dev_runner/dev_runner/runners/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dev_runner/dev_runner/runners/annotation_runner.py b/dev_runner/dev_runner/runners/annotation_runner.py new file mode 100644 index 000000000..171f87180 --- /dev/null +++ b/dev_runner/dev_runner/runners/annotation_runner.py @@ -0,0 +1,11 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings + + +class AnnotationRunner(BaseRunner): + PACKAGE_NAME = "annotation" + APP_NAME = "annotation" + PORT = settings.ANNOTATION_PORT + DB_CREDENTIALS = { + "POSTGRES_DB": "annotation", + } \ No newline at end of file diff --git a/dev_runner/dev_runner/runners/assets_runner.py b/dev_runner/dev_runner/runners/assets_runner.py new file mode 100644 index 000000000..7a84369d4 --- /dev/null +++ b/dev_runner/dev_runner/runners/assets_runner.py @@ -0,0 +1,23 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings + + +class AssetsRunner(BaseRunner): + PACKAGE_NAME = "assets" + PORT = settings.ASSETS_PORT +
APP_NAME = "assets" + DB_CREDENTIALS = { + "POSTGRES_DB": "file_management" + } + ENVIRONMENT = { + "APP_NAME": "assets", + "UPLOADING_LIMIT": "100", + "WIDTH": "450", + "BBOX_EXT": "20", + "ROOT_PATH": "", + "LOG_FILE": "False", + "S3_PREFIX": "", + "TEST_REGION": "us-west-2", + "MINIO_SECURE_CONNECTION": "False", + "SQLACLHEMY_POOL_SIZE": "DEBUG", + } diff --git a/dev_runner/dev_runner/runners/base_runner.py b/dev_runner/dev_runner/runners/base_runner.py new file mode 100644 index 000000000..bec12fa23 --- /dev/null +++ b/dev_runner/dev_runner/runners/base_runner.py @@ -0,0 +1,120 @@ +import asyncio +from pathlib import Path +import os +import sys +from importlib import import_module +from uvicorn.config import Config +from uvicorn.server import Server +from uvicorn.supervisors import ChangeReload, Multiprocess +import logging + +ROOT_PATH = Path(__file__).parent.parent.parent.parent + + +class RunnerRegistry(type): + RUNNERS: dict[str, type] + + def __new__(mcs, name, bases, attrs): + new_class = super().__new__(mcs, name, bases, attrs) + if not hasattr(mcs, "RUNNERS"): + mcs.RUNNERS = {} + elif new_class.__name__ != "BaseRunner": + mcs.RUNNERS[new_class.PACKAGE_NAME] = new_class + return new_class + + @classmethod + def get_runners(mcs) -> dict[str, type]: + return mcs.RUNNERS + + @classmethod + async def run(mcs, services: tuple[str]): + if not services: + services = mcs.get_runners().keys() + runners: [BaseRunner] = [] + for runner in mcs.get_runners().values(): + if runner.IS_ACTIVE and runner.PACKAGE_NAME in services: + service = runner().run_app_async() + service.__name__ = runner.PACKAGE_NAME + runners.append(service) + done, pending = await asyncio.wait([service for service in runners], return_when=asyncio.FIRST_COMPLETED) + for task in pending: + task.cancel() + + +class BaseRunner(metaclass=RunnerRegistry): + PACKAGE_NAME: str + APP_NAME: str = "app" + MODULE_NAME: str = "main" + PORT: int + HOST: str = "localhost" + DB_CREDENTIALS: dict = {} + ENVIRONMENT: dict = {} + IS_ACTIVE: bool = True + + def __init__(self, *args, **kwargs): + for attr in ["PACKAGE_NAME", "PORT"]: + if not hasattr(self, attr): + raise NotImplementedError(f"{attr} is not set") + super().__init__(*args, **kwargs) + + def run(self): + self.setup_env() + self.run_app() + + @staticmethod + def _default_db_credentials() -> dict[str, str]: + return { + "POSTGRES_USER": "postgres", + "POSTGRES_PASSWORD": "postgres", + "POSTGRES_HOST": "localhost", + "POSTGRES_PORT": "5432", + "POSTGRES_DB": "postgres" + } + + @staticmethod + def _default_environment() -> dict[str, str]: + return { + "ANNOTATION_NO_AUTH": "True", + } + + def setup_env(self): + db_credentials = self._default_db_credentials() + db_credentials.update(self.DB_CREDENTIALS) + environment = self._default_environment() + environment.update(self.ENVIRONMENT) + os.environ.update(environment) + os.environ.update(db_credentials) + + def create_server(self): + logging.debug(f"[{self.__class__.__name__}]Starting {self.PACKAGE_NAME} on port {self.PORT}") + self.setup_env() + package_path = str(ROOT_PATH / self.PACKAGE_NAME) + sys.path.append(package_path) + try: + module = import_module(f"{self.APP_NAME}.{self.MODULE_NAME}") + app = module.app + print(f"[{self.__class__.__name__}]: Module {module} is imported") + except ModuleNotFoundError as e: + logging.error(f"[{self.__class__.__name__}]: Module {self.APP_NAME}.{self.MODULE_NAME} not found") + raise e + sys.path.remove(package_path) + + config = Config(app, host=self.HOST, port=self.PORT, reload=True) # 
TODO: check additional folders for reloading + server = Server(config=config) + + if config.should_reload: + sock = config.bind_socket() + ChangeReload(config, target=server.run, sockets=[sock]).run() + elif config.workers > 1: + sock = config.bind_socket() + Multiprocess(config, target=server.run, sockets=[sock]).run() + else: + return server + if config.uds: + os.remove(config.uds) + + def run_app(self): + self.create_server().run() + + async def run_app_async(self): + await self.create_server().serve() diff --git a/dev_runner/dev_runner/runners/convert_runner.py b/dev_runner/dev_runner/runners/convert_runner.py new file mode 100644 index 000000000..9c5b4979f --- /dev/null +++ b/dev_runner/dev_runner/runners/convert_runner.py @@ -0,0 +1,11 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings + + +class ConvertRunner(BaseRunner): + PACKAGE_NAME = "convert" + PORT = settings.CONVERT_PORT + APP_NAME = "convert" + ENVIRONMENT = { + "IMPORT_COCO_URL": "http://0.0.0.0:8080/converter/import/" + } diff --git a/dev_runner/dev_runner/runners/jobs_runner.py b/dev_runner/dev_runner/runners/jobs_runner.py new file mode 100644 index 000000000..d1a845cfd --- /dev/null +++ b/dev_runner/dev_runner/runners/jobs_runner.py @@ -0,0 +1,11 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings + + +class JobsRunner(BaseRunner): + PACKAGE_NAME = "jobs" + PORT = settings.JOBS_PORT + APP_NAME = "jobs" + ENVIRONMENT = { + "POSTGRESQL_JOBMANAGER_DATABASE_URI": "postgresql+psycopg2://postgres:postgres@localhost:5432/job_manager" + } diff --git a/dev_runner/dev_runner/runners/models_runner.py b/dev_runner/dev_runner/runners/models_runner.py new file mode 100644 index 000000000..9ba6ca029 --- /dev/null +++ b/dev_runner/dev_runner/runners/models_runner.py @@ -0,0 +1,19 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings + + +class ModelsRunner(BaseRunner): + PACKAGE_NAME = "models" + PORT = settings.MODELS_PORT + APP_NAME = "models" + DB_CREDENTIALS = { + "POSTGRES_DB": "models", + } + ENVIRONMENT = { + "DATABASE_URL": "postgresql+psycopg2://postgres:postgres@localhost:5432/models", + "MODELS_NAMESPACE": "dev2", + "DOMAIN_NAME": "localhost", + "ALGORITHM": "RS256", + "SECRET": "some_secret_key", + "DOCKER_REGISTRY_URL": "localhost:5000", + } diff --git a/dev_runner/dev_runner/runners/pipelines_runner.py b/dev_runner/dev_runner/runners/pipelines_runner.py new file mode 100644 index 000000000..73a29a32d --- /dev/null +++ b/dev_runner/dev_runner/runners/pipelines_runner.py @@ -0,0 +1,21 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings + + +class PipelinesRunner(BaseRunner): + PACKAGE_NAME = "pipelines" + PORT = settings.PIPELINES_PORT + APP_NAME = "pipelines" + MODULE_NAME = "app" + DB_CREDENTIALS = { + "POSTGRES_DB": "pipelines", + } + ENVIRONMENT = { + "HEARTBEAT_TIMEOUT": "15", + "HEARTBEAT_THRESHOLD_MUL": "10", + "RUNNER_TIMEOUT": "5", + "MAX_WORKERS": "20", + "DEBUG_MERGE": "True", + "SA_POOL_SIZE": "40", + "LOG_LEVEL": "DEBUG", + } diff --git a/dev_runner/dev_runner/runners/processing_runner.py b/dev_runner/dev_runner/runners/processing_runner.py new file mode 100644 index 000000000..498f6432b --- /dev/null +++ b/dev_runner/dev_runner/runners/processing_runner.py @@ -0,0 +1,21 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings +import logging + + +class ProcessingRunner(BaseRunner): + PACKAGE_NAME = "processing" + PORT = settings.PROCESSING_PORT + APP_NAME = "processing" + DB_CREDENTIALS 
= { + "POSTGRES_DB": "processing", + } + ENVIRONMENT = { + "POSTGRES_DB": "processing", + "MODELS_POSTFIX": "", + "LOCAL_RUN": "1", + "SERVICE_NAME": "processing", + "HOST": "localhost", + "PORT": str(PORT), + "LOG_LEVEL": "10", + } diff --git a/dev_runner/dev_runner/runners/scheduler_runner.py b/dev_runner/dev_runner/runners/scheduler_runner.py new file mode 100644 index 000000000..8e4c62360 --- /dev/null +++ b/dev_runner/dev_runner/runners/scheduler_runner.py @@ -0,0 +1,26 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings + + +class SchedulerRunner(BaseRunner): + PACKAGE_NAME = "scheduler" + PORT = settings.SCHEDULER_PORT + APP_NAME = "scheduler" + MODULE_NAME = "app" + DB_CREDENTIALS = { + "POSTGRES_DB": "scheduler" + } + ENVIRONMENT = { + "DB_NAME": "scheduler", + "DB_URL": "postgresql+psycopg2://postgres:postgres@localhost:5432/scheduler", + "TEST_MODE": "False", + "SA_POOL_SIZE": "10", + "KAFKA_BOOTSTRAP_SERVER": "localhost:9092", + "KAFKA_GROUP_ID": "scheduler_group", + "KAFKA_CONSUME_TOPICS": "pipelines", + "KAFKA_TOPICS_PARTITIONS": "1", + "KAFKA_REPLICATION_FACTORS": "1", + "HEARTBEAT_TIMEOUT": "10", + "THRESHOLD_MUL": "3", + "LOG_LEVEL": "DEBUG", + } diff --git a/dev_runner/dev_runner/runners/search_runner.py b/dev_runner/dev_runner/runners/search_runner.py new file mode 100644 index 000000000..adabe94ca --- /dev/null +++ b/dev_runner/dev_runner/runners/search_runner.py @@ -0,0 +1,24 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings + + +class SearchRunner(BaseRunner): + PACKAGE_NAME = "search" + PORT = settings.SEARCH_PORT + APP_NAME = "search" + ENVIRONMENT = { + "S3_START_PATH": "annotation", + "APP_TITLE": "Badgerdoc Search", + "JOBS_SEARCH": "/jobs/search", + "ANNOTATION_CATEGORIES": "/categories", + "ANNOTATION_CATEGORIES_SEARCH": "/categories/search", + "MANIFEST": "manifest.json", + "TEXT_PIECES_PATH": "/pieces", + "INDEXATION_PATH": "indexation", + "COMPUTED_FIELDS": '["job_id", "category"]', + "JWT_ALGORITHM": "RS256", + "KAFKA_GROUP_ID": "search_group", + "KAFKA_SEARCH_TOPIC": "search", + "KAFKA_SEARCH_TOPIC_PARTITIONS": "50", + "KAFKA_SEARCH_REPLICATION_FACTOR": "1", + } diff --git a/dev_runner/dev_runner/runners/taxonomy_runner.py b/dev_runner/dev_runner/runners/taxonomy_runner.py new file mode 100644 index 000000000..2de80283a --- /dev/null +++ b/dev_runner/dev_runner/runners/taxonomy_runner.py @@ -0,0 +1,15 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings + + +class TaxonomyRunner(BaseRunner): + PACKAGE_NAME = "taxonomy" + PORT = settings.TAXONOMY_PORT + APP_NAME = "taxonomy" + DB_CREDENTIALS = { + "POSTGRES_DB": "taxonomy", + } + ENVIRONMENT = { + "APP_HOST": "localhost", + "APP_PORT": str(PORT), + } diff --git a/dev_runner/dev_runner/runners/users_runner.py b/dev_runner/dev_runner/runners/users_runner.py new file mode 100644 index 000000000..4eae4f4f0 --- /dev/null +++ b/dev_runner/dev_runner/runners/users_runner.py @@ -0,0 +1,25 @@ +from .base_runner import BaseRunner +from dev_runner.conf import settings + + +class UsersRunner(BaseRunner): + PACKAGE_NAME = "users" + PORT = settings.USERS_PORT + APP_NAME = "users" + DB_CREDENTIALS = { + "POSTGRES_DB": "keycloak_db", + } + ENVIRONMENT = { + "DB_VENDOR": "POSTGRES", + "DB_ADDR": "postgres", + "DB_DATABASE": "keycloak_db", + "DB_USER": "postgres", + "DB_PASSWORD": "postgres", + "POSTGRES_HOST": "postgres", + "KEYCLOAK_USER": "user", + "KEYCLOAK_PASSWORD": "secretpassword", + "KEYCLOAK_ENDPOINT": "http://localhost/", + 
"KEYCLOAK_REALM": "master", + "KEYCLOAK_ROLE_ADMIN": "admin", + "KEYCLOAK_USERS_PUBLIC_KEY": "", + } \ No newline at end of file diff --git a/dev_runner/docker-compose.yml b/dev_runner/docker-compose.yml new file mode 100644 index 000000000..f7478f6fe --- /dev/null +++ b/dev_runner/docker-compose.yml @@ -0,0 +1,117 @@ +version: "3.9" + + +services: + postgres-postgresql: + image: postgres:13.4 + volumes: + - ./pg-init-scripts:/docker-entrypoint-initdb.d + - pgdata:/var/lib/postgresql/data + container_name: postgres + networks: + - bd + environment: + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=postgres + - POSTGRES_MULTIPLE_DATABASES=annotation,file_management,job_manager,models,pipelines,processing,scheduler,taxonomy,keycloak_db + ports: + - "5432:5432" + restart: always + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 10s + timeout: 5s + retries: 5 + + minio: + image: 'bitnami/minio:latest' + ports: + - '9000:9000' + - '9001:9001' + networks: + - bd + environment: + - MINIO_ROOT_USER=minioadmin + - MINIO_ROOT_PASSWORD=minioadmin + + gotenberg: + image: gotenberg/gotenberg:7 + ports: + - "3000:3000" + networks: + - bd + env_file: + - goten.env + + zookeeper: + image: wurstmeister/zookeeper + container_name: zookeeper + networks: + - bd + ports: + - "2181:2181" + environment: + - ALLOW_ANONYMOUS_LOGIN=yes + + kafka: + image: wurstmeister/kafka + container_name: kafka + networks: + - bd + ports: + - "9092:9092" + environment: + KAFKA_ADVERTISED_HOST_NAME: localhost + KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 +# KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'false' + depends_on: + - zookeeper + + keycloak: + image: jboss/keycloak + container_name: keycloak + networks: + - bd + ports: + - "8080:8080" + - "8443:8443" + environment: + - KEYCLOAK_USER=admin + - KEYCLOAK_PASSWORD=admin + - DB_VENDOR=POSTGRES + - DB_ADDR=postgres-postgresql + - DB_DATABASE=keycloak_db + - DB_USER=postgres + - DB_PASSWORD=postgres + depends_on: + - postgres-postgresql + + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:7.13.2 + container_name: elastic + networks: + - bd + environment: + - xpack.security.enabled=false + - discovery.type=single-node + - ES_JAVA_OPTS=-Xmx300m + restart: always + deploy: + resources: + limits: + memory: 512m + healthcheck: + test: curl --fail http://localhost:9200 || exit 1 + interval: 30s + timeout: 3s + retries: 10 + start_period: 30s + ports: + - "9200:9200" + +volumes: + pgdata: + +networks: + bd: + driver: bridge diff --git a/dev_runner/goten.env b/dev_runner/goten.env new file mode 100644 index 000000000..70bc115b2 --- /dev/null +++ b/dev_runner/goten.env @@ -0,0 +1,33 @@ +# Globals +APP_NAME="assets" +UPLOADING_LIMIT=100 +WIDTH=450 +BBOX_EXT=20 +ROOT_PATH= +LOG_FILE=false + +POSTGRES_USER=admin +POSTGRES_PASSWORD=admin +POSTGRES_DB=file_management +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 +DATABASE_URL="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}" + +S3_PREFIX= +S3_ENDPOINT=minio:9000 +S3_ACCESS_KEY=minioadmin +S3_SECRET_KEY =minioadmin +TEST_REGION=us-west-2 +MINIO_SECURE_CONNECTION=False + +SQLACLHEMY_POOL_SIZE=20 + +KEYCLOAK_URI=http://bagerdoc-keycloack + +GOTENBERG=gotenberg:3000 + +GOTENBERG_LIBRE_OFFICE_ENDPOINT="http://${GOTENBERG}/forms/libreoffice/convert" + 
+GOTENBERG_FORMATS=[".txt",".docx",".doc",".bib",".xml",".fodt",".html",".ltx",".odt",".ott",".pdb",".psw",".rtf",".sdw",".stw",".sxw",".uot",".vor",".wps",".epub",".emf",".fodg",".met",".odd",".otg",".std",".svg",".svm",".swf",".sxd",".sxw",".tiff",".xhtml",".xpm",".fodp",".potm",".pot",".pptx",".pps",".ppt",".pwp",".sda",".sdd",".sti",".sxi",".uop",".wmf",".odp"] + +IMAGE_FORMATS=[".png",".bmp", ".pbm", ".pct", ".pgm", ".ppm", ".ras", ".tiff"] diff --git a/dev_runner/pg-init-scripts/create-multiple-postgresql-databases.sh b/dev_runner/pg-init-scripts/create-multiple-postgresql-databases.sh new file mode 100755 index 000000000..aa665fa46 --- /dev/null +++ b/dev_runner/pg-init-scripts/create-multiple-postgresql-databases.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -e +set -u + +function create_user_and_database() { + local database=$1 + echo " Creating user and database '$database'" + psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL + CREATE USER $database; + CREATE DATABASE $database; + GRANT ALL PRIVILEGES ON DATABASE $database TO $database; +EOSQL +} + +if [ -n "$POSTGRES_MULTIPLE_DATABASES" ]; then + echo "Multiple database creation requested: $POSTGRES_MULTIPLE_DATABASES" + for db in $(echo $POSTGRES_MULTIPLE_DATABASES | tr ',' ' '); do + create_user_and_database $db + done + echo "Multiple databases created" +fi diff --git a/dev_runner/pyproject.toml b/dev_runner/pyproject.toml new file mode 100644 index 000000000..2a6c08993 --- /dev/null +++ b/dev_runner/pyproject.toml @@ -0,0 +1,106 @@ +[tool.poetry] +name = "dev-runner" +version = "0.1.0" +description = "run badgerdoc services locally" +authors = ["Your Name "] +readme = "README.md" +packages = [{include = "dev_runner"}] + +[tool.poetry.dependencies] +python = "^3.9" +aiocache = "0.11.1" +async-timeout = "4.0.2" +elasticsearch = {version = "7.13.4", extras = ["async"]} +kafka-python = "2.0.2" +python-dateutil = "2.8.2" +six = "1.16.0" +chardet = "4.0.0" +mypy-extensions = "0.4.4" +pdfminer-six = "20200517" +pdfplumber = "0.5.28" +pycryptodome = "3.17" +pymupdf-fonts = "1.0.5" +pymupdf = "1.21.1" +sortedcontainers = "2.4.0" +types-awscrt = "0.16.4" +wand = "0.6.11" +bcrypt = "4.0.1" +google-auth = "2.15.0" +kubernetes = "19.15.0" +oauthlib = "3.2.2" +paramiko = "2.12.0" +pyasn1-modules = "0.2.8" +pyasn1 = "0.4.8" +pynacl = "1.5.0" +python-multipart = "0.0.5" +rsa = "4.9" +async-cache = "1.1.1" +numpy = "1.24.1" +httpcore = "0.16.2" +httpx = "0.23.1" +exceptiongroup = "1.0.4" +iniconfig = "1.1.1" +packaging = "22.0" +pdf2image = "1.16.0" +pluggy = "1.0.0" +pytest = "7.2.0" +python-magic = "0.4.25" +tomli = "2.0.1" +sqlalchemy-utils = "*" +pyjwt = "*" +email-validator = "1.1.3" +apscheduler = "3.9.1" +starlette = "*" +fastapi = "*" +aiohttp = "*" +sqlalchemy = "*" +aiosignal = "*" +alembic = "*" +anyio = "*" +asgiref = "*" +attrs = "*" +boto3 = "*" +botocore = "*" +cachetools = "*" +certifi = "*" +cffi = "*" +charset-normalizer = "*" +click = "*" +colorama = "*" +cryptography = "*" +h11 = "*" +idna = "*" +importlib-metadata = "*" +importlib-resources = "*" +jmespath = "*" +mako = "*" +markupsafe = "*" +pillow = "*" +psycopg2-binary = "*" +pycparser = "*" +pydantic = "*" +python-dotenv = "*" +pyyaml = "*" +requests = "*" +s3transfer = "*" +setuptools = "*" +typing-extensions = "*" +urllib3 = "*" +uvicorn = "*" +yarl = "*" +zipp = "*" +frozenlist = "*" +minio = "*" +multidict = "*" +sniffio = "*" +aiokafka = "*" +websocket-client = "*" +tenant-dependency = {path = "../lib/tenants"} +filter-lib = {path = "../lib/filter_lib"} 
+python-magic-bin = "^0.4.14" + + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/dev_runner/start.py b/dev_runner/start.py new file mode 100644 index 000000000..657ab3d48 --- /dev/null +++ b/dev_runner/start.py @@ -0,0 +1,38 @@ +import asyncio +from pathlib import Path + +import click +from dotenv import load_dotenv + +from dev_runner.runners.base_runner import RunnerRegistry +from dev_runner.runners.annotation_runner import AnnotationRunner +from dev_runner.runners.assets_runner import AssetsRunner +from dev_runner.runners.convert_runner import ConvertRunner +from dev_runner.runners.jobs_runner import JobsRunner +from dev_runner.runners.models_runner import ModelsRunner +from dev_runner.runners.pipelines_runner import PipelinesRunner +from dev_runner.runners.processing_runner import ProcessingRunner +from dev_runner.runners.scheduler_runner import SchedulerRunner +from dev_runner.runners.search_runner import SearchRunner +from dev_runner.runners.taxonomy_runner import TaxonomyRunner +from dev_runner.runners.users_runner import UsersRunner + + +ROOT_DIR = Path(__file__).parent +SHARED_DOT_ENV = ROOT_DIR / "conf" / "shared.env" + + +def _info(message): + click.echo(click.style(message, fg="green")) + + +@click.command() +@click.argument("services", nargs=-1, type=click.Choice(RunnerRegistry.get_runners().keys())) +def cli(services): + _info(f"Starting {services or 'all'} service{'s' if not services or len(services) > 1 else ''}...") + load_dotenv(SHARED_DOT_ENV) + asyncio.run(RunnerRegistry.run(services)) + + +if __name__ == "__main__": + cli() From 917b9f6ef754631eab53a2047aa7defa365299d5 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 13:09:21 +0400 Subject: [PATCH 02/22] feat: rename app for assets --- assets/Dockerfile | 2 +- assets/alembic/env.py | 6 +- ..._add_original_ext_column_to_files_table.py | 2 +- .../versions/9e837ea0c11d_image_pages.py | 2 +- .../versions/afa33cc83d57_new_fields.py | 2 +- .../versions/fe5926249504_count_datasets.py | 2 +- assets/{src => assets}/config.py | 0 assets/assets/db/__init__.py | 2 + assets/{src => assets}/db/models.py | 2 +- assets/{src => assets}/db/service.py | 6 +- assets/{src => assets}/db/utils.py | 0 assets/{src => assets}/exceptions.py | 0 assets/{src => assets}/logger.py | 2 +- assets/{src => assets}/main.py | 4 +- assets/assets/routers/__init__.py | 5 + .../{src => assets}/routers/bonds_router.py | 2 +- .../routers/datasets_router.py | 2 +- .../{src => assets}/routers/files_router.py | 2 +- .../{src => assets}/routers/minio_router.py | 4 +- assets/{src => assets}/routers/s3_router.py | 2 +- assets/{src => assets}/schemas.py | 2 +- assets/assets/utils/__init__.py | 4 + assets/{src => assets}/utils/common_utils.py | 10 +- .../utils/convert_service_utils.py | 4 +- assets/{src => assets}/utils/minio_utils.py | 4 +- assets/{src => assets}/utils/s3_utils.py | 4 +- assets/src/db/__init__.py | 2 - assets/src/routers/__init__.py | 5 - assets/src/utils/__init__.py | 4 - assets/tests/conftest.py | 14 +-- assets/tests/test_helpers.py | 8 +- assets/tests/test_main.py | 14 +-- assets/tests/test_models.py | 2 +- assets/tests/test_utils.py | 110 +++++++++--------- 34 files changed, 118 insertions(+), 118 deletions(-) rename assets/{src => assets}/config.py (100%) create mode 100644 assets/assets/db/__init__.py rename assets/{src => assets}/db/models.py (99%) rename assets/{src => assets}/db/service.py (97%) rename assets/{src => assets}/db/utils.py (100%) rename assets/{src => 
assets}/exceptions.py (100%) rename assets/{src => assets}/logger.py (97%) rename assets/{src => assets}/main.py (90%) create mode 100644 assets/assets/routers/__init__.py rename assets/{src => assets}/routers/bonds_router.py (99%) rename assets/{src => assets}/routers/datasets_router.py (99%) rename assets/{src => assets}/routers/files_router.py (99%) rename assets/{src => assets}/routers/minio_router.py (98%) rename assets/{src => assets}/routers/s3_router.py (98%) rename assets/{src => assets}/schemas.py (98%) create mode 100644 assets/assets/utils/__init__.py rename assets/{src => assets}/utils/common_utils.py (98%) rename assets/{src => assets}/utils/convert_service_utils.py (93%) rename assets/{src => assets}/utils/minio_utils.py (99%) rename assets/{src => assets}/utils/s3_utils.py (97%) delete mode 100644 assets/src/db/__init__.py delete mode 100644 assets/src/routers/__init__.py delete mode 100644 assets/src/utils/__init__.py diff --git a/assets/Dockerfile b/assets/Dockerfile index a0d53378a..43d370e2b 100644 --- a/assets/Dockerfile +++ b/assets/Dockerfile @@ -14,7 +14,7 @@ RUN apt-get update -y && apt-get install -y poppler-utils \ && pipenv install --system --deploy COPY alembic alembic -COPY src src +COPY assets assets CMD alembic upgrade afa33cc83d57 && alembic upgrade fe5926249504 && alembic upgrade 0f6c859c1d1c && alembic upgrade head && uvicorn src.main:app --host 0.0.0.0 --port 8080 diff --git a/assets/alembic/env.py b/assets/alembic/env.py index 79a2ff191..c8a6cf694 100644 --- a/assets/alembic/env.py +++ b/assets/alembic/env.py @@ -3,8 +3,8 @@ from sqlalchemy import engine_from_config, pool from alembic import context -from src.db.utils import get_test_db_url -from src.config import settings +from assets.db.utils import get_test_db_url +from assets.config import settings # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -23,7 +23,7 @@ # for 'autogenerate' support # from myapp import mymodel # target_metadata = mymodel.Base.metadata -from src.db.models import Base # noqa +from assets.db.models import Base # noqa target_metadata = Base.metadata diff --git a/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py b/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py index dd03ad5ff..e9dd71ea0 100644 --- a/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py +++ b/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py @@ -12,7 +12,7 @@ from sqlalchemy.orm import Session # noqa from alembic import op -from src.db.models import FileObject # noqa +from assets.db.models import FileObject # noqa revision = "0f6c859c1d1c" down_revision = "fe5926249504" diff --git a/assets/alembic/versions/9e837ea0c11d_image_pages.py b/assets/alembic/versions/9e837ea0c11d_image_pages.py index 33c049b57..9dd24bc92 100644 --- a/assets/alembic/versions/9e837ea0c11d_image_pages.py +++ b/assets/alembic/versions/9e837ea0c11d_image_pages.py @@ -8,7 +8,7 @@ from sqlalchemy.orm import Session from alembic import op -from src.db.models import FileObject +from assets.db.models import FileObject # revision identifiers, used by Alembic. 
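Note on the Dockerfile hunk above: the image now copies the renamed assets package (COPY assets assets), while the unchanged CMD context line still launches uvicorn with src.main:app. The following is only a minimal local-run sketch under the assumption that, after the src -> assets rename, the FastAPI instance is still exposed as app in assets/main.py; the file name and entrypoint string are illustrative assumptions, not something introduced by this patch.

    # run_assets_local.py -- hypothetical helper, not part of this patch series
    # assumption: after the src -> assets rename the application is importable as "assets.main:app"
    import uvicorn

    if __name__ == "__main__":
        # equivalent to: uvicorn assets.main:app --host 0.0.0.0 --port 8080
        uvicorn.run("assets.main:app", host="0.0.0.0", port=8080)
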
diff --git a/assets/alembic/versions/afa33cc83d57_new_fields.py b/assets/alembic/versions/afa33cc83d57_new_fields.py index e04e300c1..68b08323d 100644 --- a/assets/alembic/versions/afa33cc83d57_new_fields.py +++ b/assets/alembic/versions/afa33cc83d57_new_fields.py @@ -8,7 +8,7 @@ import sqlalchemy as sa from alembic import op -from src.db.models import TSVector +from assets.db.models import TSVector # revision identifiers, used by Alembic. revision = "afa33cc83d57" diff --git a/assets/alembic/versions/fe5926249504_count_datasets.py b/assets/alembic/versions/fe5926249504_count_datasets.py index b5e73df8c..e1124f2f4 100644 --- a/assets/alembic/versions/fe5926249504_count_datasets.py +++ b/assets/alembic/versions/fe5926249504_count_datasets.py @@ -9,7 +9,7 @@ from sqlalchemy.orm import Session from alembic import op -from src.db.models import Association, Datasets, FileObject +from assets.db.models import Association, Datasets, FileObject # revision identifiers, used by Alembic. revision = "fe5926249504" diff --git a/assets/src/config.py b/assets/assets/config.py similarity index 100% rename from assets/src/config.py rename to assets/assets/config.py diff --git a/assets/assets/db/__init__.py b/assets/assets/db/__init__.py new file mode 100644 index 000000000..638b59da7 --- /dev/null +++ b/assets/assets/db/__init__.py @@ -0,0 +1,2 @@ +import assets.db.models +import assets.db.service # noqa diff --git a/assets/src/db/models.py b/assets/assets/db/models.py similarity index 99% rename from assets/src/db/models.py rename to assets/assets/db/models.py index 8accd114a..30edb04ad 100644 --- a/assets/src/db/models.py +++ b/assets/assets/db/models.py @@ -9,7 +9,7 @@ from sqlalchemy.orm import relationship, sessionmaker from sqlalchemy.types import TypeDecorator -from src.config import settings +from assets.config import settings Base = declarative_base() engine = sa.create_engine( diff --git a/assets/src/db/service.py b/assets/assets/db/service.py similarity index 97% rename from assets/src/db/service.py rename to assets/assets/db/service.py index 801a9365c..a6620896d 100644 --- a/assets/src/db/service.py +++ b/assets/assets/db/service.py @@ -4,9 +4,9 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Query, Session, load_only, selectinload -from src.db.models import Association, Datasets, FileObject, SessionLocal -from src.logger import get_logger -from src.schemas import FileProcessingStatusForUpdate +from assets.db.models import Association, Datasets, FileObject, SessionLocal +from assets.logger import get_logger +from assets.schemas import FileProcessingStatusForUpdate logger = get_logger(__name__) diff --git a/assets/src/db/utils.py b/assets/assets/db/utils.py similarity index 100% rename from assets/src/db/utils.py rename to assets/assets/db/utils.py diff --git a/assets/src/exceptions.py b/assets/assets/exceptions.py similarity index 100% rename from assets/src/exceptions.py rename to assets/assets/exceptions.py diff --git a/assets/src/logger.py b/assets/assets/logger.py similarity index 97% rename from assets/src/logger.py rename to assets/assets/logger.py index a9c495025..dd0d3da8c 100644 --- a/assets/src/logger.py +++ b/assets/assets/logger.py @@ -1,6 +1,6 @@ import logging -from src.config import settings +from assets.config import settings _log_format = f"%(asctime)s - [%(levelname)s] - %(name)s - (%(filename)s).%(funcName)s(%(lineno)d) - %(message)s" # noqa _datefmt = "%d-%b-%y %H:%M:%S" diff --git a/assets/src/main.py b/assets/assets/main.py similarity index 90% 
rename from assets/src/main.py rename to assets/assets/main.py index d4d6bd4d0..25897e99c 100644 --- a/assets/src/main.py +++ b/assets/assets/main.py @@ -1,8 +1,8 @@ from fastapi import Depends, FastAPI from tenant_dependency import get_tenant_info -from src import routers -from src.config import settings +from assets import routers +from assets.config import settings tenant = get_tenant_info(url=settings.keycloak_uri, algorithm="RS256") diff --git a/assets/assets/routers/__init__.py b/assets/assets/routers/__init__.py new file mode 100644 index 000000000..33cf1f3ae --- /dev/null +++ b/assets/assets/routers/__init__.py @@ -0,0 +1,5 @@ +import assets.routers.bonds_router +import assets.routers.datasets_router +import assets.routers.files_router +import assets.routers.minio_router +import assets.routers.s3_router # noqa diff --git a/assets/src/routers/bonds_router.py b/assets/assets/routers/bonds_router.py similarity index 99% rename from assets/src/routers/bonds_router.py rename to assets/assets/routers/bonds_router.py index 7bb5bf6af..38bfb6c0c 100644 --- a/assets/src/routers/bonds_router.py +++ b/assets/assets/routers/bonds_router.py @@ -5,7 +5,7 @@ import filter_lib import sqlalchemy.orm -from src import db, schemas, utils +from assets import db, schemas, utils router = fastapi.APIRouter(prefix="/datasets/bonds", tags=["bonds"]) diff --git a/assets/src/routers/datasets_router.py b/assets/assets/routers/datasets_router.py similarity index 99% rename from assets/src/routers/datasets_router.py rename to assets/assets/routers/datasets_router.py index 992336d6b..f6ffa3e89 100644 --- a/assets/src/routers/datasets_router.py +++ b/assets/assets/routers/datasets_router.py @@ -7,7 +7,7 @@ import sqlalchemy.orm import sqlalchemy_filters.exceptions -from src import db, schemas +from assets import db, schemas router = fastapi.APIRouter(prefix="/datasets", tags=["datasets"]) diff --git a/assets/src/routers/files_router.py b/assets/assets/routers/files_router.py similarity index 99% rename from assets/src/routers/files_router.py rename to assets/assets/routers/files_router.py index 7516c4747..67ab315cf 100644 --- a/assets/src/routers/files_router.py +++ b/assets/assets/routers/files_router.py @@ -7,7 +7,7 @@ import sqlalchemy.orm import sqlalchemy_filters.exceptions -from src import db, exceptions, schemas, utils +from assets import db, exceptions, schemas, utils router = fastapi.APIRouter(prefix="/files", tags=["files"]) diff --git a/assets/src/routers/minio_router.py b/assets/assets/routers/minio_router.py similarity index 98% rename from assets/src/routers/minio_router.py rename to assets/assets/routers/minio_router.py index 154613de3..9822a0fe5 100644 --- a/assets/src/routers/minio_router.py +++ b/assets/assets/routers/minio_router.py @@ -5,8 +5,8 @@ import sqlalchemy.orm import urllib3.exceptions -from src import db, schemas, utils -from src.config import settings +from assets import db, schemas, utils +from assets.config import settings router = fastapi.APIRouter(tags=["minio"]) diff --git a/assets/src/routers/s3_router.py b/assets/assets/routers/s3_router.py similarity index 98% rename from assets/src/routers/s3_router.py rename to assets/assets/routers/s3_router.py index 78b7bd947..b145cff21 100644 --- a/assets/src/routers/s3_router.py +++ b/assets/assets/routers/s3_router.py @@ -5,7 +5,7 @@ import sqlalchemy.orm import urllib3.exceptions -from src import db, exceptions, schemas, utils +from assets import db, exceptions, schemas, utils router = fastapi.APIRouter(prefix="/s3_upload", 
tags=["s_3"]) diff --git a/assets/src/schemas.py b/assets/assets/schemas.py similarity index 98% rename from assets/src/schemas.py rename to assets/assets/schemas.py index c24a805a1..59fb27157 100644 --- a/assets/src/schemas.py +++ b/assets/assets/schemas.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, validator -from src.db.models import Datasets +from assets.db.models import Datasets class MinioObjects(BaseModel): diff --git a/assets/assets/utils/__init__.py b/assets/assets/utils/__init__.py new file mode 100644 index 000000000..dc7556bdf --- /dev/null +++ b/assets/assets/utils/__init__.py @@ -0,0 +1,4 @@ +import assets.utils.common_utils +import assets.utils.convert_service_utils +import assets.utils.minio_utils +import assets.utils.s3_utils # noqa diff --git a/assets/src/utils/common_utils.py b/assets/assets/utils/common_utils.py similarity index 98% rename from assets/src/utils/common_utils.py rename to assets/assets/utils/common_utils.py index 18e47f72f..64cda6323 100644 --- a/assets/src/utils/common_utils.py +++ b/assets/assets/utils/common_utils.py @@ -10,11 +10,11 @@ import sqlalchemy.orm import starlette.datastructures -from src import db, exceptions, logger, schemas -from src.config import settings -from src.utils import minio_utils -from src.utils.convert_service_utils import post_to_convert -from src.utils.minio_utils import create_minio_config +from assets import db, exceptions, logger, schemas +from assets.config import settings +from assets.utils import minio_utils +from assets.utils.convert_service_utils import post_to_convert +from assets.utils.minio_utils import create_minio_config logger_ = logger.get_logger(__name__) diff --git a/assets/src/utils/convert_service_utils.py b/assets/assets/utils/convert_service_utils.py similarity index 93% rename from assets/src/utils/convert_service_utils.py rename to assets/assets/utils/convert_service_utils.py index e31dca9a3..69b0af269 100644 --- a/assets/src/utils/convert_service_utils.py +++ b/assets/assets/utils/convert_service_utils.py @@ -1,7 +1,7 @@ import requests -from src import logger -from src.config import settings +from assets import logger +from assets.config import settings logger_ = logger.get_logger(__name__) diff --git a/assets/src/utils/minio_utils.py b/assets/assets/utils/minio_utils.py similarity index 99% rename from assets/src/utils/minio_utils.py rename to assets/assets/utils/minio_utils.py index 8098c7188..a75a54291 100644 --- a/assets/src/utils/minio_utils.py +++ b/assets/assets/utils/minio_utils.py @@ -8,8 +8,8 @@ import urllib3.exceptions from minio.credentials import AWSConfigProvider, EnvAWSProvider, IamAwsProvider -from src import db, logger -from src.config import settings +from assets import db, logger +from assets.config import settings logger_ = logger.get_logger(__name__) diff --git a/assets/src/utils/s3_utils.py b/assets/assets/utils/s3_utils.py similarity index 97% rename from assets/src/utils/s3_utils.py rename to assets/assets/utils/s3_utils.py index 48583f94f..df5a514ef 100644 --- a/assets/src/utils/s3_utils.py +++ b/assets/assets/utils/s3_utils.py @@ -4,8 +4,8 @@ import boto3 import urllib3.exceptions -from src import exceptions, logger -from src.config import settings +from assets import exceptions, logger +from assets.config import settings logger_ = logger.get_logger(__name__) diff --git a/assets/src/db/__init__.py b/assets/src/db/__init__.py deleted file mode 100644 index bc34dc758..000000000 --- a/assets/src/db/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -import src.db.models -import 
src.db.service # noqa diff --git a/assets/src/routers/__init__.py b/assets/src/routers/__init__.py deleted file mode 100644 index efdae3e25..000000000 --- a/assets/src/routers/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -import src.routers.bonds_router -import src.routers.datasets_router -import src.routers.files_router -import src.routers.minio_router -import src.routers.s3_router # noqa diff --git a/assets/src/utils/__init__.py b/assets/src/utils/__init__.py deleted file mode 100644 index 9b3f2d2c3..000000000 --- a/assets/src/utils/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -import src.utils.common_utils -import src.utils.convert_service_utils -import src.utils.minio_utils -import src.utils.s3_utils # noqa diff --git a/assets/tests/conftest.py b/assets/tests/conftest.py index 4c6a2950d..8e82fd48d 100644 --- a/assets/tests/conftest.py +++ b/assets/tests/conftest.py @@ -16,15 +16,15 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy_utils import create_database, database_exists -import src.utils.minio_utils as minio_utils +import assets.utils.minio_utils as minio_utils from alembic import command from alembic.config import Config -from src.config import settings -from src.db.models import Base -from src.db.service import session_scope_for_dependency -from src.main import app, tenant -from src.utils.minio_utils import get_storage -from src.db.utils import get_test_db_url +from assets.config import settings +from assets.db.models import Base +from assets.db.service import session_scope_for_dependency +from assets.main import app, tenant +from assets.utils.minio_utils import get_storage +from assets.db.utils import get_test_db_url BUCKET_TESTS = "tests" + uuid.uuid4().hex diff --git a/assets/tests/test_helpers.py b/assets/tests/test_helpers.py index d1849524c..c736c60c2 100644 --- a/assets/tests/test_helpers.py +++ b/assets/tests/test_helpers.py @@ -4,10 +4,10 @@ import pytest from fastapi import HTTPException -from src.db.models import FileObject -from src.db.service import delete_file_from_db, insert_file, update_file_status -from src.schemas import FileProcessingStatus -from src.utils.minio_utils import check_bucket, delete_one_from_minio +from assets.db.models import FileObject +from assets.db.service import delete_file_from_db, insert_file, update_file_status +from assets.schemas import FileProcessingStatus +from assets.utils.minio_utils import check_bucket, delete_one_from_minio @pytest.fixture diff --git a/assets/tests/test_main.py b/assets/tests/test_main.py index 25e461f79..5e69c7dc9 100644 --- a/assets/tests/test_main.py +++ b/assets/tests/test_main.py @@ -23,7 +23,7 @@ def test_bucket_name_on_create_bucket_with_prefix( ): test_prefix = "test_prefix" - from src.config import settings + from assets.config import settings monkeypatch.setattr(target=settings, name="s3_prefix", value=test_prefix) @@ -44,7 +44,7 @@ def test_bucket_name_on_create_bucket_without_prefix( ): test_prefix = None - from src.config import settings + from assets.config import settings monkeypatch.setattr(target=settings, name="s3_prefix", value=test_prefix) @@ -75,8 +75,8 @@ def test_upload_and_delete_file_without_conversion(client_app_main): assert id_ == res.json()[0]["id"] -@patch("src.utils.s3_utils.S3Manager.get_files") -@patch("src.utils.s3_utils.S3Manager.check_s3") +@patch("assets.utils.s3_utils.S3Manager.get_files") +@patch("assets.utils.s3_utils.S3Manager.check_s3") def test_upload_and_delete_file_s3( check_s3, get_files, client_app_main, s3_retrieved_file ): @@ -439,7 +439,7 @@ def 
test_download_negative(client_app_main): def test_download_positive(client_app_main): - with patch("src.routers.minio_router.fastapi.responses.StreamingResponse"): + with patch("assets.routers.minio_router.fastapi.responses.StreamingResponse"): with NamedTemporaryFile(suffix=".jpg") as file: data = {"files": file} res_upload = client_app_main.post( @@ -455,14 +455,14 @@ def test_download_positive(client_app_main): assert res_download.status_code == 200 -@patch("src.utils.common_utils.requests.post") +@patch("assets.utils.common_utils.requests.post") def test_download_positive_originals( gotenberg, pdf_file_bytes, client_app_main ): response = Response() response._content = pdf_file_bytes gotenberg.return_value = response - with patch("src.routers.minio_router.fastapi.responses.StreamingResponse"): + with patch("assets.routers.minio_router.fastapi.responses.StreamingResponse"): with NamedTemporaryFile(suffix=".doc", prefix="some_file") as file: data = {"files": file} res_upload = client_app_main.post( diff --git a/assets/tests/test_models.py b/assets/tests/test_models.py index c37c7071f..03bbbb925 100644 --- a/assets/tests/test_models.py +++ b/assets/tests/test_models.py @@ -2,7 +2,7 @@ import pytest -from src.db.models import Association, Datasets, FileObject +from assets.db.models import Association, Datasets, FileObject @pytest.fixture diff --git a/assets/tests/test_utils.py b/assets/tests/test_utils.py index 392829008..19cf374c0 100644 --- a/assets/tests/test_utils.py +++ b/assets/tests/test_utils.py @@ -9,23 +9,23 @@ from requests import Response from sqlalchemy.orm import Session -import src.utils.minio_utils as minio_utils -from src.config import settings -from src.db.models import FileObject -from src.exceptions import ( +import assets.utils.minio_utils as minio_utils +from assets.config import settings +from assets.db.models import FileObject +from assets.exceptions import ( BucketError, FileConversionError, FileKeyError, UploadLimitExceedError, ) -from src.schemas import ActionResponse -from src.utils.common_utils import ( +from assets.schemas import ActionResponse +from assets.utils.common_utils import ( FileConverter, FileProcessor, check_uploading_limit, to_obj, ) -from src.utils.s3_utils import S3Manager +from assets.utils.s3_utils import S3Manager ID_ = 12 @@ -82,7 +82,7 @@ def test_file_processor_is_extension_correct_without_extension(): assert mock_instance.is_extension_correct() is False -# @patch("src.utils.common_utils.db.service.insert_file") +# @patch("assets.utils.common_utils.db.service.insert_file") # def test_file_processor_is_inserted_to_database_file_inserted( # insert_file, pdf_file_bytes # ): @@ -99,7 +99,7 @@ def test_file_processor_is_extension_correct_without_extension(): # insert_file.assert_called() -# @patch("src.utils.common_utils.db.service.insert_file") +# @patch("assets.utils.common_utils.db.service.insert_file") # def test_file_processor_is_inserted_to_database_file_not_inserted( # insert_file, pdf_file_bytes # ): @@ -117,7 +117,7 @@ def test_file_processor_is_extension_correct_without_extension(): # insert_file.assert_called() -@patch("src.utils.minio_utils.upload_in_minio") +@patch("assets.utils.minio_utils.upload_in_minio") def test_file_processor_is_uploaded_to_storage_file_uploaded(upload_in_minio): file_processor = FileProcessor( file=BytesIO(), @@ -131,8 +131,8 @@ def test_file_processor_is_uploaded_to_storage_file_uploaded(upload_in_minio): upload_in_minio.assert_called() -@patch("src.utils.common_utils.db.service.update_file_status") 
-@patch("src.utils.minio_utils.upload_in_minio") +@patch("assets.utils.common_utils.db.service.update_file_status") +@patch("assets.utils.minio_utils.upload_in_minio") def test_file_processor_is_uploaded_to_storage_not_uploaded( upload_in_minio, update_file_status ): @@ -152,7 +152,7 @@ def test_file_processor_is_uploaded_to_storage_not_uploaded( update_file_status.assert_called() -@patch("src.utils.common_utils.db.service.update_file_status") +@patch("assets.utils.common_utils.db.service.update_file_status") def test_file_processor_is_file_updated_status_updated(update_file_status): file_processor = FileProcessor( file=BytesIO(), @@ -168,7 +168,7 @@ def test_file_processor_is_file_updated_status_updated(update_file_status): update_file_status.assert_called() -@patch("src.utils.common_utils.db.service.update_file_status") +@patch("assets.utils.common_utils.db.service.update_file_status") def test_file_processor_is_file_updated_status_not_updated(update_file_status): file_processor = FileProcessor( file=BytesIO(), @@ -184,15 +184,15 @@ def test_file_processor_is_file_updated_status_not_updated(update_file_status): update_file_status.assert_called() -@patch("src.utils.common_utils.FileProcessor.is_file_updated") -@patch("src.utils.common_utils.FileProcessor.is_blank_is_created") +@patch("assets.utils.common_utils.FileProcessor.is_file_updated") +@patch("assets.utils.common_utils.FileProcessor.is_blank_is_created") @patch( - "src.utils.common_utils.FileProcessor.is_original_file_uploaded_to_storage" + "assets.utils.common_utils.FileProcessor.is_original_file_uploaded_to_storage" ) -@patch("src.utils.common_utils.FileProcessor.is_uploaded_to_storage") -@patch("src.utils.common_utils.FileProcessor.is_inserted_to_database") -@patch("src.utils.common_utils.FileProcessor.is_converted_file") -@patch("src.utils.common_utils.FileProcessor.is_extension_correct") +@patch("assets.utils.common_utils.FileProcessor.is_uploaded_to_storage") +@patch("assets.utils.common_utils.FileProcessor.is_inserted_to_database") +@patch("assets.utils.common_utils.FileProcessor.is_converted_file") +@patch("assets.utils.common_utils.FileProcessor.is_extension_correct") def test_file_processor_run_all_stages_passed( is_blank_is_created, is_extension_correct, @@ -228,10 +228,10 @@ def test_file_processor_run_all_stages_passed( is_file_updated.assert_called() -@patch("src.utils.common_utils.FileProcessor.is_file_updated") -@patch("src.utils.common_utils.FileProcessor.is_uploaded_to_storage") -@patch("src.utils.common_utils.FileProcessor.is_inserted_to_database") -@patch("src.utils.common_utils.FileProcessor.is_extension_correct") +@patch("assets.utils.common_utils.FileProcessor.is_file_updated") +@patch("assets.utils.common_utils.FileProcessor.is_uploaded_to_storage") +@patch("assets.utils.common_utils.FileProcessor.is_inserted_to_database") +@patch("assets.utils.common_utils.FileProcessor.is_extension_correct") def test_file_processor_run_extension_check_failed( is_extension_correct, is_inserted_to_database, @@ -258,7 +258,7 @@ def test_file_processor_run_extension_check_failed( is_file_updated.assert_not_called() -@patch("src.utils.common_utils.requests.post") +@patch("assets.utils.common_utils.requests.post") def test_file_processor_is_converted_file_converted(gotenberg, pdf_file_bytes): response = Response() response._content = pdf_file_bytes @@ -274,8 +274,8 @@ def test_file_processor_is_converted_file_converted(gotenberg, pdf_file_bytes): assert file_processor.is_converted_file() 
-@patch("src.utils.common_utils.get_mimetype") -@patch("src.utils.common_utils.requests.post") +@patch("assets.utils.common_utils.get_mimetype") +@patch("assets.utils.common_utils.requests.post") def test_file_processor_is_converted_file_conversion_error( gotenberg, get_mimetype, pdf_file_bytes ): @@ -296,7 +296,7 @@ def test_file_processor_is_converted_file_conversion_error( assert file_processor.conversion_status == "conversion error" -@patch("src.utils.common_utils.requests.post") +@patch("assets.utils.common_utils.requests.post") def test_file_processor_is_converted_file_conversion_not_in_formats( gotenberg, pdf_file_bytes ): @@ -316,14 +316,14 @@ def test_file_processor_is_converted_file_conversion_not_in_formats( assert file_processor.conversion_status is None -# @patch("src.utils.common_utils.FileProcessor.is_file_updated") +# @patch("assets.utils.common_utils.FileProcessor.is_file_updated") # @patch( -# "src.utils.common_utils.FileProcessor.is_original_file_uploaded_to_storage" +# "assets.utils.common_utils.FileProcessor.is_original_file_uploaded_to_storage" # ) -# @patch("src.utils.common_utils.FileProcessor.is_uploaded_to_storage") -# @patch("src.utils.common_utils.FileProcessor.is_inserted_to_database") -# @patch("src.utils.common_utils.FileProcessor.is_converted_file") -# @patch("src.utils.common_utils.FileProcessor.is_extension_correct") +# @patch("assets.utils.common_utils.FileProcessor.is_uploaded_to_storage") +# @patch("assets.utils.common_utils.FileProcessor.is_inserted_to_database") +# @patch("assets.utils.common_utils.FileProcessor.is_converted_file") +# @patch("assets.utils.common_utils.FileProcessor.is_extension_correct") # def test_file_processor_run_database_insert_failed( # is_extension_correct, # is_converted_file, @@ -356,10 +356,10 @@ def test_file_processor_is_converted_file_conversion_not_in_formats( # is_file_updated.assert_not_called() -# @patch("src.utils.common_utils.FileProcessor.is_file_updated") -# @patch("src.utils.common_utils.FileProcessor.is_uploaded_to_storage") -# @patch("src.utils.common_utils.FileProcessor.is_inserted_to_database") -# @patch("src.utils.common_utils.FileProcessor.is_extension_correct") +# @patch("assets.utils.common_utils.FileProcessor.is_file_updated") +# @patch("assets.utils.common_utils.FileProcessor.is_uploaded_to_storage") +# @patch("assets.utils.common_utils.FileProcessor.is_inserted_to_database") +# @patch("assets.utils.common_utils.FileProcessor.is_extension_correct") # def test_file_processor_run_storage_upload_failed( # is_extension_correct, # is_inserted_to_database, @@ -386,10 +386,10 @@ def test_file_processor_is_converted_file_conversion_not_in_formats( # is_file_updated.assert_not_called() -# @patch("src.utils.common_utils.FileProcessor.is_file_updated") -# @patch("src.utils.common_utils.FileProcessor.is_uploaded_to_storage") -# @patch("src.utils.common_utils.FileProcessor.is_inserted_to_database") -# @patch("src.utils.common_utils.FileProcessor.is_extension_correct") +# @patch("assets.utils.common_utils.FileProcessor.is_file_updated") +# @patch("assets.utils.common_utils.FileProcessor.is_uploaded_to_storage") +# @patch("assets.utils.common_utils.FileProcessor.is_inserted_to_database") +# @patch("assets.utils.common_utils.FileProcessor.is_extension_correct") # def test_file_processor_run_status_update_failed( # is_extension_correct, # is_inserted_to_database, @@ -428,8 +428,8 @@ def test_s3_manager_get_files(): assert file_key in file_keys -@patch("src.utils.s3_utils.S3Manager._check_bucket_exist") 
-@patch("src.utils.s3_utils.S3Manager._check_files_exist") +@patch("assets.utils.s3_utils.S3Manager._check_bucket_exist") +@patch("assets.utils.s3_utils.S3Manager._check_files_exist") def test_s3_manager_check_s3_buckets_and_files_exist( check_buckets, check_files ): @@ -441,8 +441,8 @@ def test_s3_manager_check_s3_buckets_and_files_exist( check_files.assert_called() -@patch("src.utils.s3_utils.S3Manager._check_bucket_exist") -@patch("src.utils.s3_utils.S3Manager._check_files_exist") +@patch("assets.utils.s3_utils.S3Manager._check_bucket_exist") +@patch("assets.utils.s3_utils.S3Manager._check_files_exist") def test_s3_manager_check_s3_buckets_not_exist(check_files, check_buckets): s3 = S3Manager("a", "b", endpoint_url=None) check_buckets.side_effect = BucketError @@ -453,8 +453,8 @@ def test_s3_manager_check_s3_buckets_not_exist(check_files, check_buckets): check_files.assert_not_called() -@patch("src.utils.s3_utils.S3Manager._check_bucket_exist") -@patch("src.utils.s3_utils.S3Manager._check_files_exist") +@patch("assets.utils.s3_utils.S3Manager._check_bucket_exist") +@patch("assets.utils.s3_utils.S3Manager._check_files_exist") def test_s3_manager_check_s3_file_not_exist(check_files, check_buckets): s3 = S3Manager("a", "b", endpoint_url=None) check_buckets.return_value = None @@ -476,8 +476,8 @@ def test_check_uploading_limit_not_exceed(): assert check_uploading_limit(uploading_list) is None -@patch("src.utils.common_utils.get_mimetype") -@patch("src.utils.common_utils.requests.post") +@patch("assets.utils.common_utils.get_mimetype") +@patch("assets.utils.common_utils.requests.post") def test_file_processor_conversion_error( gotenberg, get_mimetype, pdf_file_bytes ): @@ -494,7 +494,7 @@ def test_file_processor_conversion_error( assert converter.conversion_status == "conversion error" -@patch("src.utils.common_utils.requests.post") +@patch("assets.utils.common_utils.requests.post") def test_file_converted_converted_to_pdf(gotenberg, pdf_file_bytes): response = Response() response._content = pdf_file_bytes @@ -508,8 +508,8 @@ def test_file_converted_converted_to_pdf(gotenberg, pdf_file_bytes): assert converter.conversion_status == "converted to PDF" -@patch("src.utils.common_utils.get_mimetype") -@patch("src.utils.common_utils.requests.post") +@patch("assets.utils.common_utils.get_mimetype") +@patch("assets.utils.common_utils.requests.post") def test_file_converted_converted_to_pdf_side_effect( gotenberg, get_mimetype, pdf_file_bytes ): @@ -593,7 +593,7 @@ def test_extend_bbox(bbox, page_size, ext, expected_result): ) def test_get_pdf_page_size(file, return_value, expected_result): with patch( - "src.utils.minio_utils.pdf2image.pdfinfo_from_bytes", + "assets.utils.minio_utils.pdf2image.pdfinfo_from_bytes", return_value=return_value, ): assert minio_utils.get_pdf_pts_page_size(file) == expected_result From 1197c26a2fd0cc19597a2f13746dd6eef7656325 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 13:15:07 +0400 Subject: [PATCH 03/22] feat: fix convert annotation issues --- convert/Dockerfile | 4 ++-- convert/{src => convert}/__init__.py | 0 .../{src => convert}/coco_export/__init__.py | 0 .../{src => convert}/coco_export/convert.py | 14 +++++++------- .../coco_export/export_service.py | 8 ++++---- .../{src => convert}/coco_import/__init__.py | 0 .../{src => convert}/coco_import/convert.py | 10 +++++----- .../coco_import/import_job.py | 8 ++++---- .../coco_import/import_service.py | 14 +++++++------- convert/{src => convert}/config.py | 2 +- convert/{src => convert}/exceptions.py | 
0 .../label_studio_to_badgerdoc/__init__.py | 0 .../badgerdoc_format/__init__.py | 0 .../badgerdoc_format/annotation_converter.py | 0 .../annotation_converter_practic.py | 0 .../badgerdoc_format/badgerdoc_format.py | 0 .../badgerdoc_format/pdf_renderer.py | 0 .../badgerdoc_format/plain_text_converter.py | 0 .../badgerdoc_to_label_studio_use_case.py | 6 +++--- .../label_studio_to_badgerdoc_use_case.py | 16 ++++++++-------- .../labelstudio_format/__init__.py | 0 .../labelstudio_format/label_studio_format.py | 4 ++-- .../models/__init__.py | 0 .../models/bd_annotation_model.py | 0 .../models/bd_annotation_model_practic.py | 0 .../models/bd_manifest_model_practic.py | 0 .../models/bd_tokens_model.py | 0 .../models/common.py | 0 .../models/label_studio_models.py | 0 .../models/text_model.py | 0 .../text_to_badgerdoc_use_case.py | 0 convert/{src => convert}/logger.py | 0 convert/{src => convert}/main.py | 6 +++--- convert/{src => convert}/models/__init__.py | 0 convert/{src => convert}/models/coco.py | 0 convert/{src => convert}/routers/__init__.py | 0 convert/{src => convert}/routers/coco.py | 16 ++++++++-------- .../{src => convert}/routers/label_studio.py | 10 +++++----- convert/{src => convert}/routers/text.py | 6 +++--- convert/{src => convert}/utils/__init__.py | 0 .../{src => convert}/utils/common_utils.py | 4 ++-- convert/{src => convert}/utils/json_utils.py | 2 +- .../{src => convert}/utils/render_pdf_page.py | 6 +++--- convert/{src => convert}/utils/s3_utils.py | 10 +++++----- convert/docker-compose.yaml | 4 ++-- convert/src/.env | 19 ------------------- .../tests/test_label_studio/test_export.py | 10 +++++----- .../tests/test_label_studio/test_import.py | 8 ++++---- .../test_label_studio/test_text_wrapper.py | 2 +- jobs/jobs/main.py | 6 +++--- 50 files changed, 88 insertions(+), 107 deletions(-) rename convert/{src => convert}/__init__.py (100%) rename convert/{src => convert}/coco_export/__init__.py (100%) rename convert/{src => convert}/coco_export/convert.py (97%) rename convert/{src => convert}/coco_export/export_service.py (90%) rename convert/{src => convert}/coco_import/__init__.py (100%) rename convert/{src => convert}/coco_import/convert.py (95%) rename convert/{src => convert}/coco_import/import_job.py (86%) rename convert/{src => convert}/coco_import/import_service.py (86%) rename convert/{src => convert}/config.py (99%) rename convert/{src => convert}/exceptions.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/__init__.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/badgerdoc_format/__init__.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/badgerdoc_format/badgerdoc_format.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py (94%) rename convert/{src => convert}/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py (96%) rename convert/{src => convert}/label_studio_to_badgerdoc/labelstudio_format/__init__.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py (99%) rename convert/{src 
=> convert}/label_studio_to_badgerdoc/models/__init__.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/models/bd_annotation_model.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/models/bd_annotation_model_practic.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/models/bd_manifest_model_practic.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/models/bd_tokens_model.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/models/common.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/models/label_studio_models.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/models/text_model.py (100%) rename convert/{src => convert}/label_studio_to_badgerdoc/text_to_badgerdoc_use_case.py (100%) rename convert/{src => convert}/logger.py (100%) rename convert/{src => convert}/main.py (71%) rename convert/{src => convert}/models/__init__.py (100%) rename convert/{src => convert}/models/coco.py (100%) rename convert/{src => convert}/routers/__init__.py (100%) rename convert/{src => convert}/routers/coco.py (91%) rename convert/{src => convert}/routers/label_studio.py (84%) rename convert/{src => convert}/routers/text.py (68%) rename convert/{src => convert}/utils/__init__.py (100%) rename convert/{src => convert}/utils/common_utils.py (88%) rename convert/{src => convert}/utils/json_utils.py (99%) rename convert/{src => convert}/utils/render_pdf_page.py (89%) rename convert/{src => convert}/utils/s3_utils.py (94%) delete mode 100644 convert/src/.env diff --git a/convert/Dockerfile b/convert/Dockerfile index 0c683a3a2..b7d38a29e 100644 --- a/convert/Dockerfile +++ b/convert/Dockerfile @@ -20,7 +20,7 @@ RUN poetry config virtualenvs.create false \ FROM base as build #ENV ROOT_PATH="/api/v1/convert" WORKDIR /convert -COPY src/ /convert/src +COPY convert/ /convert/convert CMD uvicorn src.main:app --host 0.0.0.0 --port 8080 @@ -31,7 +31,7 @@ CMD ["python", "-m", "pytest", "--cov=app"] FROM sonarsource/sonar-scanner-cli:4.6 AS sonar COPY tests /working/tests -COPY src /working/src +COPY convert /working/convert COPY sonar-project.properties /working/sonar-project.properties CMD sonar-scanner \ diff --git a/convert/src/__init__.py b/convert/convert/__init__.py similarity index 100% rename from convert/src/__init__.py rename to convert/convert/__init__.py diff --git a/convert/src/coco_export/__init__.py b/convert/convert/coco_export/__init__.py similarity index 100% rename from convert/src/coco_export/__init__.py rename to convert/convert/coco_export/__init__.py diff --git a/convert/src/coco_export/convert.py b/convert/convert/coco_export/convert.py similarity index 97% rename from convert/src/coco_export/convert.py rename to convert/convert/coco_export/convert.py index f32d74719..c356bf12a 100644 --- a/convert/src/coco_export/convert.py +++ b/convert/convert/coco_export/convert.py @@ -10,13 +10,13 @@ import requests from botocore.exceptions import ClientError -from src.config import minio_client, minio_resource, settings -from src.logger import get_logger -from src.models.coco import Annotation, Category, CocoDataset, Image -from src.utils.common_utils import add_to_zip_and_local_remove, get_headers -from src.utils.json_utils import export_save_to_json, load_from_json -from src.utils.render_pdf_page import pdf_page_to_jpg -from src.utils.s3_utils import convert_bucket_name_if_s3prefix +from convert.config import minio_client, minio_resource, settings +from convert.logger import get_logger +from 
convert.models.coco import Annotation, Category, CocoDataset, Image +from convert.utils.common_utils import add_to_zip_and_local_remove, get_headers +from convert.utils.json_utils import export_save_to_json, load_from_json +from convert.utils.render_pdf_page import pdf_page_to_jpg +from convert.utils.s3_utils import convert_bucket_name_if_s3prefix LOGGER = get_logger(__file__) diff --git a/convert/src/coco_export/export_service.py b/convert/convert/coco_export/export_service.py similarity index 90% rename from convert/src/coco_export/export_service.py rename to convert/convert/coco_export/export_service.py index 7a4b31daa..67157962c 100644 --- a/convert/src/coco_export/export_service.py +++ b/convert/convert/coco_export/export_service.py @@ -5,10 +5,10 @@ from fastapi import BackgroundTasks -from src.coco_export.convert import ConvertToCoco, ExportConvertBase -from src.config import minio_client -from src.logger import get_logger -from src.utils.s3_utils import convert_bucket_name_if_s3prefix +from convert.coco_export.convert import ConvertToCoco, ExportConvertBase +from convert.config import minio_client +from convert.logger import get_logger +from convert.utils.s3_utils import convert_bucket_name_if_s3prefix LOGGER = get_logger(__file__) diff --git a/convert/src/coco_import/__init__.py b/convert/convert/coco_import/__init__.py similarity index 100% rename from convert/src/coco_import/__init__.py rename to convert/convert/coco_import/__init__.py diff --git a/convert/src/coco_import/convert.py b/convert/convert/coco_import/convert.py similarity index 95% rename from convert/src/coco_import/convert.py rename to convert/convert/coco_import/convert.py index e06c0d3ef..706eaf541 100644 --- a/convert/src/coco_import/convert.py +++ b/convert/convert/coco_import/convert.py @@ -5,11 +5,11 @@ from pathlib import Path from typing import Any, Dict, Set -from src.config import get_request_session, settings -from src.logger import get_logger -from src.models.coco import DataS3 -from src.utils.json_utils import import_save_to_json, load_from_json -from src.utils.s3_utils import S3Manager, s3_download_files +from convert.config import get_request_session, settings +from convert.logger import get_logger +from convert.models.coco import DataS3 +from convert.utils.json_utils import import_save_to_json, load_from_json +from convert.utils.s3_utils import S3Manager, s3_download_files LOGGER = get_logger(__file__) SESSION = get_request_session() diff --git a/convert/src/coco_import/import_job.py b/convert/convert/coco_import/import_job.py similarity index 86% rename from convert/src/coco_import/import_job.py rename to convert/convert/coco_import/import_job.py index ca0400eea..03a981d95 100644 --- a/convert/src/coco_import/import_job.py +++ b/convert/convert/coco_import/import_job.py @@ -3,10 +3,10 @@ from urllib.error import HTTPError from uuid import uuid4 -from src.coco_import.import_service import import_run -from src.config import get_request_session, settings -from src.logger import get_logger -from src.models.coco import DataS3 +from convert.coco_import.import_service import import_run +from convert.config import get_request_session, settings +from convert.logger import get_logger +from convert.models.coco import DataS3 LOGGER = get_logger(__file__) diff --git a/convert/src/coco_import/import_service.py b/convert/convert/coco_import/import_service.py similarity index 86% rename from convert/src/coco_import/import_service.py rename to convert/convert/coco_import/import_service.py index 3cf4dd2c9..891c46012 
100644 --- a/convert/src/coco_import/import_service.py +++ b/convert/convert/coco_import/import_service.py @@ -6,13 +6,13 @@ import requests from fastapi import HTTPException, status -from src.coco_import.convert import ConvertToBadgerdoc -from src.config import settings -from src.exceptions import UploadLimitExceedError -from src.logger import get_logger -from src.models import coco -from src.utils.common_utils import check_uploading_limit -from src.utils.s3_utils import S3Manager, s3_download_files +from convert.coco_import.convert import ConvertToBadgerdoc +from convert.config import settings +from convert.exceptions import UploadLimitExceedError +from convert.logger import get_logger +from convert.models import coco +from convert.utils.common_utils import check_uploading_limit +from convert.utils.s3_utils import S3Manager, s3_download_files LOGGER = get_logger(__file__) diff --git a/convert/src/config.py b/convert/convert/config.py similarity index 99% rename from convert/src/config.py rename to convert/convert/config.py index 780b1421a..9b6002f6d 100644 --- a/convert/src/config.py +++ b/convert/convert/config.py @@ -10,7 +10,7 @@ from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry -from src import logger +from convert import logger from dotenv import load_dotenv load_dotenv() diff --git a/convert/src/exceptions.py b/convert/convert/exceptions.py similarity index 100% rename from convert/src/exceptions.py rename to convert/convert/exceptions.py diff --git a/convert/src/label_studio_to_badgerdoc/__init__.py b/convert/convert/label_studio_to_badgerdoc/__init__.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/__init__.py rename to convert/convert/label_studio_to_badgerdoc/__init__.py diff --git a/convert/src/label_studio_to_badgerdoc/badgerdoc_format/__init__.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/__init__.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/badgerdoc_format/__init__.py rename to convert/convert/label_studio_to_badgerdoc/badgerdoc_format/__init__.py diff --git a/convert/src/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py rename to convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py diff --git a/convert/src/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py rename to convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py diff --git a/convert/src/label_studio_to_badgerdoc/badgerdoc_format/badgerdoc_format.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/badgerdoc_format.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/badgerdoc_format/badgerdoc_format.py rename to convert/convert/label_studio_to_badgerdoc/badgerdoc_format/badgerdoc_format.py diff --git a/convert/src/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py 
rename to convert/convert/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py diff --git a/convert/src/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py rename to convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py diff --git a/convert/src/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py similarity index 94% rename from convert/src/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py rename to convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py index bb921d7bf..0373a0d72 100644 --- a/convert/src/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py @@ -5,11 +5,11 @@ from botocore.client import BaseClient from tenant_dependency import TenantData -from src.label_studio_to_badgerdoc.badgerdoc_format.annotation_converter_practic import ( +from convert.label_studio_to_badgerdoc.badgerdoc_format.annotation_converter_practic import ( AnnotationConverterToTheory, ) -from src.label_studio_to_badgerdoc.labelstudio_format import LabelStudioFormat -from src.logger import get_logger +from convert.label_studio_to_badgerdoc.labelstudio_format import LabelStudioFormat +from convert.logger import get_logger from .models import S3Path, bd_annotation_model_practic from .models.bd_annotation_model import BadgerdocAnnotation diff --git a/convert/src/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py b/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py similarity index 96% rename from convert/src/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py rename to convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py index c9ceee707..11bd8687e 100644 --- a/convert/src/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py +++ b/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py @@ -11,26 +11,26 @@ from fastapi.encoders import jsonable_encoder from tenant_dependency import TenantData -from src.config import DEFAULT_PAGE_BORDER_OFFSET, settings -from src.label_studio_to_badgerdoc.badgerdoc_format.annotation_converter import ( +from convert.config import DEFAULT_PAGE_BORDER_OFFSET, settings +from convert.label_studio_to_badgerdoc.badgerdoc_format.annotation_converter import ( AnnotationConverter, ) -from src.label_studio_to_badgerdoc.badgerdoc_format.badgerdoc_format import ( +from convert.label_studio_to_badgerdoc.badgerdoc_format.badgerdoc_format import ( BadgerdocFormat, ) -from src.label_studio_to_badgerdoc.badgerdoc_format.pdf_renderer import ( +from convert.label_studio_to_badgerdoc.badgerdoc_format.pdf_renderer import ( PDFRenderer, ) -from src.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import ( +from convert.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import ( TextToBadgerdocTokensConverter, ) -from src.label_studio_to_badgerdoc.models import BadgerdocToken, DocumentLink -from src.label_studio_to_badgerdoc.models.label_studio_models import ( +from convert.label_studio_to_badgerdoc.models import BadgerdocToken, DocumentLink +from 
convert.label_studio_to_badgerdoc.models.label_studio_models import ( LabelStudioModel, S3Path, ValidationType, ) -from src.logger import get_logger +from convert.logger import get_logger LOGGER = get_logger(__file__) LOGGER.setLevel("DEBUG") diff --git a/convert/src/label_studio_to_badgerdoc/labelstudio_format/__init__.py b/convert/convert/label_studio_to_badgerdoc/labelstudio_format/__init__.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/labelstudio_format/__init__.py rename to convert/convert/label_studio_to_badgerdoc/labelstudio_format/__init__.py diff --git a/convert/src/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py b/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py similarity index 99% rename from convert/src/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py rename to convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py index 465858811..f73621b9a 100644 --- a/convert/src/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py +++ b/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py @@ -4,8 +4,8 @@ import requests from fastapi import HTTPException, status -from src.config import settings -from src.logger import get_logger +from convert.config import settings +from convert.logger import get_logger from ..models.bd_annotation_model import AnnotationLink, BadgerdocAnnotation from ..models.bd_manifest_model_practic import Manifest diff --git a/convert/src/label_studio_to_badgerdoc/models/__init__.py b/convert/convert/label_studio_to_badgerdoc/models/__init__.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/models/__init__.py rename to convert/convert/label_studio_to_badgerdoc/models/__init__.py diff --git a/convert/src/label_studio_to_badgerdoc/models/bd_annotation_model.py b/convert/convert/label_studio_to_badgerdoc/models/bd_annotation_model.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/models/bd_annotation_model.py rename to convert/convert/label_studio_to_badgerdoc/models/bd_annotation_model.py diff --git a/convert/src/label_studio_to_badgerdoc/models/bd_annotation_model_practic.py b/convert/convert/label_studio_to_badgerdoc/models/bd_annotation_model_practic.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/models/bd_annotation_model_practic.py rename to convert/convert/label_studio_to_badgerdoc/models/bd_annotation_model_practic.py diff --git a/convert/src/label_studio_to_badgerdoc/models/bd_manifest_model_practic.py b/convert/convert/label_studio_to_badgerdoc/models/bd_manifest_model_practic.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/models/bd_manifest_model_practic.py rename to convert/convert/label_studio_to_badgerdoc/models/bd_manifest_model_practic.py diff --git a/convert/src/label_studio_to_badgerdoc/models/bd_tokens_model.py b/convert/convert/label_studio_to_badgerdoc/models/bd_tokens_model.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/models/bd_tokens_model.py rename to convert/convert/label_studio_to_badgerdoc/models/bd_tokens_model.py diff --git a/convert/src/label_studio_to_badgerdoc/models/common.py b/convert/convert/label_studio_to_badgerdoc/models/common.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/models/common.py rename to convert/convert/label_studio_to_badgerdoc/models/common.py diff --git 
a/convert/src/label_studio_to_badgerdoc/models/label_studio_models.py b/convert/convert/label_studio_to_badgerdoc/models/label_studio_models.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/models/label_studio_models.py rename to convert/convert/label_studio_to_badgerdoc/models/label_studio_models.py diff --git a/convert/src/label_studio_to_badgerdoc/models/text_model.py b/convert/convert/label_studio_to_badgerdoc/models/text_model.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/models/text_model.py rename to convert/convert/label_studio_to_badgerdoc/models/text_model.py diff --git a/convert/src/label_studio_to_badgerdoc/text_to_badgerdoc_use_case.py b/convert/convert/label_studio_to_badgerdoc/text_to_badgerdoc_use_case.py similarity index 100% rename from convert/src/label_studio_to_badgerdoc/text_to_badgerdoc_use_case.py rename to convert/convert/label_studio_to_badgerdoc/text_to_badgerdoc_use_case.py diff --git a/convert/src/logger.py b/convert/convert/logger.py similarity index 100% rename from convert/src/logger.py rename to convert/convert/logger.py diff --git a/convert/src/main.py b/convert/convert/main.py similarity index 71% rename from convert/src/main.py rename to convert/convert/main.py index 7a877f92f..281d03761 100644 --- a/convert/src/main.py +++ b/convert/convert/main.py @@ -1,8 +1,8 @@ from fastapi import FastAPI # type: ignore -from src.config import API_NAME, API_VERSION, settings -from src.logger import get_logger -from src.routers import coco, text, label_studio +from convert.config import API_NAME, API_VERSION, settings +from convert.logger import get_logger +from convert.routers import coco, text, label_studio LOGGER = get_logger(__file__) diff --git a/convert/src/models/__init__.py b/convert/convert/models/__init__.py similarity index 100% rename from convert/src/models/__init__.py rename to convert/convert/models/__init__.py diff --git a/convert/src/models/coco.py b/convert/convert/models/coco.py similarity index 100% rename from convert/src/models/coco.py rename to convert/convert/models/coco.py diff --git a/convert/src/routers/__init__.py b/convert/convert/routers/__init__.py similarity index 100% rename from convert/src/routers/__init__.py rename to convert/convert/routers/__init__.py diff --git a/convert/src/routers/coco.py b/convert/convert/routers/coco.py similarity index 91% rename from convert/src/routers/coco.py rename to convert/convert/routers/coco.py index 747fd0831..a0a46528a 100644 --- a/convert/src/routers/coco.py +++ b/convert/convert/routers/coco.py @@ -7,17 +7,17 @@ from requests import HTTPError from tenant_dependency import TenantData, get_tenant_info -from src.coco_export.convert import ConvertToCoco, ExportBadgerdoc -from src.coco_export.export_service import ( +from convert.coco_export.convert import ConvertToCoco, ExportBadgerdoc +from convert.coco_export.export_service import ( export_run, export_run_and_return_url, ) -from src.coco_import.convert import ConvertToBadgerdoc -from src.coco_import.import_job import create_import_job -from src.config import minio_client, settings -from src.logger import get_logger -from src.models import coco -from src.utils.s3_utils import get_bucket_path +from convert.coco_import.convert import ConvertToBadgerdoc +from convert.coco_import.import_job import create_import_job +from convert.config import minio_client, settings +from convert.logger import get_logger +from convert.models import coco +from convert.utils.s3_utils import get_bucket_path router = 
APIRouter(prefix="/coco", tags=["coco"]) LOGGER = get_logger(__file__) diff --git a/convert/src/routers/label_studio.py b/convert/convert/routers/label_studio.py similarity index 84% rename from convert/src/routers/label_studio.py rename to convert/convert/routers/label_studio.py index d3563ff3b..2dfcec552 100644 --- a/convert/src/routers/label_studio.py +++ b/convert/convert/routers/label_studio.py @@ -3,15 +3,15 @@ from fastapi import APIRouter, Depends, Header, status from tenant_dependency import TenantData, get_tenant_info -from src.config import minio_client, settings -from src.label_studio_to_badgerdoc.badgerdoc_to_label_studio_use_case import ( +from convert.config import minio_client, settings +from convert.label_studio_to_badgerdoc.badgerdoc_to_label_studio_use_case import ( BDToLabelStudioConvertUseCase, ) -from src.label_studio_to_badgerdoc.label_studio_to_badgerdoc_use_case import ( +from convert.label_studio_to_badgerdoc.label_studio_to_badgerdoc_use_case import ( LabelStudioToBDConvertUseCase, ) -from src.label_studio_to_badgerdoc.models import LabelStudioRequest -from src.label_studio_to_badgerdoc.models.label_studio_models import ( +from convert.label_studio_to_badgerdoc.models import LabelStudioRequest +from convert.label_studio_to_badgerdoc.models.label_studio_models import ( BadgerdocToLabelStudioRequest, ) diff --git a/convert/src/routers/text.py b/convert/convert/routers/text.py similarity index 68% rename from convert/src/routers/text.py rename to convert/convert/routers/text.py index ffb719fdb..272bec5b2 100644 --- a/convert/src/routers/text.py +++ b/convert/convert/routers/text.py @@ -1,8 +1,8 @@ from fastapi import APIRouter, status -from src.config import minio_client -from src.label_studio_to_badgerdoc.models.text_model import TextRequest -from src.label_studio_to_badgerdoc.text_to_badgerdoc_use_case import TextToBDConvertUseCase +from convert.config import minio_client +from convert.label_studio_to_badgerdoc.models.text_model import TextRequest +from convert.label_studio_to_badgerdoc.text_to_badgerdoc_use_case import TextToBDConvertUseCase router = APIRouter(prefix="/text", tags=["text"]) diff --git a/convert/src/utils/__init__.py b/convert/convert/utils/__init__.py similarity index 100% rename from convert/src/utils/__init__.py rename to convert/convert/utils/__init__.py diff --git a/convert/src/utils/common_utils.py b/convert/convert/utils/common_utils.py similarity index 88% rename from convert/src/utils/common_utils.py rename to convert/convert/utils/common_utils.py index 17401565a..929a588c9 100644 --- a/convert/src/utils/common_utils.py +++ b/convert/convert/utils/common_utils.py @@ -2,8 +2,8 @@ from typing import Any, Dict, List from zipfile import ZipFile -from src.config import minio_client, settings -from src.exceptions import UploadLimitExceedError +from convert.config import minio_client, settings +from convert.exceptions import UploadLimitExceedError def check_uploading_limit(files_list: List[str]) -> Any: diff --git a/convert/src/utils/json_utils.py b/convert/convert/utils/json_utils.py similarity index 99% rename from convert/src/utils/json_utils.py rename to convert/convert/utils/json_utils.py index fee190800..b03c98f90 100644 --- a/convert/src/utils/json_utils.py +++ b/convert/convert/utils/json_utils.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional -from src.logger import get_logger +from convert.logger import get_logger LOGGER = get_logger(__file__) diff --git a/convert/src/utils/render_pdf_page.py 
b/convert/convert/utils/render_pdf_page.py similarity index 89% rename from convert/src/utils/render_pdf_page.py rename to convert/convert/utils/render_pdf_page.py index 6b8d2751b..d17d8101f 100644 --- a/convert/src/utils/render_pdf_page.py +++ b/convert/convert/utils/render_pdf_page.py @@ -4,9 +4,9 @@ import pdfplumber -from src.config import settings -from src.logger import get_logger -from src.utils.common_utils import add_to_zip_and_local_remove +from convert.config import settings +from convert.logger import get_logger +from convert.utils.common_utils import add_to_zip_and_local_remove LOGGER = get_logger(__file__) diff --git a/convert/src/utils/s3_utils.py b/convert/convert/utils/s3_utils.py similarity index 94% rename from convert/src/utils/s3_utils.py rename to convert/convert/utils/s3_utils.py index 19f61514a..d1f890151 100644 --- a/convert/src/utils/s3_utils.py +++ b/convert/convert/utils/s3_utils.py @@ -5,11 +5,11 @@ import urllib3 from fastapi import HTTPException, status -from src.config import settings -from src.exceptions import BucketError, FileKeyError, UploadLimitExceedError -from src.logger import get_logger -from src.models import coco -from src.utils.common_utils import check_uploading_limit +from convert.config import settings +from convert.exceptions import BucketError, FileKeyError, UploadLimitExceedError +from convert.logger import get_logger +from convert.models import coco +from convert.utils.common_utils import check_uploading_limit logger = get_logger(__name__) diff --git a/convert/docker-compose.yaml b/convert/docker-compose.yaml index 796ec3bca..ca9bbb402 100644 --- a/convert/docker-compose.yaml +++ b/convert/docker-compose.yaml @@ -7,7 +7,7 @@ services: - "9000:9000" - "9001:9001" env_file: - - src/.env + - convert/.env command: server --console-address ":9001" /export web: @@ -16,7 +16,7 @@ services: target: build restart: on-failure env_file: - - src/.env + - convert/.env volumes: - "./src:/convert/src" command: "uvicorn src.main:app --host 0.0.0.0 --port 8080 --reload" diff --git a/convert/src/.env b/convert/src/.env deleted file mode 100644 index 3ce7a8617..000000000 --- a/convert/src/.env +++ /dev/null @@ -1,19 +0,0 @@ -MINIO_HOST=http://minio:9000 -MINIO_ACCESS_KEY=minio -MINIO_SECRET_KEY=minio123 -MINIO_SERVER=minio:9000 -MINIO_ROOT_USER=minio -MINIO_ROOT_PASSWORD=minio123 -S3_PREFIX= -# S3_CREDENTIALS_PROVIDER can be: minio (default), aws_iam -S3_CREDENTIALS_PROVIDER=minio - -ASSETS_SERVICE_URL=http://dev2.badgerdoc.com/api/v1/assets/files/ -CATEGORY_SERVICE_URL=http://dev2.badgerdoc.com/api/v1/annotation/categories/ -JOB_SERVICE_URL=http://dev2.badgerdoc.com/api/v1/jobs/jobs/ -ANNOTATION_SERVICE_URL=http://dev2.badgerdoc.com/api/v1/annotation/ -TAXONOMY_SERVICE_URL=http://dev2.badgerdoc.com/api/v1/taxonomy/ -IMPORT_COCO_URL=http://0.0.0.0:8080/converter/import/ -KEYCLOAK_URL=http://dev2.badgerdoc.com -ROOT_PATH= - diff --git a/convert/tests/test_label_studio/test_export.py b/convert/tests/test_label_studio/test_export.py index 6ab52a7dd..e3a30b379 100644 --- a/convert/tests/test_label_studio/test_export.py +++ b/convert/tests/test_label_studio/test_export.py @@ -1,17 +1,17 @@ from pathlib import Path -from src.label_studio_to_badgerdoc.badgerdoc_format import ( +from convert.label_studio_to_badgerdoc.badgerdoc_format import ( annotation_converter_practic, ) -from src.label_studio_to_badgerdoc.labelstudio_format.label_studio_format import ( +from convert.label_studio_to_badgerdoc.labelstudio_format.label_studio_format import ( LabelStudioFormat, ) 
-from src.label_studio_to_badgerdoc.models import ( +from convert.label_studio_to_badgerdoc.models import ( bd_annotation_model_practic, bd_manifest_model_practic, ) -from src.label_studio_to_badgerdoc.models.bd_tokens_model import Page -from src.label_studio_to_badgerdoc.models.label_studio_models import ( +from convert.label_studio_to_badgerdoc.models.bd_tokens_model import Page +from convert.label_studio_to_badgerdoc.models.label_studio_models import ( LabelStudioModel, ) diff --git a/convert/tests/test_label_studio/test_import.py b/convert/tests/test_label_studio/test_import.py index 23eb7c123..048bbe6e0 100644 --- a/convert/tests/test_label_studio/test_import.py +++ b/convert/tests/test_label_studio/test_import.py @@ -2,20 +2,20 @@ from pathlib import Path from tempfile import TemporaryDirectory -from src.config import ( +from convert.config import ( DEFAULT_PAGE_BORDER_OFFSET, DEFAULT_PDF_FONT_HEIGHT, DEFAULT_PDF_FONT_WIDTH, DEFAULT_PDF_LINE_SPACING, DEFAULT_PDF_PAGE_WIDTH, ) -from src.label_studio_to_badgerdoc.badgerdoc_format.badgerdoc_format import ( +from convert.label_studio_to_badgerdoc.badgerdoc_format.badgerdoc_format import ( BadgerdocFormat, ) -from src.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import ( +from convert.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import ( TextToBadgerdocTokensConverter, ) -from src.label_studio_to_badgerdoc.models.label_studio_models import ( +from convert.label_studio_to_badgerdoc.models.label_studio_models import ( LabelStudioModel, ) diff --git a/convert/tests/test_label_studio/test_text_wrapper.py b/convert/tests/test_label_studio/test_text_wrapper.py index 9655171fb..aef00398f 100644 --- a/convert/tests/test_label_studio/test_text_wrapper.py +++ b/convert/tests/test_label_studio/test_text_wrapper.py @@ -1,7 +1,7 @@ import collections import string -from src.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import ( # noqa: E501 +from convert.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import ( # noqa: E501 TextWrapper, ) diff --git a/jobs/jobs/main.py b/jobs/jobs/main.py index e8a1037ee..57a8939f7 100644 --- a/jobs/jobs/main.py +++ b/jobs/jobs/main.py @@ -138,7 +138,7 @@ async def run_job( current_tenant: Optional[str] = Header(None, alias="X-Current-Tenant"), db: Session = Depends(db_service.get_session), token_data: TenantData = Depends(tenant), -) -> Union[Dict[str, Any], HTTPException]: +) -> Dict[str, Any]: """Runs any type of Job""" jw_token = token_data.token job_to_run = db_service.get_job_in_db_by_id(db, job_id) @@ -203,7 +203,7 @@ async def change_job( token_data: TenantData = Depends(tenant), current_tenant: Optional[str] = Header(None, alias="X-Current-Tenant"), db: Session = Depends(db_service.get_session), -) -> Union[Dict[str, Any], HTTPException]: +) -> Dict[str, Any]: """Provides an ability to change any value of any field of any Job in the database""" job_to_change = db_service.get_job_in_db_by_id(db, job_id) @@ -347,7 +347,7 @@ async def delete_job( current_tenant: Optional[str] = Header(None, alias="X-Current-Tenant"), db: Session = Depends(db_service.get_session), token_data: TenantData = Depends(tenant), -) -> Union[Dict[str, Any], HTTPException]: +) -> Dict[str, Any]: """Deletes Job instance by its id""" jw_token = token_data.token job_to_delete = db_service.get_job_in_db_by_id(db, job_id) From 668295f512e7eacd3101453e094136944cd73ab6 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 13:20:58 +0400 Subject: [PATCH 04/22] 
refactor: models app name --- models/Dockerfile | 4 +-- models/alembic/env.py | 6 ++-- models/docker-compose.yaml | 6 ++-- models/{src => models}/colab_ssh_utils.py | 6 ++-- .../{src => models}/colab_tunnel_script.ipynb | 0 models/{src => models}/constants.py | 2 +- models/{src => models}/convert_utils.py | 4 +-- models/{src => models}/crud.py | 4 +-- models/{src => models}/db.py | 2 +- models/{src => models}/errors.py | 0 models/{src => models}/logger.py | 0 models/{src => models}/main.py | 6 ++-- models/{src => models}/routers/__init__.py | 2 +- .../routers/basements_routers.py | 11 ++++--- .../routers/deployed_models_routers.py | 4 +-- .../{src => models}/routers/models_routers.py | 10 ++++--- .../routers/training_routers.py | 15 ++++++---- models/{src => models}/schemas.py | 2 +- models/{src => models}/utils.py | 10 +++---- models/src/.env | 29 ------------------- models/tests/conftest.py | 10 +++---- models/tests/test_basement_routers.py | 10 +++---- models/tests/test_colab_interactions.py | 8 ++--- models/tests/test_colab_start_training.py | 18 ++++++------ models/tests/test_crud.py | 8 ++--- models/tests/test_deployed_models_routers.py | 4 +-- models/tests/test_models_routers.py | 6 ++-- models/tests/test_schemas.py | 2 +- models/tests/test_trainings_routers.py | 10 +++---- models/tests/test_utils.py | 26 ++++++++--------- models/tests/utils.py | 2 +- 31 files changed, 103 insertions(+), 124 deletions(-) rename models/{src => models}/colab_ssh_utils.py (95%) rename models/{src => models}/colab_tunnel_script.ipynb (100%) rename models/{src => models}/constants.py (97%) rename models/{src => models}/convert_utils.py (91%) rename models/{src => models}/crud.py (95%) rename models/{src => models}/db.py (98%) rename models/{src => models}/errors.py (100%) rename models/{src => models}/logger.py (100%) rename models/{src => models}/main.py (95%) rename models/{src => models}/routers/__init__.py (67%) rename models/{src => models}/routers/basements_routers.py (97%) rename models/{src => models}/routers/deployed_models_routers.py (98%) rename models/{src => models}/routers/models_routers.py (98%) rename models/{src => models}/routers/training_routers.py (97%) rename models/{src => models}/schemas.py (99%) rename models/{src => models}/utils.py (98%) delete mode 100644 models/src/.env diff --git a/models/Dockerfile b/models/Dockerfile index 66bf45704..11f35d66a 100644 --- a/models/Dockerfile +++ b/models/Dockerfile @@ -20,7 +20,7 @@ FROM base AS build WORKDIR /working -COPY src /working/src +COPY models /working/models COPY alembic /working/alembic COPY alembic.ini /working @@ -56,7 +56,7 @@ RUN python3 -m pytest --cov=src tests/ -m "not integration" FROM sonarsource/sonar-scanner-cli:4.6 AS sonar COPY tests /working/tests -COPY src /working/src +COPY models /working/models COPY sonar-project.properties /working/sonar-project.properties CMD sonar-scanner \ diff --git a/models/alembic/env.py b/models/alembic/env.py index 984d6dd0e..1889ab486 100644 --- a/models/alembic/env.py +++ b/models/alembic/env.py @@ -4,9 +4,9 @@ from sqlalchemy import engine_from_config, pool from alembic import context -from src.constants import DATABASE_URL -from src.db import Base -from src.utils import get_test_db_url +from models.constants import DATABASE_URL +from models.db import Base +from models.utils import get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. 
diff --git a/models/docker-compose.yaml b/models/docker-compose.yaml index 1c233d56e..ecd5021ee 100644 --- a/models/docker-compose.yaml +++ b/models/docker-compose.yaml @@ -11,7 +11,7 @@ services: - '9000:9000' - '9001:9001' env_file: - - src/.env + - models/.env networks: - app-tier @@ -20,7 +20,7 @@ services: ports: - 5432:5432 env_file: - - src/.env + - models/.env networks: - app-tier @@ -30,7 +30,7 @@ services: target: build container_name: models_web_app env_file: - - src/.env + - models/.env volumes: - .:/working ports: diff --git a/models/src/colab_ssh_utils.py b/models/models/colab_ssh_utils.py similarity index 95% rename from models/src/colab_ssh_utils.py rename to models/models/colab_ssh_utils.py index 85ed605c7..55c40d843 100644 --- a/models/src/colab_ssh_utils.py +++ b/models/models/colab_ssh_utils.py @@ -8,9 +8,9 @@ from paramiko import AutoAddPolicy, SSHClient from paramiko.ssh_exception import SSHException -from src.constants import MINIO_ACCESS_KEY, MINIO_HOST, MINIO_SECRET_KEY -from src.errors import ColabFileUploadError -from src.schemas import TrainingCredentials +from models.constants import MINIO_ACCESS_KEY, MINIO_HOST, MINIO_SECRET_KEY +from models.errors import ColabFileUploadError +from models.schemas import TrainingCredentials LOGGER = logging.getLogger(name="models") COLAB_TRAINING_DIRECTORY = "/content/training/" diff --git a/models/src/colab_tunnel_script.ipynb b/models/models/colab_tunnel_script.ipynb similarity index 100% rename from models/src/colab_tunnel_script.ipynb rename to models/models/colab_tunnel_script.ipynb diff --git a/models/src/constants.py b/models/models/constants.py similarity index 97% rename from models/src/constants.py rename to models/models/constants.py index d9526f040..998b6c2d9 100644 --- a/models/src/constants.py +++ b/models/models/constants.py @@ -3,7 +3,7 @@ from dotenv import find_dotenv, load_dotenv -load_dotenv(find_dotenv("./src/.env")) +load_dotenv(find_dotenv("./models/.env")) POSTGRES_USER = os.environ.get("POSTGRES_USER") POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD") diff --git a/models/src/convert_utils.py b/models/models/convert_utils.py similarity index 91% rename from models/src/convert_utils.py rename to models/models/convert_utils.py index 62c7010d4..ca7e01f48 100644 --- a/models/src/convert_utils.py +++ b/models/models/convert_utils.py @@ -6,8 +6,8 @@ from fastapi import HTTPException from requests import ConnectionError, RequestException, Timeout -from src.constants import CONVERT_EXPORT_URL, HEADER_TENANT -from src.schemas import ConvertRequestSchema +from models.constants import CONVERT_EXPORT_URL, HEADER_TENANT +from models.schemas import ConvertRequestSchema LOGGER = logging.getLogger(name="models") diff --git a/models/src/crud.py b/models/models/crud.py similarity index 95% rename from models/src/crud.py rename to models/models/crud.py index ed80af9da..83f8e75e8 100644 --- a/models/src/crud.py +++ b/models/models/crud.py @@ -3,8 +3,8 @@ from sqlalchemy import desc from sqlalchemy.orm import Session -from src.db import Basement, Model, Training -from src.schemas import BasementBase, ModelBase, TrainingBase, TrainingUpdate +from models.db import Basement, Model, Training +from models.schemas import BasementBase, ModelBase, TrainingBase, TrainingUpdate def is_id_existing( diff --git a/models/src/db.py b/models/models/db.py similarity index 98% rename from models/src/db.py rename to models/models/db.py index 739e370c4..854e6b735 100644 --- a/models/src/db.py +++ b/models/models/db.py @@ -18,7 +18,7 @@ 
from sqlalchemy.orm import Session, relationship, sessionmaker from sqlalchemy.types import ARRAY -from src.constants import DATABASE_URL +from models.constants import DATABASE_URL Base = declarative_base() diff --git a/models/src/errors.py b/models/models/errors.py similarity index 100% rename from models/src/errors.py rename to models/models/errors.py diff --git a/models/src/logger.py b/models/models/logger.py similarity index 100% rename from models/src/logger.py rename to models/models/logger.py diff --git a/models/src/main.py b/models/models/main.py similarity index 95% rename from models/src/main.py rename to models/models/main.py index 0813dc2d5..7f2525886 100644 --- a/models/src/main.py +++ b/models/models/main.py @@ -8,8 +8,8 @@ from paramiko.ssh_exception import SSHException from sqlalchemy.exc import SQLAlchemyError -from src.constants import API_NAME, API_VERSION, ROOT_PATH -from src.errors import ( +from models.constants import API_NAME, API_VERSION, ROOT_PATH +from models.errors import ( ColabFileUploadError, NoSuchTenant, botocore_error_handler, @@ -20,7 +20,7 @@ ssh_connection_error_handler, subprocess_called_error_handler, ) -from src.routers import ( +from models.routers import ( basements_routers, deployed_models_routers, models_routers, diff --git a/models/src/routers/__init__.py b/models/models/routers/__init__.py similarity index 67% rename from models/src/routers/__init__.py rename to models/models/routers/__init__.py index 9973914dc..fe4a7429a 100644 --- a/models/src/routers/__init__.py +++ b/models/models/routers/__init__.py @@ -1,5 +1,5 @@ from tenant_dependency import get_tenant_info -from src.constants import ALGORITHM, KEYCLOACK_URI +from models.constants import ALGORITHM, KEYCLOACK_URI tenant = get_tenant_info(url=KEYCLOACK_URI, algorithm=ALGORITHM) diff --git a/models/src/routers/basements_routers.py b/models/models/routers/basements_routers.py similarity index 97% rename from models/src/routers/basements_routers.py rename to models/models/routers/basements_routers.py index dd0814a51..5a166793f 100644 --- a/models/src/routers/basements_routers.py +++ b/models/models/routers/basements_routers.py @@ -12,10 +12,10 @@ from sqlalchemy.orm import Session from tenant_dependency import TenantData -from src import crud, schemas -from src.db import Basement, get_db -from src.routers import tenant -from src.utils import ( +from models import crud, schemas +from models.db import Basement, get_db +from models.routers import tenant +from models.utils import ( NoSuchTenant, convert_bucket_name_if_s3prefix, get_minio_resource, @@ -32,6 +32,7 @@ @router.post( "/create", status_code=201, + response_model=None, responses={ 201: { "model": schemas.Basement, @@ -95,6 +96,7 @@ def search_basements( @router.get( "/{basements_id}", + response_model=None, responses={ 200: { "model": schemas.Basement, @@ -118,6 +120,7 @@ def get_basement_by_id( @router.put( "/update", + response_model=None, responses={ 200: { "model": schemas.Basement, diff --git a/models/src/routers/deployed_models_routers.py b/models/models/routers/deployed_models_routers.py similarity index 98% rename from models/src/routers/deployed_models_routers.py rename to models/models/routers/deployed_models_routers.py index 36c7f1f93..8ca563f03 100644 --- a/models/src/routers/deployed_models_routers.py +++ b/models/models/routers/deployed_models_routers.py @@ -6,8 +6,8 @@ from kubernetes import client, config from kubernetes.client.exceptions import ApiException -from src import schemas, utils -from src.constants import 
MODELS_NAMESPACE +from models import schemas, utils +from models.constants import MODELS_NAMESPACE router = APIRouter(prefix="/deployed_models", tags=["deployed_models"]) LOGGER = logging.getLogger(name="models") diff --git a/models/src/routers/models_routers.py b/models/models/routers/models_routers.py similarity index 98% rename from models/src/routers/models_routers.py rename to models/models/routers/models_routers.py index 8b1d9d5e7..8bf314bb8 100644 --- a/models/src/routers/models_routers.py +++ b/models/models/routers/models_routers.py @@ -12,10 +12,10 @@ from sqlalchemy.orm import Session from tenant_dependency import TenantData -from src import crud, schemas, utils -from src.crud import get_latest_model, get_second_latest_model -from src.db import Basement, Model, Training, get_db -from src.routers import tenant +from models import crud, schemas, utils +from models.crud import get_latest_model, get_second_latest_model +from models.db import Basement, Model, Training, get_db +from models.routers import tenant LOGGER = logging.getLogger(name="models") @@ -117,6 +117,7 @@ def search_models( @router.get( "/{models_id}", + response_model=None, responses={ 200: { "model": schemas.Model, @@ -169,6 +170,7 @@ def get_model_by_id_and_version( @router.put( "/update", + response_model=None, responses={ 200: { "model": schemas.Model, diff --git a/models/src/routers/training_routers.py b/models/models/routers/training_routers.py similarity index 97% rename from models/src/routers/training_routers.py rename to models/models/routers/training_routers.py index 7b042fc15..e06a55584 100644 --- a/models/src/routers/training_routers.py +++ b/models/models/routers/training_routers.py @@ -24,8 +24,8 @@ from sqlalchemy.orm import Session from tenant_dependency import TenantData -from src import crud, schemas, utils -from src.colab_ssh_utils import ( +from models import crud, schemas, utils +from models.colab_ssh_utils import ( COLAB_TRAINING_DIRECTORY, check_aws_credentials_file, connect_colab, @@ -33,10 +33,10 @@ sync_colab_with_minio, upload_file_to_colab, ) -from src.convert_utils import prepare_dataset_info -from src.db import Basement, Training, get_db -from src.routers import tenant -from src.utils import ( +from models.convert_utils import prepare_dataset_info +from models.db import Basement, Training, get_db +from models.routers import tenant +from models.utils import ( NoSuchTenant, convert_bucket_name_if_s3prefix, get_minio_object, @@ -55,6 +55,7 @@ @router.post( "/create", status_code=201, + response_model=None, responses={ 201: { "model": schemas.Training, @@ -153,6 +154,7 @@ def search_training( @router.get( "/{trainings_id}", + response_model=None, responses={ 200: { "model": schemas.Training, @@ -176,6 +178,7 @@ def get_training_by_id( @router.put( "/update", + response_model=None, responses={ 200: { "model": schemas.Training, diff --git a/models/src/schemas.py b/models/models/schemas.py similarity index 99% rename from models/src/schemas.py rename to models/models/schemas.py index 3db682d57..71f5e13f6 100644 --- a/models/src/schemas.py +++ b/models/models/schemas.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, ConstrainedStr, Field, PositiveInt, validator -from src.db import StatusEnum +from models.db import StatusEnum class AtLeastOneChar(ConstrainedStr): diff --git a/models/src/utils.py b/models/models/utils.py similarity index 98% rename from models/src/utils.py rename to models/models/utils.py index d82e17108..3f580002e 100644 --- a/models/src/utils.py +++ b/models/models/utils.py @@ 
-10,8 +10,8 @@ from sqlalchemy.orm import Session from starlette.datastructures import UploadFile -import src.logger as logger -from src.constants import ( +import models.logger as logger +from models.constants import ( CONTAINER_NAME, DOCKER_REGISTRY_URL, DOMAIN_NAME, @@ -25,9 +25,9 @@ S3_CREDENTIALS_PROVIDER, S3_PREFIX, ) -from src.db import Basement, Model -from src.errors import NoSuchTenant -from src.schemas import DeployedModelPod, MinioHTTPMethod +from models.db import Basement, Model +from models.errors import NoSuchTenant +from models.schemas import DeployedModelPod, MinioHTTPMethod logger_ = logger.get_logger(__name__) diff --git a/models/src/.env b/models/src/.env deleted file mode 100644 index 34a66ca3b..000000000 --- a/models/src/.env +++ /dev/null @@ -1,29 +0,0 @@ -POSTGRES_USER=admin -POSTGRES_PASSWORD=admin -POSTGRES_DB=models -POSTGRES_HOST=db -POSTGRES_PORT=5432 -DATABASE_URL="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}" - -MINIO_HOST="minio:9000" -MINIO_PUBLIC_HOST=${MINIO_HOST} -MINIO_ACCESS_KEY="minio" -MINIO_SECRET_KEY="minio123" -MINIO_ROOT_USER=${MINIO_ACCESS_KEY} -MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY} -S3_PREFIX= -S3_CREDENTIALS_PROVIDER=minio - -DOCKER_REGISTRY_URL="localhost:5000" - -SECRET=some_secret_key - -DOCKER_REGISTRY_URL="10.228.0.184:5000" -MODELS_NAMESPACE=dev2 -ROOT_PATH="" -DOMAIN_NAME="badgerdoc.com" - -KEYCLOACK_URI="http://bagerdoc-keycloack" -ALGORITHM="RS256" - -CONVERT_EXPORT_URL="http://convert/export" diff --git a/models/tests/conftest.py b/models/tests/conftest.py index 5d0c7a48f..35f041828 100644 --- a/models/tests/conftest.py +++ b/models/tests/conftest.py @@ -16,16 +16,16 @@ from sqlalchemy_utils import create_database, database_exists from alembic import command -from src.constants import ( +from models.constants import ( DATABASE_URL, MINIO_ACCESS_KEY, MINIO_HOST, MINIO_SECRET_KEY, ) -from src.db import Base, Basement, Training, get_db -from src.main import app -from src.routers import tenant -from src.utils import get_test_db_url +from models.db import Base, Basement, Training, get_db +from models.main import app +from models.routers import tenant +from models.utils import get_test_db_url from .override_app_dependency import override from .test_colab_start_training import ( diff --git a/models/tests/test_basement_routers.py b/models/tests/test_basement_routers.py index cd2b4ed55..6756b340f 100644 --- a/models/tests/test_basement_routers.py +++ b/models/tests/test_basement_routers.py @@ -5,9 +5,9 @@ from fastapi.exceptions import HTTPException from fastapi.testclient import TestClient -from src.db import Basement -from src.main import app -from src.routers import basements_routers +from models.db import Basement +from models.main import app +from models.routers import basements_routers from tests.test_utils import TEST_LIMITS @@ -85,7 +85,7 @@ def test_get_basement_by_id_withot_basement(get): @patch.object(basements_routers.crud, "get_instance") def test_delete_basement_by_id(delete, get, client, monkeypatch): monkeypatch.setattr( - "src.routers.basements_routers.get_minio_resource", Mock() + "models.routers.basements_routers.get_minio_resource", Mock() ) data = {"id": "id"} get.return_value = "expected" @@ -98,7 +98,7 @@ def test_delete_basement_by_id(delete, get, client, monkeypatch): @patch.object(basements_routers.crud, "get_instance") def test_delete_basement_by_id_calls_crud(delete, get, monkeypatch): monkeypatch.setattr( - 
"src.routers.basements_routers.get_minio_resource", Mock() + "models.routers.basements_routers.get_minio_resource", Mock() ) data = basements_routers.schemas.BasementDelete(id="id") get.return_value = "expected" diff --git a/models/tests/test_colab_interactions.py b/models/tests/test_colab_interactions.py index c05e806fa..17b16b454 100644 --- a/models/tests/test_colab_interactions.py +++ b/models/tests/test_colab_interactions.py @@ -3,12 +3,12 @@ import pytest -from src.colab_ssh_utils import ( +from models.colab_ssh_utils import ( COLAB_TRAINING_DIRECTORY, connect_colab, upload_file_to_colab, ) -from src.errors import ColabFileUploadError +from models.errors import ColabFileUploadError TEST_FILE_NAME = "test_file.py" TEST_CREDENTIALS = { @@ -47,11 +47,11 @@ def test_connect_colab_called_with_credentials(monkeypatch) -> None: mock_ssh = mock.Mock(return_value=mock_client) mock_policy = mock.Mock() monkeypatch.setattr( - "src.colab_ssh_utils.SSHClient", + "models.colab_ssh_utils.SSHClient", mock_ssh, ) monkeypatch.setattr( - "src.colab_ssh_utils.AutoAddPolicy", + "models.colab_ssh_utils.AutoAddPolicy", mock.Mock(return_value=mock_policy), ) mock_client.connect = mock.Mock(return_value=1) diff --git a/models/tests/test_colab_start_training.py b/models/tests/test_colab_start_training.py index 7f53623a9..823a4741d 100644 --- a/models/tests/test_colab_start_training.py +++ b/models/tests/test_colab_start_training.py @@ -32,7 +32,7 @@ def test_start_training_db_error(monkeypatch, overrided_token_client) -> None: """Test handling of db connection errors""" monkeypatch.setattr( - "src.crud.Session.query", + "models.crud.Session.query", Mock(side_effect=SQLAlchemyError("some error message")), ) response = overrided_token_client.post( @@ -90,7 +90,7 @@ def test_start_training_colab_connection_error( description in message. """ monkeypatch.setattr( - "src.routers.training_routers.connect_colab", + "models.routers.training_routers.connect_colab", Mock(side_effect=SSHException("some ssh error")), ) response = overrided_token_client.post( @@ -129,11 +129,11 @@ def test_start_training_no_such_bucket_error( """ other_tenant = TEST_TENANTS[1] monkeypatch.setattr( - "src.utils.boto3.resource", + "models.utils.boto3.resource", Mock(return_value=moto_minio), ) monkeypatch.setattr( - "src.routers.training_routers.connect_colab", MockSSHContext + "models.routers.training_routers.connect_colab", MockSSHContext ) response = overrided_token_client.post( START_TRAINING_PATH.format(EXIST_TRAINING_ID), @@ -158,11 +158,11 @@ def test_start_training_boto3_error( return 500 status with error description in message. 
""" monkeypatch.setattr( - "src.routers.training_routers.get_minio_object", + "models.routers.training_routers.get_minio_object", Mock(side_effect=BotoCoreError()), ) monkeypatch.setattr( - "src.routers.training_routers.connect_colab", MockSSHContext + "models.routers.training_routers.connect_colab", MockSSHContext ) response = overrided_token_client.post( START_TRAINING_PATH.format(EXIST_TRAINING_ID), @@ -191,14 +191,14 @@ def test_start_training_integration( """ mock_upload = Mock() monkeypatch.setattr( - "src.routers.training_routers.upload_file_to_colab", mock_upload + "models.routers.training_routers.upload_file_to_colab", mock_upload ) monkeypatch.setattr( - "src.utils.boto3.resource", + "models.utils.boto3.resource", Mock(return_value=save_start_training_minio_objects), ) monkeypatch.setattr( - "src.routers.training_routers.connect_colab", MockSSHContext + "models.routers.training_routers.connect_colab", MockSSHContext ) response = overrided_token_client.post( START_TRAINING_PATH.format(EXIST_TRAINING_ID), diff --git a/models/tests/test_crud.py b/models/tests/test_crud.py index bf93882f7..8a097ad01 100644 --- a/models/tests/test_crud.py +++ b/models/tests/test_crud.py @@ -2,10 +2,10 @@ from pytest import mark -from src import crud -from src.crud import get_instance, get_latest_model -from src.db import Basement, Model, StatusEnum, Training -from src.schemas import BasementBase +from models import crud +from models.crud import get_instance, get_latest_model +from models.db import Basement, Model, StatusEnum, Training +from models.schemas import BasementBase from tests.test_utils import TEST_LIMITS from tests.utils import create_expected_models, delete_date_field, row_to_dict diff --git a/models/tests/test_deployed_models_routers.py b/models/tests/test_deployed_models_routers.py index 18587123c..256908b6f 100644 --- a/models/tests/test_deployed_models_routers.py +++ b/models/tests/test_deployed_models_routers.py @@ -3,8 +3,8 @@ import pytest from kubernetes.client.exceptions import ApiException -from src.routers import deployed_models_routers -from src.schemas import DeployedModelPod +from models.routers import deployed_models_routers +from models.schemas import DeployedModelPod def test_get_deployed_model_list_returns_list_of_models(client): diff --git a/models/tests/test_models_routers.py b/models/tests/test_models_routers.py index 96181a3ee..893f273c3 100644 --- a/models/tests/test_models_routers.py +++ b/models/tests/test_models_routers.py @@ -5,9 +5,9 @@ from fastapi.exceptions import HTTPException from fastapi.testclient import TestClient -from src.db import Basement, Model, StatusEnum -from src.main import app -from src.routers import models_routers +from models.db import Basement, Model, StatusEnum +from models.main import app +from models.routers import models_routers from tests.override_app_dependency import TEST_HEADER, TEST_TENANTS from tests.test_crud import GET_BASEMENT from tests.utils import create_expected_models, delete_date_field, row_to_dict diff --git a/models/tests/test_schemas.py b/models/tests/test_schemas.py index 22d8a833e..5bd2e7cd7 100644 --- a/models/tests/test_schemas.py +++ b/models/tests/test_schemas.py @@ -1,7 +1,7 @@ import pytest from pydantic import ValidationError -from src import schemas +from models import schemas from tests.test_utils import TEST_LIMITS diff --git a/models/tests/test_trainings_routers.py b/models/tests/test_trainings_routers.py index 6517739f1..42e148111 100644 --- a/models/tests/test_trainings_routers.py +++ 
b/models/tests/test_trainings_routers.py @@ -5,9 +5,9 @@ from fastapi.exceptions import HTTPException from fastapi.testclient import TestClient -from src.db import Basement, Training -from src.main import app -from src.routers import training_routers +from models.db import Basement, Training +from models.main import app +from models.routers import training_routers from .override_app_dependency import TEST_HEADER @@ -99,7 +99,7 @@ def test_get_training_by_id_withot_training(get): @patch.object(training_routers.crud, "delete_instance") @patch.object(training_routers.crud, "get_instance") -@patch("src.routers.training_routers.get_minio_resource", Mock()) +@patch("models.routers.training_routers.get_minio_resource", Mock()) def test_delete_training_by_id(get, delete, client): data = {"id": 1} training_routers.get_db = Mock() @@ -113,7 +113,7 @@ def test_delete_training_by_id(get, delete, client): @patch.object(training_routers.crud, "delete_instance") @patch.object(training_routers.crud, "get_instance") -@patch("src.routers.training_routers.get_minio_resource", Mock()) +@patch("models.routers.training_routers.get_minio_resource", Mock()) def test_delete_training_by_id_calls_crud(get, delete): data = training_routers.schemas.TrainingDelete(id=1) db_entity = Mock() diff --git a/models/tests/test_utils.py b/models/tests/test_utils.py index 41e2f1ede..187efbe12 100644 --- a/models/tests/test_utils.py +++ b/models/tests/test_utils.py @@ -8,10 +8,10 @@ from botocore.exceptions import ClientError from kubernetes.client.rest import ApiException -from src import utils -from src.constants import MINIO_HOST -from src.errors import NoSuchTenant -from src.schemas import ( +from models import utils +from models.constants import MINIO_HOST +from models.errors import NoSuchTenant +from models.schemas import ( BasementBase, DeployedModelPod, MinioHTTPMethod, @@ -34,7 +34,7 @@ def test_minio_no_such_bucket_error_handling(moto_minio, monkeypatch): wrong_tenant = "wrong_tenant" error_message = f"Bucket {wrong_tenant} does not exist" monkeypatch.setattr( - "src.utils.boto3.resource", + "models.utils.boto3.resource", Mock(return_value=moto_minio), ) with pytest.raises(utils.NoSuchTenant, match=error_message): @@ -68,7 +68,7 @@ def test_get_object_via_presigned_url( ): """Tests possibility to GET object via generated presigned URL.""" monkeypatch.setattr( - "src.utils.get_minio_resource", + "models.utils.get_minio_resource", Mock(return_value=save_object_minio), ) presigned_url = utils.generate_presigned_url( @@ -101,7 +101,7 @@ def test_generate_presigned_url( URL with correct key, expiration time and signature-generating algorithm. """ monkeypatch.setattr( - "src.utils.get_minio_resource", + "models.utils.get_minio_resource", Mock(return_value=moto_minio), ) presigned_url = utils.generate_presigned_url( @@ -116,7 +116,7 @@ def test_generate_presigned_url( @pytest.mark.skip("Fails in GitHub Actions for some reason") -@patch("src.utils.MINIO_PUBLIC_HOST", MINIO_HOST) +@patch("models.utils.MINIO_PUBLIC_HOST", MINIO_HOST) @pytest.mark.integration def test_expired_presigned_url(create_minio_bucket): """Tests that http_method actions for minio Object won't be applicable @@ -142,7 +142,7 @@ def test_generate_presigned_url_error(moto_minio, monkeypatch): of boto3 errors. 
""" monkeypatch.setattr( - "src.utils.get_minio_resource", + "models.utils.get_minio_resource", Mock(return_value=moto_minio), ) presigned_url = utils.generate_presigned_url( @@ -159,7 +159,7 @@ def test_put_object_via_presigned_url(moto_minio, monkeypatch): key = "test_file.json" test_data = {"file_id": 1} monkeypatch.setattr( - "src.utils.get_minio_resource", + "models.utils.get_minio_resource", Mock(return_value=moto_minio), ) presigned_url = utils.generate_presigned_url( @@ -521,7 +521,7 @@ def test_get_minio_object_wrong_tenant(monkeypatch, moto_minio) -> None: 'NoSuchTenant' exception with 'Bucket for tenant does not exist' message. """ monkeypatch.setattr( - "src.utils.boto3.resource", + "models.utils.boto3.resource", Mock(return_value=moto_minio), ) wrong_tenant = "wrong_tenant" @@ -539,7 +539,7 @@ def test_get_minio_object_wrong_key(monkeypatch, save_object_minio) -> None: 'The specified key does not exist' in error message. """ monkeypatch.setattr( - "src.utils.boto3.resource", + "models.utils.boto3.resource", Mock(return_value=save_object_minio), ) with pytest.raises(ClientError, match=r"The specified key does not exist"): @@ -553,7 +553,7 @@ def test_get_minio_object(monkeypatch, save_object_minio) -> None: """Tests that get_minio_object returns correct object with actual size.""" expected_obj = json.dumps({"file_id": 1}) monkeypatch.setattr( - "src.utils.boto3.resource", + "models.utils.boto3.resource", Mock(return_value=save_object_minio), ) data, size = utils.get_minio_object(TEST_TENANT, "file_1.json") diff --git a/models/tests/utils.py b/models/tests/utils.py index 63aebe023..f343db122 100644 --- a/models/tests/utils.py +++ b/models/tests/utils.py @@ -2,7 +2,7 @@ from typing import List, Union from uuid import UUID -from src.db import StatusEnum +from models.db import StatusEnum def create_expected_models( From 5d4d12a500a6be82163120084cfbff721e1fa284 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 13:24:05 +0400 Subject: [PATCH 05/22] refactor: pipelines app name --- pipelines/Dockerfile | 4 +- pipelines/alembic/env.py | 6 +- ...3c56436d0_change_task_webhook_to_string.py | 2 +- ...5e65cf34b_fix_default_type_to_inference.py | 2 +- ..._add_original_pipeline_id_and_is_latest.py | 2 +- ...aebbddd8_change_pipeline_version_to_int.py | 2 +- pipelines/{src => pipelines}/__init__.py | 0 pipelines/{src => pipelines}/app.py | 14 +-- pipelines/{src => pipelines}/config.py | 0 pipelines/pipelines/db/__init__.py | 1 + pipelines/{src => pipelines}/db/logger.py | 8 +- pipelines/{src => pipelines}/db/models.py | 0 pipelines/{src => pipelines}/db/service.py | 4 +- pipelines/{src => pipelines}/execution.py | 8 +- pipelines/{src => pipelines}/http_utils.py | 2 +- pipelines/{src => pipelines}/kafka_utils.py | 2 +- pipelines/{src => pipelines}/log.py | 2 +- .../{src => pipelines}/pipeline_runner.py | 4 +- .../{src => pipelines}/result_processing.py | 2 +- pipelines/{src => pipelines}/s3.py | 112 +++++++++--------- pipelines/{src => pipelines}/schemas.py | 4 +- pipelines/{src => pipelines}/service_token.py | 4 +- pipelines/{src => pipelines}/webhooks.py | 4 +- pipelines/setup.py | 6 +- pipelines/src/db/__init__.py | 1 - pipelines/tests/conftest.py | 20 ++-- pipelines/tests/db/test_logger.py | 10 +- pipelines/tests/db/test_models.py | 2 +- pipelines/tests/db/test_service.py | 10 +- pipelines/tests/test_app.py | 12 +- pipelines/tests/test_execution.py | 34 +++--- pipelines/tests/test_http_utils.py | 4 +- pipelines/tests/test_pipeline_runner.py | 26 ++-- 
pipelines/tests/test_result_processing.py | 64 +++++----- pipelines/tests/test_s3.py | 4 +- pipelines/tests/test_schemas.py | 6 +- pipelines/tests/test_webhooks.py | 8 +- pipelines/tests/testing_data.py | 6 +- 38 files changed, 201 insertions(+), 201 deletions(-) rename pipelines/{src => pipelines}/__init__.py (100%) rename pipelines/{src => pipelines}/app.py (97%) rename pipelines/{src => pipelines}/config.py (100%) create mode 100644 pipelines/pipelines/db/__init__.py rename pipelines/{src => pipelines}/db/logger.py (93%) rename pipelines/{src => pipelines}/db/models.py (100%) rename pipelines/{src => pipelines}/db/service.py (99%) rename pipelines/{src => pipelines}/execution.py (99%) rename pipelines/{src => pipelines}/http_utils.py (98%) rename pipelines/{src => pipelines}/kafka_utils.py (98%) rename pipelines/{src => pipelines}/log.py (97%) rename pipelines/{src => pipelines}/pipeline_runner.py (96%) rename pipelines/{src => pipelines}/result_processing.py (99%) rename pipelines/{src => pipelines}/s3.py (95%) rename pipelines/{src => pipelines}/schemas.py (99%) rename pipelines/{src => pipelines}/service_token.py (93%) rename pipelines/{src => pipelines}/webhooks.py (95%) delete mode 100644 pipelines/src/db/__init__.py diff --git a/pipelines/Dockerfile b/pipelines/Dockerfile index 32f6881e9..c831dd29e 100644 --- a/pipelines/Dockerfile +++ b/pipelines/Dockerfile @@ -3,7 +3,7 @@ FROM ${base_image} as build WORKDIR /opt/pipeline_executor -COPY src src +COPY pipelines src COPY alembic alembic COPY alembic.ini wait-for-it.sh .env requirements.txt version.txt setup.py ./ @@ -21,7 +21,7 @@ RUN pip install --no-cache-dir -r requirements_dev.txt && python3 -m pip install FROM sonarsource/sonar-scanner-cli:4.6 AS sonar -COPY src /sonar/src +COPY pipelines /sonar/src COPY alembic /sonar/alembic COPY tests /sonar/tests COPY sonar-project.properties /sonar/sonar-project.properties diff --git a/pipelines/alembic/env.py b/pipelines/alembic/env.py index 23995c1ca..3754b3c1c 100644 --- a/pipelines/alembic/env.py +++ b/pipelines/alembic/env.py @@ -3,10 +3,10 @@ from sqlalchemy import engine_from_config, pool -import src.config as settings +import pipelines.config as settings from alembic import context -from src.db.models import Base -from src.db.service import get_test_db_url +from pipelines.db.models import Base +from pipelines.db.service import get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/pipelines/alembic/versions/0a53c56436d0_change_task_webhook_to_string.py b/pipelines/alembic/versions/0a53c56436d0_change_task_webhook_to_string.py index 6dd3ebe5d..9a472edb7 100644 --- a/pipelines/alembic/versions/0a53c56436d0_change_task_webhook_to_string.py +++ b/pipelines/alembic/versions/0a53c56436d0_change_task_webhook_to_string.py @@ -9,7 +9,7 @@ from sqlalchemy import orm from alembic import op -from src.db import models +from pipelines.db import models # revision identifiers, used by Alembic. 
revision = "0a53c56436d0" diff --git a/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py b/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py index a288848c0..6a1df2714 100644 --- a/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py +++ b/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py @@ -9,7 +9,7 @@ from sqlalchemy import orm from alembic import op -from src.db import models +from pipelines.db import models # revision identifiers, used by Alembic. revision = "0ab5e65cf34b" diff --git a/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py b/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py index 63acf819a..46a937c4c 100644 --- a/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py +++ b/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py @@ -9,7 +9,7 @@ from sqlalchemy import orm from alembic import op -from src.db import models +from pipelines.db import models # revision identifiers, used by Alembic. revision = "764961499e2b" diff --git a/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py b/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py index fbc421ff4..cf22b1af8 100644 --- a/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py +++ b/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py @@ -9,7 +9,7 @@ from sqlalchemy import orm from alembic import op -from src.db import models +from pipelines.db import models # revision identifiers, used by Alembic. revision = "b0cbaebbddd8" diff --git a/pipelines/src/__init__.py b/pipelines/pipelines/__init__.py similarity index 100% rename from pipelines/src/__init__.py rename to pipelines/pipelines/__init__.py diff --git a/pipelines/src/app.py b/pipelines/pipelines/app.py similarity index 97% rename from pipelines/src/app.py rename to pipelines/pipelines/app.py index 2124c4954..5d78b239e 100644 --- a/pipelines/src/app.py +++ b/pipelines/pipelines/app.py @@ -8,13 +8,13 @@ from sqlalchemy_filters.exceptions import BadFilterFormat from tenant_dependency import TenantData, get_tenant_info -import src.config as config -import src.db.models as dbm -import src.db.service as service -import src.execution as execution -import src.schemas as schemas -from src.kafka_utils import Kafka -from src.pipeline_runner import run_pipeline +import pipelines.config as config +import pipelines.db.models as dbm +import pipelines.db.service as service +import pipelines.execution as execution +import pipelines.schemas as schemas +from pipelines.kafka_utils import Kafka +from pipelines.pipeline_runner import run_pipeline TOKEN = get_tenant_info(url=config.KEYCLOAK_URI, algorithm="RS256") diff --git a/pipelines/src/config.py b/pipelines/pipelines/config.py similarity index 100% rename from pipelines/src/config.py rename to pipelines/pipelines/config.py diff --git a/pipelines/pipelines/db/__init__.py b/pipelines/pipelines/db/__init__.py new file mode 100644 index 000000000..c0069eb45 --- /dev/null +++ b/pipelines/pipelines/db/__init__.py @@ -0,0 +1 @@ +import pipelines.db.logger # noqa: F401 diff --git a/pipelines/src/db/logger.py b/pipelines/pipelines/db/logger.py similarity index 93% rename from pipelines/src/db/logger.py rename to pipelines/pipelines/db/logger.py index c9de9073a..ec52b1085 100644 --- a/pipelines/src/db/logger.py +++ b/pipelines/pipelines/db/logger.py @@ -4,10 
+4,10 @@ from sqlalchemy.engine import Connection from sqlalchemy.orm import Mapper -import src.db.models as models -import src.db.service as service -import src.pipeline_runner as runner -import src.schemas as schemas +import pipelines.db.models as models +import pipelines.db.service as service +import pipelines.pipeline_runner as runner +import pipelines.schemas as schemas def create_log(event_type: str, entity: models.Table) -> schemas.Log: diff --git a/pipelines/src/db/models.py b/pipelines/pipelines/db/models.py similarity index 100% rename from pipelines/src/db/models.py rename to pipelines/pipelines/db/models.py diff --git a/pipelines/src/db/service.py b/pipelines/pipelines/db/service.py similarity index 99% rename from pipelines/src/db/service.py rename to pipelines/pipelines/db/service.py index bbbf699ef..8a07d0512 100644 --- a/pipelines/src/db/service.py +++ b/pipelines/pipelines/db/service.py @@ -7,8 +7,8 @@ from sqlalchemy import create_engine from sqlalchemy.orm import Session, sessionmaker -import src.db.models as dbm -from src import config, execution, log, schemas +import pipelines.db.models as dbm +from pipelines import config, execution, log, schemas logger = log.get_logger(__file__) diff --git a/pipelines/src/execution.py b/pipelines/pipelines/execution.py similarity index 99% rename from pipelines/src/execution.py rename to pipelines/pipelines/execution.py index d659e1bfc..390a99333 100644 --- a/pipelines/src/execution.py +++ b/pipelines/pipelines/execution.py @@ -14,10 +14,10 @@ from pydantic import BaseModel, Field from sqlalchemy import orm -import src.db.models as dbm -import src.db.service as service -import src.result_processing as postprocessing -from src import config, http_utils, log, s3, schemas, service_token, webhooks +import pipelines.db.models as dbm +import pipelines.db.service as service +import pipelines.result_processing as postprocessing +from pipelines import config, http_utils, log, s3, schemas, service_token, webhooks logger = log.get_logger(__file__) minio_client = s3.get_minio_client() diff --git a/pipelines/src/http_utils.py b/pipelines/pipelines/http_utils.py similarity index 98% rename from pipelines/src/http_utils.py rename to pipelines/pipelines/http_utils.py index b92ce7f68..8ee693a20 100644 --- a/pipelines/src/http_utils.py +++ b/pipelines/pipelines/http_utils.py @@ -3,7 +3,7 @@ import requests -from src import config, log, schemas, service_token +from pipelines import config, log, schemas, service_token logger = log.get_logger(__file__) diff --git a/pipelines/src/kafka_utils.py b/pipelines/pipelines/kafka_utils.py similarity index 98% rename from pipelines/src/kafka_utils.py rename to pipelines/pipelines/kafka_utils.py index d5f0f01ce..44cc7f2f3 100644 --- a/pipelines/src/kafka_utils.py +++ b/pipelines/pipelines/kafka_utils.py @@ -4,7 +4,7 @@ import aiokafka from kafka import admin, errors -from src import config, log +from pipelines import config, log logger = log.get_logger(__name__) diff --git a/pipelines/src/log.py b/pipelines/pipelines/log.py similarity index 97% rename from pipelines/src/log.py rename to pipelines/pipelines/log.py index b9bd62ecf..63d4d78f9 100644 --- a/pipelines/src/log.py +++ b/pipelines/pipelines/log.py @@ -1,7 +1,7 @@ import logging from typing import Any, Dict, Optional -from src.config import LOG_LEVEL +from pipelines.config import LOG_LEVEL _log_format = ( "%(asctime)s - [%(levelname)s] - %(name)s - " diff --git a/pipelines/src/pipeline_runner.py b/pipelines/pipelines/pipeline_runner.py similarity index 96% 
rename from pipelines/src/pipeline_runner.py rename to pipelines/pipelines/pipeline_runner.py index 51ead4eed..432ae70f0 100644 --- a/pipelines/src/pipeline_runner.py +++ b/pipelines/pipelines/pipeline_runner.py @@ -5,8 +5,8 @@ import aiokafka from aiokafka import AIOKafkaConsumer, AIOKafkaProducer -from src import execution, schemas -from src.log import get_logger +from pipelines import execution, schemas +from pipelines.log import get_logger logger = get_logger(__file__) diff --git a/pipelines/src/result_processing.py b/pipelines/pipelines/result_processing.py similarity index 99% rename from pipelines/src/result_processing.py rename to pipelines/pipelines/result_processing.py index cc468024b..e5cce42bb 100644 --- a/pipelines/src/result_processing.py +++ b/pipelines/pipelines/result_processing.py @@ -10,7 +10,7 @@ from minio import error as minioerr from pydantic import BaseModel, ValidationError -from src import config, http_utils, log +from pipelines import config, http_utils, log logger = log.get_logger(__file__) diff --git a/pipelines/src/s3.py b/pipelines/pipelines/s3.py similarity index 95% rename from pipelines/src/s3.py rename to pipelines/pipelines/s3.py index b86994b2c..bd39a4df3 100644 --- a/pipelines/src/s3.py +++ b/pipelines/pipelines/s3.py @@ -1,56 +1,56 @@ -import enum -from typing import Any, Dict, Optional - -from minio import Minio, credentials - -from src import config, log - -logger = log.get_logger(__file__) - - -class S3Providers(str, enum.Enum): - MINIO = "minio" - AWS_IAM = "aws_iam" - AWS_ENV = "aws_env" - AWS_CONF = "aws_config" - - -def get_minio_config( - s3_provider: S3Providers, - endpoint: Optional[str], - access_key: Optional[str], - secret_key: Optional[str], - **kwargs: Optional[str], -) -> Dict[str, Any]: - minio_config = {"endpoint": endpoint, "secure": False} - if s3_provider == S3Providers.MINIO: - minio_config["access_key"] = access_key - minio_config["secret_key"] = secret_key - elif s3_provider == S3Providers.AWS_IAM: - minio_config["credentials"] = credentials.IamAwsProvider() - elif s3_provider == S3Providers.AWS_ENV: - minio_config["credentials"] = credentials.EnvAWSProvider() - elif s3_provider == S3Providers.AWS_CONF: - minio_config["credentials"] = credentials.AWSConfigProvider( - profile=kwargs.get("aws_profile") - ) - return minio_config - - -def get_minio_client() -> Minio: - """Return Minio client if URI is provided via config.py.""" - s3_provider = S3Providers(config.S3_CREDENTIALS_PROVIDER) - logger.debug("S3_CREDENTIALS_PROVIDER is set to %s", s3_provider) - minio_config = get_minio_config( - s3_provider=s3_provider, - endpoint=config.S3_ENDPOINT, - access_key=config.S3_ACCESS_KEY, - secret_key=config.S3_SECRET_KEY, - aws_profile=config.AWS_PROFILE, - ) - return Minio(**minio_config) - - -def tenant_from_bucket(bucket: str) -> str: - prefix = f"{config.S3_PREFIX}-" if config.S3_PREFIX else "" - return bucket.replace(prefix, "", 1) +import enum +from typing import Any, Dict, Optional + +from minio import Minio, credentials + +from pipelines import config, log + +logger = log.get_logger(__file__) + + +class S3Providers(str, enum.Enum): + MINIO = "minio" + AWS_IAM = "aws_iam" + AWS_ENV = "aws_env" + AWS_CONF = "aws_config" + + +def get_minio_config( + s3_provider: S3Providers, + endpoint: Optional[str], + access_key: Optional[str], + secret_key: Optional[str], + **kwargs: Optional[str], +) -> Dict[str, Any]: + minio_config = {"endpoint": endpoint, "secure": False} + if s3_provider == S3Providers.MINIO: + minio_config["access_key"] = 
access_key + minio_config["secret_key"] = secret_key + elif s3_provider == S3Providers.AWS_IAM: + minio_config["credentials"] = credentials.IamAwsProvider() + elif s3_provider == S3Providers.AWS_ENV: + minio_config["credentials"] = credentials.EnvAWSProvider() + elif s3_provider == S3Providers.AWS_CONF: + minio_config["credentials"] = credentials.AWSConfigProvider( + profile=kwargs.get("aws_profile") + ) + return minio_config + + +def get_minio_client() -> Minio: + """Return Minio client if URI is provided via config.py.""" + s3_provider = S3Providers(config.S3_CREDENTIALS_PROVIDER) + logger.debug("S3_CREDENTIALS_PROVIDER is set to %s", s3_provider) + minio_config = get_minio_config( + s3_provider=s3_provider, + endpoint=config.S3_ENDPOINT, + access_key=config.S3_ACCESS_KEY, + secret_key=config.S3_SECRET_KEY, + aws_profile=config.AWS_PROFILE, + ) + return Minio(**minio_config) + + +def tenant_from_bucket(bucket: str) -> str: + prefix = f"{config.S3_PREFIX}-" if config.S3_PREFIX else "" + return bucket.replace(prefix, "", 1) diff --git a/pipelines/src/schemas.py b/pipelines/pipelines/schemas.py similarity index 99% rename from pipelines/src/schemas.py rename to pipelines/pipelines/schemas.py index 2b750026d..9609276ed 100644 --- a/pipelines/src/schemas.py +++ b/pipelines/pipelines/schemas.py @@ -7,8 +7,8 @@ from pydantic import BaseModel, Field, PrivateAttr, root_validator, validator -import src.db.models as dbm -from src import log +import pipelines.db.models as dbm +from pipelines import log logger = log.get_logger(__file__) diff --git a/pipelines/src/service_token.py b/pipelines/pipelines/service_token.py similarity index 93% rename from pipelines/src/service_token.py rename to pipelines/pipelines/service_token.py index 7d8732ddf..8faaebe88 100644 --- a/pipelines/src/service_token.py +++ b/pipelines/pipelines/service_token.py @@ -1,8 +1,8 @@ import json from typing import Optional -from src import config, http_utils -from src.log import get_logger +from pipelines import config, http_utils +from pipelines.log import get_logger ACCESS_TOKEN = "access_token" diff --git a/pipelines/src/webhooks.py b/pipelines/pipelines/webhooks.py similarity index 95% rename from pipelines/src/webhooks.py rename to pipelines/pipelines/webhooks.py index ebf9b7644..2ac1aa094 100644 --- a/pipelines/src/webhooks.py +++ b/pipelines/pipelines/webhooks.py @@ -1,8 +1,8 @@ import urllib.parse from typing import Any, Dict, Optional, Tuple, Union -from src import http_utils, log, schemas, service_token -from src.db import service +from pipelines import http_utils, log, schemas, service_token +from pipelines.db import service logger = log.get_logger(__file__) diff --git a/pipelines/setup.py b/pipelines/setup.py index bf3ede6e9..286f9641c 100644 --- a/pipelines/setup.py +++ b/pipelines/setup.py @@ -3,7 +3,7 @@ setuptools.setup( name="pipelines", version="0.1.0", - packages=["src"], - package_dir={"pipelines": "src"}, - entry_points={"console_scripts": ["executor = src.cli:execute"]}, + packages=["pipelines"], + package_dir={"pipelines": "pipelines"}, + entry_points={"console_scripts": ["executor = pipelines.cli:execute"]}, ) diff --git a/pipelines/src/db/__init__.py b/pipelines/src/db/__init__.py deleted file mode 100644 index 268a5c44e..000000000 --- a/pipelines/src/db/__init__.py +++ /dev/null @@ -1 +0,0 @@ -import src.db.logger # noqa: F401 diff --git a/pipelines/tests/conftest.py b/pipelines/tests/conftest.py index 71a5b7af8..663ae2db3 100644 --- a/pipelines/tests/conftest.py +++ b/pipelines/tests/conftest.py @@ 
-9,14 +9,14 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy_utils import create_database, database_exists, drop_database -import src.app as app -import src.db.models as dbm -import src.db.service as service -import src.execution as execution +import pipelines.app as app +import pipelines.db.models as dbm +import pipelines.db.service as service +import pipelines.execution as execution import tests.testing_data as td from alembic import command from alembic.config import Config -from src.config import DB_URI +from pipelines.config import DB_URI test_db_url = service.get_test_db_url(DB_URI) alembic_cfg = Config("alembic.ini") @@ -72,7 +72,7 @@ def setup_token(): def testing_app(testing_engine, testing_session, setup_token): session = sessionmaker(bind=testing_engine) app.app.dependency_overrides[app.TOKEN] = lambda: setup_token - with patch("src.db.service.LocalSession", session): + with patch("pipelines.db.service.LocalSession", session): app.app.dependency_overrides[ service.get_session ] = lambda: testing_session @@ -102,19 +102,19 @@ def testing_task(testing_pipeline): @pytest.fixture def session_mock(): - with patch("src.db.service.LocalSession") as mock: + with patch("pipelines.db.service.LocalSession") as mock: yield mock @pytest.fixture def request_mock(): - with patch("src.http_utils.requests.request") as mock: + with patch("pipelines.http_utils.requests.request") as mock: yield mock @pytest.fixture def run_in_session_mock(): - with patch("src.db.service.run_in_session") as mock: + with patch("pipelines.db.service.run_in_session") as mock: yield mock @@ -124,7 +124,7 @@ async def check_preprocessing_status_mock(x, y): return True with patch( - "src.execution.PipelineTask.check_preprocessing_status", + "pipelines.execution.PipelineTask.check_preprocessing_status", check_preprocessing_status_mock, ) as mock: yield mock diff --git a/pipelines/tests/db/test_logger.py b/pipelines/tests/db/test_logger.py index 1ae092228..0ffead07a 100644 --- a/pipelines/tests/db/test_logger.py +++ b/pipelines/tests/db/test_logger.py @@ -1,12 +1,12 @@ -"""Testing src/db/logger.py.""" +"""Testing pipelines/db/logger.py.""" import uuid import sqlalchemy.event -import src.db.logger as logger -import src.db.models as models -import src.schemas as schemas -import src.pipeline_runner as runner +import pipelines.db.logger as logger +import pipelines.db.models as models +import pipelines.schemas as schemas +import pipelines.pipeline_runner as runner def test_create_log(testing_session): diff --git a/pipelines/tests/db/test_models.py b/pipelines/tests/db/test_models.py index 8e2a64a93..d7c82a532 100644 --- a/pipelines/tests/db/test_models.py +++ b/pipelines/tests/db/test_models.py @@ -1,4 +1,4 @@ -"""Testing src/db/models.py.""" +"""Testing pipelines/db/models.py.""" import tests.testing_data as td diff --git a/pipelines/tests/db/test_service.py b/pipelines/tests/db/test_service.py index 32c45c2b5..8793484d8 100644 --- a/pipelines/tests/db/test_service.py +++ b/pipelines/tests/db/test_service.py @@ -1,4 +1,4 @@ -"""Testing src/db/service.py.""" +"""Testing pipelines/db/service.py.""" import datetime import uuid @@ -9,10 +9,10 @@ from aiokafka import AIOKafkaProducer from freezegun import freeze_time -import src.db.models as dbm -import src.db.service as service -import src.execution as execution -import src.schemas as schemas +import pipelines.db.models as dbm +import pipelines.db.service as service +import pipelines.execution as execution +import pipelines.schemas as schemas import tests.testing_data as 
td pytest_plugins = ("pytest_asyncio",) diff --git a/pipelines/tests/test_app.py b/pipelines/tests/test_app.py index 6b0b508c6..28a8affbb 100644 --- a/pipelines/tests/test_app.py +++ b/pipelines/tests/test_app.py @@ -1,15 +1,15 @@ -"""Testing src/app.py.""" +"""Testing pipelines/app.py.""" from copy import deepcopy from typing import Dict import pytest -import src.app as app -import src.db.models as dbm -import src.db.service as service -import src.execution as execution -import src.schemas as schemas +import pipelines.app as app +import pipelines.db.models as dbm +import pipelines.db.service as service +import pipelines.execution as execution +import pipelines.schemas as schemas import tests.testing_data as td diff --git a/pipelines/tests/test_execution.py b/pipelines/tests/test_execution.py index 9d972bb79..3ade7dbad 100644 --- a/pipelines/tests/test_execution.py +++ b/pipelines/tests/test_execution.py @@ -1,4 +1,4 @@ -"""Testing src/execution.py.""" +"""Testing pipelines/execution.py.""" import logging from itertools import cycle from typing import Optional @@ -9,9 +9,9 @@ from fastapi import HTTPException from pydantic import BaseModel -import src.db.models as dbm -import src.execution as execution -import src.schemas as schemas +import pipelines.db.models as dbm +import pipelines.execution as execution +import pipelines.schemas as schemas import tests.testing_data as td LOGGER = logging.getLogger(__name__) @@ -34,9 +34,9 @@ def uuid_mock(): @patch( - "src.execution.ExecutionStep.get_pipeline_step", new_callable=PropertyMock + "pipelines.execution.ExecutionStep.get_pipeline_step", new_callable=PropertyMock ) -@patch("src.execution.ExecutionStep.step_execution") +@patch("pipelines.execution.ExecutionStep.step_execution") @pytest.mark.asyncio async def test_step_execution_with_logging( step_exec_mock, pipeline_step, run_in_session_mock, caplog @@ -63,9 +63,9 @@ async def test_step_execution_with_logging( "It passes when run separately, but fails when all tests are run." ) @patch( - "src.execution.ExecutionStep.get_pipeline_step", new_callable=PropertyMock + "pipelines.execution.ExecutionStep.get_pipeline_step", new_callable=PropertyMock ) -@patch("src.execution.ExecutionStep.send") +@patch("pipelines.execution.ExecutionStep.send") @pytest.mark.asyncio async def test_step_execution( mock_send, model_url, caplog, run_in_session_mock @@ -239,8 +239,8 @@ def test_adjust_pipeline(): "Test should be fixed - it 'blinks'. " "It passes when run separately, but fails when all tests are run." ) -@patch("src.execution.ExecutionStep.step_execution_with_logging") -@patch("src.execution.PipelineTask.send_status") +@patch("pipelines.execution.ExecutionStep.step_execution_with_logging") +@patch("pipelines.execution.PipelineTask.send_status") @pytest.mark.asyncio async def test_start_task( webhook_mock, @@ -284,11 +284,11 @@ async def test_start_task( # return True with patch( - "src.execution.PipelineTask.get_pipeline_type", + "pipelines.execution.PipelineTask.get_pipeline_type", lambda _: schemas.PipelineTypes.INFERENCE, ): # with patch( - # "src.execution.PipelineTask.check_preprocessing_status", + # "pipelines.execution.PipelineTask.check_preprocessing_status", # check_preprocessing_status_mock, # ): await task.start(AIOKafkaProducer) @@ -301,7 +301,7 @@ async def test_start_task( "Test should be fixed - it 'blinks'. " "It passes when run separately, but fails when all tests are run." 
) -@patch("src.execution.ExecutionStep.step_execution_with_logging") +@patch("pipelines.execution.ExecutionStep.step_execution_with_logging") @pytest.mark.asyncio async def test_process_next_steps(exec_step, caplog): exec_step.return_value = None @@ -335,9 +335,9 @@ async def test_process_next_steps(exec_step, caplog): "steps": [received_step, child_step], } ) - with patch("src.execution.PipelineTask.get_by_id", lambda id_: task): + with patch("pipelines.execution.PipelineTask.get_by_id", lambda id_: task): with patch( - "src.execution.PipelineTask.get_pipeline_type", + "pipelines.execution.PipelineTask.get_pipeline_type", lambda _: schemas.PipelineTypes.INFERENCE, ): await received_step.process_next_steps(AIOKafkaProducer) @@ -349,7 +349,7 @@ async def test_process_next_steps(exec_step, caplog): "Test should be fixed - it 'blinks'. " "It passes when run separately, but fails when all tests are run." ) -@patch("src.execution.ExecutionStep.step_execution_with_logging") +@patch("pipelines.execution.ExecutionStep.step_execution_with_logging") @pytest.mark.asyncio async def test_process_next_staps_without_child_steps(exec_step, caplog): received_step = execution.ExecutionStep.parse_obj( @@ -383,7 +383,7 @@ async def test_process_next_staps_without_child_steps(exec_step, caplog): } ) - with patch("src.execution.PipelineTask.get_by_id", lambda id_: task): + with patch("pipelines.execution.PipelineTask.get_by_id", lambda id_: task): await received_step.process_next_steps(AIOKafkaProducer) assert caplog.messages[0].startswith("Step with id = 58 from task = 20") diff --git a/pipelines/tests/test_http_utils.py b/pipelines/tests/test_http_utils.py index 6eb261bcf..81ca2889a 100644 --- a/pipelines/tests/test_http_utils.py +++ b/pipelines/tests/test_http_utils.py @@ -3,7 +3,7 @@ import requests import pytest -from src import http_utils, schemas +from pipelines import http_utils, schemas def test_make_request(request_mock): @@ -26,7 +26,7 @@ def test_make_request(request_mock): def test_make_request_with_retry(s_effect, expected, call_count, request_mock): """Testing make_request_with_retry.""" with patch( - "src.http_utils.make_request", side_effect=s_effect + "pipelines.http_utils.make_request", side_effect=s_effect ) as req_mock: assert http_utils.make_request_with_retry("", {}, start=0) == expected assert req_mock.call_count == call_count diff --git a/pipelines/tests/test_pipeline_runner.py b/pipelines/tests/test_pipeline_runner.py index 21c72b0b6..9610f8f1e 100644 --- a/pipelines/tests/test_pipeline_runner.py +++ b/pipelines/tests/test_pipeline_runner.py @@ -1,4 +1,4 @@ -# """Testing src/pipeline_runner.py.""" +# """Testing pipelines/pipeline_runner.py.""" import logging from unittest.mock import patch @@ -6,8 +6,8 @@ from aiokafka import AIOKafkaProducer from pydantic import BaseModel -import src.execution as execution -import src.pipeline_runner as runner +import pipelines.execution as execution +import pipelines.pipeline_runner as runner LOGGER = logging.getLogger(__name__) @@ -84,9 +84,9 @@ def test_response_message_incorrect(caplog): ) -@patch("src.execution.PipelineTask.get_by_id") -@patch("src.execution.ExecutionStep.get_by_id") -@patch("src.execution.ExecutionStep.process_next_steps") +@patch("pipelines.execution.PipelineTask.get_by_id") +@patch("pipelines.execution.ExecutionStep.get_by_id") +@patch("pipelines.execution.ExecutionStep.process_next_steps") @pytest.mark.asyncio async def test_process_message_task_not_finished( process_next_steps, get_step, get_task, testing_app @@ -145,9 
+145,9 @@ async def test_process_message_task_not_finished( assert process_next_steps.called -@patch("src.execution.PipelineTask.get_by_id") -@patch("src.execution.ExecutionStep.get_by_id") -@patch("src.execution.PipelineTask.finish") +@patch("pipelines.execution.PipelineTask.get_by_id") +@patch("pipelines.execution.ExecutionStep.get_by_id") +@patch("pipelines.execution.PipelineTask.finish") @pytest.mark.asyncio async def test_process_message_task_finished( finish_task, get_step, get_task, testing_app @@ -210,9 +210,9 @@ async def test_process_message_task_finished( "Test should be fixed - it 'blinks'. " "It passes when run separately, but fails when all tests are run." ) -@patch("src.execution.PipelineTask.get_by_id") -@patch("src.execution.ExecutionStep.get_by_id") -@patch("src.execution.PipelineTask.finish") +@patch("pipelines.execution.PipelineTask.get_by_id") +@patch("pipelines.execution.ExecutionStep.get_by_id") +@patch("pipelines.execution.PipelineTask.finish") @pytest.mark.asyncio async def test_process_message_task_failed( finish_task, get_step, get_task, testing_app, caplog @@ -267,7 +267,7 @@ async def test_process_message_task_failed( "Test should be fixed - it 'blinks'. " "It passes when run separately, but fails when all tests are run." ) -@patch("src.pipeline_runner.process_message") +@patch("pipelines.pipeline_runner.process_message") @pytest.mark.asyncio async def test_run_pipeline(process_message, caplog): message_1 = KafkaMessage.parse_obj( diff --git a/pipelines/tests/test_result_processing.py b/pipelines/tests/test_result_processing.py index 8f1c5c10b..d052c0a6b 100644 --- a/pipelines/tests/test_result_processing.py +++ b/pipelines/tests/test_result_processing.py @@ -1,10 +1,10 @@ -"""Testing src/result_processing.py.""" +"""Testing pipelines/result_processing.py.""" from unittest.mock import MagicMock, patch import pytest from minio import S3Error -import src.result_processing as processing +import pipelines.result_processing as processing def test_merge_outputs(): @@ -185,7 +185,7 @@ def test_merge_geometry_objects_no_objects_provided(): ) def test_get_annotation_uri(job_id, file_id, expected): """Testing get_annotation_uri.""" - with patch("src.result_processing.config.ANNOTATION_URI", "foobar/ann"): + with patch("pipelines.result_processing.config.ANNOTATION_URI", "foobar/ann"): assert processing.get_annotation_uri(job_id, file_id) == expected @@ -232,7 +232,7 @@ def test_get_pipeline_leaves_data(): def test_get_pipeline_leaves_data_minio_error(): """Testing get_pipeline_leaves_data when S3Error occurred.""" err = S3Error("", "", "", "", "", "") - with patch("src.result_processing.list_object_names", side_effect=err): + with patch("pipelines.result_processing.list_object_names", side_effect=err): res = processing.get_pipeline_leaves_data(MagicMock(), "", "") assert res is None @@ -249,7 +249,7 @@ def test_merge_pipeline_leaves_data(): b'{"id": 3, "bbox": [3, 3, 3, 3], "category": "some"}]}]}', ] with patch( - "src.result_processing.get_pipeline_leaves_data", + "pipelines.result_processing.get_pipeline_leaves_data", return_value=leaves_data, ): res = processing.merge_pipeline_leaves_data(MagicMock(), "", "") @@ -295,7 +295,7 @@ def test_merge_pipeline_leaves_data(): def test_merge_pipeline_leaves_data_no_files_data(): """Testing merge_pipeline_leaves_data when there's no files data.""" with patch( - "src.result_processing.get_pipeline_leaves_data", return_value=None + "pipelines.result_processing.get_pipeline_leaves_data", return_value=None ): assert ( 
processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None @@ -305,9 +305,9 @@ def test_merge_pipeline_leaves_data_no_files_data(): def test_merge_pipeline_leaves_data_cannot_parse_data(): """Testing merge_pipeline_leaves_data when raw data cannot be parsed.""" with patch( - "src.result_processing.ModelOutput.parse_models", return_value=None + "pipelines.result_processing.ModelOutput.parse_models", return_value=None ): - with patch("src.result_processing.get_pipeline_leaves_data"): + with patch("pipelines.result_processing.get_pipeline_leaves_data"): assert ( processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None @@ -316,8 +316,8 @@ def test_merge_pipeline_leaves_data_cannot_parse_data(): def test_merge_pipeline_leaves_data_cannot_merge_data(): """Testing merge_pipeline_leaves_data when data cannot be merged.""" - with patch("src.result_processing.get_pipeline_leaves_data"): - with patch("src.result_processing.ModelOutput.parse_models"): + with patch("pipelines.result_processing.get_pipeline_leaves_data"): + with patch("pipelines.result_processing.ModelOutput.parse_models"): assert ( processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None @@ -327,7 +327,7 @@ def test_merge_pipeline_leaves_data_cannot_merge_data(): def test_delete_objects(): """Testing delete_objects.""" with patch( - "src.result_processing.list_object_names", return_value=["f", "b"] + "pipelines.result_processing.list_object_names", return_value=["f", "b"] ): client_mock = MagicMock() assert processing.delete_objects(client_mock, "bucket", "") @@ -340,7 +340,7 @@ def test_delete_objects(): def test_delete_objects_minio_error(): """Testing delete_objects when S3Error occurred.""" err = S3Error("", "", "", "", "", "") - with patch("src.result_processing.list_object_names", side_effect=err): + with patch("pipelines.result_processing.list_object_names", side_effect=err): assert not processing.delete_objects(MagicMock(), "bucket", "") @@ -349,11 +349,11 @@ def test_postprocess_result(): m = MagicMock() m.content = b'{"foo": 42}' with patch( - "src.result_processing.http_utils.make_request_with_retry", + "pipelines.result_processing.http_utils.make_request_with_retry", return_value=m, ) as req_mock: with patch( - "src.result_processing.config.POSTPROCESSING_URI", "foo.com" + "pipelines.result_processing.config.POSTPROCESSING_URI", "foo.com" ): res = processing.postprocess_result({"foo": 1}) assert res == {"foo": 42} @@ -364,7 +364,7 @@ def test_postprocess_result(): def test_postprocess_result_no_uri(): """Testing postprocess_result when there's no uri.""" - with patch("src.result_processing.config.POSTPROCESSING_URI", ""): + with patch("pipelines.result_processing.config.POSTPROCESSING_URI", ""): assert processing.postprocess_result({"a": 1}) is None @@ -373,7 +373,7 @@ def test_postprocess_result_invalid_postprocessor_json_response(): m = MagicMock m.content = b'{"asd":}' with patch( - "src.result_processing.http_utils.make_request_with_retry", + "pipelines.result_processing.http_utils.make_request_with_retry", return_value=m, ): assert processing.postprocess_result({"a": 1}) is None @@ -382,9 +382,9 @@ def test_postprocess_result_invalid_postprocessor_json_response(): @pytest.mark.skip("Test should be fixed - del_mock.assert_called_once() fails") def test_manage_result_for_annotator(): """Testing manage_result_for_annotator.""" - with patch("src.result_processing.merge_pipeline_leaves_data"): + with patch("pipelines.result_processing.merge_pipeline_leaves_data"): with patch( - 
"src.result_processing.postprocess_result", + "pipelines.result_processing.postprocess_result", return_value={ "file": "", "bucket": "", @@ -392,12 +392,12 @@ def test_manage_result_for_annotator(): }, ): with patch( - "src.result_processing.http_utils.make_request_with_retry" + "pipelines.result_processing.http_utils.make_request_with_retry" ) as req_mock: - with patch("src.result_processing.delete_objects") as del_mock: - with patch("src.config.DEBUG_MERGE", False): + with patch("pipelines.result_processing.delete_objects") as del_mock: + with patch("pipelines.config.DEBUG_MERGE", False): with patch( - "src.result_processing.config.ANNOTATION_URI", + "pipelines.result_processing.config.ANNOTATION_URI", "f.com/annotation", ): assert processing.manage_result_for_annotator( @@ -417,7 +417,7 @@ def test_manage_result_for_annotator(): def test_manage_result_for_annotator_no_annotator_uri(): """Testing manage_result_for_annotator when there's no Annotator URI.""" - with patch("src.result_processing.config.ANNOTATION_URI", ""): + with patch("pipelines.result_processing.config.ANNOTATION_URI", ""): assert not processing.manage_result_for_annotator( "", "", "", 0, "", "", "", 8, MagicMock(), "" ) @@ -426,7 +426,7 @@ def test_manage_result_for_annotator_no_annotator_uri(): def test_manage_result_for_annotator_cannot_merge_data(): """Testing manage_result_for_annotator when data cannot be merger.""" with patch( - "src.result_processing.merge_pipeline_leaves_data", return_value=None + "pipelines.result_processing.merge_pipeline_leaves_data", return_value=None ): assert not processing.manage_result_for_annotator( "", "", "", 0, "", "", "", 8, MagicMock(), "" @@ -435,10 +435,10 @@ def test_manage_result_for_annotator_cannot_merge_data(): def test_manage_result_for_annotator_request_not_succeeded(): """Testing manage_result_for_annotator when cannot connect to Annotator.""" - with patch("src.result_processing.merge_pipeline_leaves_data"): - with patch("src.result_processing.postprocess_result"): + with patch("pipelines.result_processing.merge_pipeline_leaves_data"): + with patch("pipelines.result_processing.postprocess_result"): with patch( - "src.result_processing.http_utils.make_request_with_retry", + "pipelines.result_processing.http_utils.make_request_with_retry", return_value=None, ): assert not processing.manage_result_for_annotator( @@ -448,14 +448,14 @@ def test_manage_result_for_annotator_request_not_succeeded(): def test_manage_result_for_annotator_request_debug_merge(): """Debug merge is True and data are not deleted.""" - with patch("src.result_processing.merge_pipeline_leaves_data"): - with patch("src.result_processing.postprocess_result"): + with patch("pipelines.result_processing.merge_pipeline_leaves_data"): + with patch("pipelines.result_processing.postprocess_result"): with patch( - "src.result_processing.http_utils.make_request_with_retry" + "pipelines.result_processing.http_utils.make_request_with_retry" ): - with patch("src.result_processing.config.DEBUG_MERGE", True): + with patch("pipelines.result_processing.config.DEBUG_MERGE", True): with patch( - "src.result_processing.delete_objects" + "pipelines.result_processing.delete_objects" ) as del_mock: assert processing.manage_result_for_annotator( "", "", "", 0, "", "", "", 8, MagicMock(), "" diff --git a/pipelines/tests/test_s3.py b/pipelines/tests/test_s3.py index 939029dfc..ac481fbeb 100644 --- a/pipelines/tests/test_s3.py +++ b/pipelines/tests/test_s3.py @@ -3,7 +3,7 @@ import minio import pytest -from src import s3 +from 
pipelines import s3 def test_get_minio_client(): @@ -21,5 +21,5 @@ def test_get_minio_client(): ), ) def test_tenant_from_bucket(prefix: str, bucket: str, expected: str) -> None: - with patch("src.config.S3_PREFIX", prefix): + with patch("pipelines.config.S3_PREFIX", prefix): assert s3.tenant_from_bucket(bucket) == expected diff --git a/pipelines/tests/test_schemas.py b/pipelines/tests/test_schemas.py index aa9931f9b..df147e83f 100644 --- a/pipelines/tests/test_schemas.py +++ b/pipelines/tests/test_schemas.py @@ -1,9 +1,9 @@ -"""Testing src/schemas.py.""" +"""Testing pipelines/schemas.py.""" import pytest -import src.db.models as dbm -import src.schemas as schemas +import pipelines.db.models as dbm +import pipelines.schemas as schemas import tests.testing_data as td diff --git a/pipelines/tests/test_webhooks.py b/pipelines/tests/test_webhooks.py index 12c98f439..f14b0bbee 100644 --- a/pipelines/tests/test_webhooks.py +++ b/pipelines/tests/test_webhooks.py @@ -1,9 +1,9 @@ -"""Testing src/webhooks.py.""" +"""Testing pipelines/webhooks.py.""" from unittest.mock import patch -import src.schemas as schemas -import src.webhooks as webhooks +import pipelines.schemas as schemas +import pipelines.webhooks as webhooks def test_create_inference_url_and_body(): @@ -12,7 +12,7 @@ def test_create_inference_url_and_body(): task_status = schemas.Status.RUN status = schemas.JobStatus.RUN with patch( - "src.webhooks.service.get_job_status_if_changed", return_value=status + "pipelines.webhooks.service.get_job_status_if_changed", return_value=status ): url, body = webhooks.create_inference_url_and_body( webhook=webhook, job_id=job_id, task_status=task_status diff --git a/pipelines/tests/testing_data.py b/pipelines/tests/testing_data.py index 021b5f6a8..4dc4b273b 100644 --- a/pipelines/tests/testing_data.py +++ b/pipelines/tests/testing_data.py @@ -1,8 +1,8 @@ import json -import src.db.models as dbm -import src.execution as execution -import src.schemas as schemas +import pipelines.db.models as dbm +import pipelines.execution as execution +import pipelines.schemas as schemas steps_dict = { "model": "bar", From 55faaa6644603a13abafc50a5dfb019f62e5d6f2 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 13:34:36 +0400 Subject: [PATCH 06/22] refactor: users, taxonomy, procesing apps renamed --- processing/Dockerfile | 2 +- processing/alembic/env.py | 6 +-- processing/{src => processing}/__init__.py | 0 processing/{src => processing}/config.py | 0 processing/processing/db/__init__.py | 2 + processing/{src => processing}/db/models.py | 2 +- processing/{src => processing}/db/service.py | 4 +- .../health_check_easy_ocr.py | 6 +-- processing/{src => processing}/main.py | 18 +++---- processing/{src => processing}/schema.py | 0 .../send_preprocess_results.py | 4 +- processing/{src => processing}/tasks.py | 10 ++-- processing/{src => processing}/text_merge.py | 10 ++-- .../LICENSE-2.0_box_utils.txt | 0 .../third_party_code/__init__.py | 0 .../third_party_code/box_util.py | 0 .../third_party_code/table.py | 0 .../{src => processing}/utils/__init__.py | 0 .../utils/aiohttp_utils.py | 4 +- .../{src => processing}/utils/logger.py | 2 +- .../{src => processing}/utils/minio_utils.py | 4 +- processing/{src => processing}/utils/utils.py | 8 +-- processing/src/db/__init__.py | 2 - processing/tests/conftest.py | 4 +- processing/tests/test_assets_status.py | 6 +-- processing/tests/test_text_merge.py | 6 +-- processing/tests/test_utils/test_utils.py | 4 +- taxonomy/Dockerfile | 2 +- taxonomy/alembic/env.py | 4 +- 
taxonomy/documentation/update_docs.py | 2 +- taxonomy/{app => taxonomy}/__init__.py | 0 taxonomy/{app => taxonomy}/database.py | 0 taxonomy/{app => taxonomy}/errors.py | 0 taxonomy/{app => taxonomy}/filters.py | 2 +- taxonomy/{app => taxonomy}/logging_setup.py | 0 taxonomy/{app => taxonomy}/main.py | 10 ++-- .../microservice_communication/__init__.py | 0 .../microservice_communication/search.py | 0 taxonomy/{app => taxonomy}/models.py | 4 +- .../{app => taxonomy}/schemas/__init__.py | 6 +-- taxonomy/{app => taxonomy}/schemas/errors.py | 0 taxonomy/{app => taxonomy}/schemas/taxon.py | 2 +- .../{app => taxonomy}/schemas/taxonomy.py | 0 taxonomy/{app => taxonomy}/tags.py | 0 taxonomy/{app => taxonomy}/taxon/__init__.py | 0 taxonomy/{app => taxonomy}/taxon/resources.py | 14 +++--- taxonomy/{app => taxonomy}/taxon/services.py | 10 ++-- .../{app => taxonomy}/taxonomy/__init__.py | 0 .../{app => taxonomy}/taxonomy/resources.py | 14 +++--- .../{app => taxonomy}/taxonomy/services.py | 8 +-- .../{app => taxonomy}/token_dependency.py | 0 taxonomy/tests/conftest.py | 14 +++--- taxonomy/tests/test_taxon_crud.py | 2 +- taxonomy/tests/test_taxonomy_router.py | 6 +-- users/Dockerfile | 4 +- users/tests/keycloak/test_query.py | 8 +-- users/tests/keycloak/test_schemas.py | 4 +- users/tests/keycloak/test_utils.py | 4 +- users/tests/test_main.py | 50 +++++++++---------- users/tests/test_schemas.py | 2 +- users/tests/test_utils.py | 4 +- users/{src => users}/__init__.py | 0 users/{src => users}/config.py | 0 users/{src => users}/keycloak/__init__.py | 0 users/{src => users}/keycloak/query.py | 8 +-- users/{src => users}/keycloak/resources.py | 2 +- users/{src => users}/keycloak/schemas.py | 0 users/{src => users}/keycloak/utils.py | 4 +- users/{src => users}/logger.py | 0 users/{src => users}/main.py | 16 +++--- users/{src => users}/s3.py | 2 +- users/{src => users}/schemas.py | 0 users/{src => users}/utils.py | 2 +- 73 files changed, 157 insertions(+), 157 deletions(-) rename processing/{src => processing}/__init__.py (100%) rename processing/{src => processing}/config.py (100%) create mode 100644 processing/processing/db/__init__.py rename processing/{src => processing}/db/models.py (95%) rename processing/{src => processing}/db/service.py (97%) rename processing/{src => processing}/health_check_easy_ocr.py (95%) rename processing/{src => processing}/main.py (91%) rename processing/{src => processing}/schema.py (100%) rename processing/{src => processing}/send_preprocess_results.py (95%) rename processing/{src => processing}/tasks.py (95%) rename processing/{src => processing}/text_merge.py (95%) rename processing/{src => processing}/third_party_code/LICENSE-2.0_box_utils.txt (100%) rename processing/{src => processing}/third_party_code/__init__.py (100%) rename processing/{src => processing}/third_party_code/box_util.py (100%) rename processing/{src => processing}/third_party_code/table.py (100%) rename processing/{src => processing}/utils/__init__.py (100%) rename processing/{src => processing}/utils/aiohttp_utils.py (94%) rename processing/{src => processing}/utils/logger.py (97%) rename processing/{src => processing}/utils/minio_utils.py (96%) rename processing/{src => processing}/utils/utils.py (95%) delete mode 100644 processing/src/db/__init__.py rename taxonomy/{app => taxonomy}/__init__.py (100%) rename taxonomy/{app => taxonomy}/database.py (100%) rename taxonomy/{app => taxonomy}/errors.py (100%) rename taxonomy/{app => taxonomy}/filters.py (79%) rename taxonomy/{app => taxonomy}/logging_setup.py 
(100%) rename taxonomy/{app => taxonomy}/main.py (88%) rename taxonomy/{app => taxonomy}/microservice_communication/__init__.py (100%) rename taxonomy/{app => taxonomy}/microservice_communication/search.py (100%) rename taxonomy/{app => taxonomy}/models.py (97%) rename taxonomy/{app => taxonomy}/schemas/__init__.py (84%) rename taxonomy/{app => taxonomy}/schemas/errors.py (100%) rename taxonomy/{app => taxonomy}/schemas/taxon.py (97%) rename taxonomy/{app => taxonomy}/schemas/taxonomy.py (100%) rename taxonomy/{app => taxonomy}/tags.py (100%) rename taxonomy/{app => taxonomy}/taxon/__init__.py (100%) rename taxonomy/{app => taxonomy}/taxon/resources.py (93%) rename taxonomy/{app => taxonomy}/taxon/services.py (97%) rename taxonomy/{app => taxonomy}/taxonomy/__init__.py (100%) rename taxonomy/{app => taxonomy}/taxonomy/resources.py (97%) rename taxonomy/{app => taxonomy}/taxonomy/services.py (97%) rename taxonomy/{app => taxonomy}/token_dependency.py (100%) rename users/{src => users}/__init__.py (100%) rename users/{src => users}/config.py (100%) rename users/{src => users}/keycloak/__init__.py (100%) rename users/{src => users}/keycloak/query.py (98%) rename users/{src => users}/keycloak/resources.py (96%) rename users/{src => users}/keycloak/schemas.py (100%) rename users/{src => users}/keycloak/utils.py (78%) rename users/{src => users}/logger.py (100%) rename users/{src => users}/main.py (97%) rename users/{src => users}/s3.py (98%) rename users/{src => users}/schemas.py (100%) rename users/{src => users}/utils.py (98%) diff --git a/processing/Dockerfile b/processing/Dockerfile index 64f948be9..763210433 100644 --- a/processing/Dockerfile +++ b/processing/Dockerfile @@ -13,7 +13,7 @@ ENV PYTHONUNBUFFERED 1 FROM base as build COPY alembic alembic -COPY src src +COPY processing src CMD alembic upgrade head && uvicorn src.main:app --host 0.0.0.0 --port 8080 FROM build as development diff --git a/processing/alembic/env.py b/processing/alembic/env.py index 9f191b943..e38f4f389 100644 --- a/processing/alembic/env.py +++ b/processing/alembic/env.py @@ -4,8 +4,8 @@ from sqlalchemy import engine_from_config, pool from alembic import context -from src.config import settings -from src.db.service import get_test_db_url +from processing.config import settings +from processing.db.service import get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. 
@@ -26,7 +26,7 @@ # for 'autogenerate' support # from myapp import mymodel # target_metadata = mymodel.Base.metadata -from src.db.models import Base +from processing.db.models import Base target_metadata = Base.metadata diff --git a/processing/src/__init__.py b/processing/processing/__init__.py similarity index 100% rename from processing/src/__init__.py rename to processing/processing/__init__.py diff --git a/processing/src/config.py b/processing/processing/config.py similarity index 100% rename from processing/src/config.py rename to processing/processing/config.py diff --git a/processing/processing/db/__init__.py b/processing/processing/db/__init__.py new file mode 100644 index 000000000..175d2b491 --- /dev/null +++ b/processing/processing/db/__init__.py @@ -0,0 +1,2 @@ +import processing.db.models +import processing.db.service # noqa: F401 diff --git a/processing/src/db/models.py b/processing/processing/db/models.py similarity index 95% rename from processing/src/db/models.py rename to processing/processing/db/models.py index 1a11f0a2a..f5b41dfed 100644 --- a/processing/src/db/models.py +++ b/processing/processing/db/models.py @@ -2,7 +2,7 @@ from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker -from src.config import settings +from processing.config import settings Base = declarative_base() engine = sa.create_engine( diff --git a/processing/src/db/service.py b/processing/processing/db/service.py similarity index 97% rename from processing/src/db/service.py rename to processing/processing/db/service.py index e10d6c74b..17b0cfbf4 100644 --- a/processing/src/db/service.py +++ b/processing/processing/db/service.py @@ -2,8 +2,8 @@ from sqlalchemy.orm import Query, Session -from src import schema -from src.db import models +from processing import schema +from processing.db import models def session_scope() -> Session: diff --git a/processing/src/health_check_easy_ocr.py b/processing/processing/health_check_easy_ocr.py similarity index 95% rename from processing/src/health_check_easy_ocr.py rename to processing/processing/health_check_easy_ocr.py index 66e6a4902..b9f0b78bb 100644 --- a/processing/src/health_check_easy_ocr.py +++ b/processing/processing/health_check_easy_ocr.py @@ -4,9 +4,9 @@ from fastapi import HTTPException from minio.error import MinioException -from src.utils.aiohttp_utils import send_request -from src.utils.logger import get_logger -from src.utils.minio_utils import ( +from processing.utils.aiohttp_utils import send_request +from processing.utils.logger import get_logger +from processing.utils.minio_utils import ( MinioCommunicator, convert_bucket_name_if_s3prefix, ) diff --git a/processing/src/main.py b/processing/processing/main.py similarity index 91% rename from processing/src/main.py rename to processing/processing/main.py index 36c611760..71679bed4 100644 --- a/processing/src/main.py +++ b/processing/processing/main.py @@ -14,15 +14,15 @@ from sqlalchemy.orm import Session from tenant_dependency import TenantData, get_tenant_info -from src import db, schema -from src.config import settings -from src.health_check_easy_ocr import health_check_preprocessing -from src.send_preprocess_results import send_preprocess_result -from src.tasks import GetLanguagesTask, PreprocessingTask -from src.text_merge import merge_words_to_paragraph -from src.utils.logger import get_logger -from src.utils.minio_utils import convert_bucket_name_if_s3prefix -from src.utils.utils import map_finish_status_for_assets +from processing import db, 
schema +from processing.config import settings +from processing.health_check_easy_ocr import health_check_preprocessing +from processing.send_preprocess_results import send_preprocess_result +from processing.tasks import GetLanguagesTask, PreprocessingTask +from processing.text_merge import merge_words_to_paragraph +from processing.utils.logger import get_logger +from processing.utils.minio_utils import convert_bucket_name_if_s3prefix +from processing.utils.utils import map_finish_status_for_assets logger = get_logger(__name__) app = FastAPI( diff --git a/processing/src/schema.py b/processing/processing/schema.py similarity index 100% rename from processing/src/schema.py rename to processing/processing/schema.py diff --git a/processing/src/send_preprocess_results.py b/processing/processing/send_preprocess_results.py similarity index 95% rename from processing/src/send_preprocess_results.py rename to processing/processing/send_preprocess_results.py index a43a291f7..955807047 100644 --- a/processing/src/send_preprocess_results.py +++ b/processing/processing/send_preprocess_results.py @@ -5,8 +5,8 @@ from fastapi import HTTPException from minio.error import MinioException -from src.utils.logger import get_logger -from src.utils.minio_utils import MinioCommunicator +from processing.utils.logger import get_logger +from processing.utils.minio_utils import MinioCommunicator logger = get_logger(__name__) diff --git a/processing/src/tasks.py b/processing/processing/tasks.py similarity index 95% rename from processing/src/tasks.py rename to processing/processing/tasks.py index 2f60e1319..ac5b17cec 100644 --- a/processing/src/tasks.py +++ b/processing/processing/tasks.py @@ -10,11 +10,11 @@ from fastapi import HTTPException, status from sqlalchemy.orm import Session -from src.config import settings -from src.schema import PreprocessingStatus, Status -from src.utils.aiohttp_utils import send_request -from src.utils.logger import get_log_exception_msg, get_logger -from src.utils.utils import ( +from processing.config import settings +from processing.schema import PreprocessingStatus, Status +from processing.utils.aiohttp_utils import send_request +from processing.utils.logger import get_log_exception_msg, get_logger +from processing.utils.utils import ( execute_pipeline, get_files_data, get_model_url, diff --git a/processing/src/text_merge.py b/processing/processing/text_merge.py similarity index 95% rename from processing/src/text_merge.py rename to processing/processing/text_merge.py index eaced1055..2858d9fb5 100644 --- a/processing/src/text_merge.py +++ b/processing/processing/text_merge.py @@ -12,11 +12,11 @@ from fastapi import HTTPException from minio.error import MinioException -from src import schema -from src.schema import AnnotationData, MatchedPage, Page, ParagraphBbox -from src.third_party_code.box_util import stitch_boxes_into_lines -from src.third_party_code.table import BorderBox -from src.utils.minio_utils import MinioCommunicator +from processing import schema +from processing.schema import AnnotationData, MatchedPage, Page, ParagraphBbox +from processing.third_party_code.box_util import stitch_boxes_into_lines +from processing.third_party_code.table import BorderBox +from processing.utils.minio_utils import MinioCommunicator logger = logging.getLogger(__name__) diff --git a/processing/src/third_party_code/LICENSE-2.0_box_utils.txt b/processing/processing/third_party_code/LICENSE-2.0_box_utils.txt similarity index 100% rename from 
processing/src/third_party_code/LICENSE-2.0_box_utils.txt rename to processing/processing/third_party_code/LICENSE-2.0_box_utils.txt diff --git a/processing/src/third_party_code/__init__.py b/processing/processing/third_party_code/__init__.py similarity index 100% rename from processing/src/third_party_code/__init__.py rename to processing/processing/third_party_code/__init__.py diff --git a/processing/src/third_party_code/box_util.py b/processing/processing/third_party_code/box_util.py similarity index 100% rename from processing/src/third_party_code/box_util.py rename to processing/processing/third_party_code/box_util.py diff --git a/processing/src/third_party_code/table.py b/processing/processing/third_party_code/table.py similarity index 100% rename from processing/src/third_party_code/table.py rename to processing/processing/third_party_code/table.py diff --git a/processing/src/utils/__init__.py b/processing/processing/utils/__init__.py similarity index 100% rename from processing/src/utils/__init__.py rename to processing/processing/utils/__init__.py diff --git a/processing/src/utils/aiohttp_utils.py b/processing/processing/utils/aiohttp_utils.py similarity index 94% rename from processing/src/utils/aiohttp_utils.py rename to processing/processing/utils/aiohttp_utils.py index e9fe86a49..9f7abec55 100644 --- a/processing/src/utils/aiohttp_utils.py +++ b/processing/processing/utils/aiohttp_utils.py @@ -5,8 +5,8 @@ from aiohttp import ContentTypeError from fastapi import HTTPException -from src.config import settings -from src.utils.logger import get_logger +from processing.config import settings +from processing.utils.logger import get_logger logger = get_logger(__name__) diff --git a/processing/src/utils/logger.py b/processing/processing/utils/logger.py similarity index 97% rename from processing/src/utils/logger.py rename to processing/processing/utils/logger.py index 9ad39fe59..de0dcebb7 100644 --- a/processing/src/utils/logger.py +++ b/processing/processing/utils/logger.py @@ -2,7 +2,7 @@ import traceback from logging.config import dictConfig -from src.config import settings +from processing.config import settings log_config = { "version": 1, diff --git a/processing/src/utils/minio_utils.py b/processing/processing/utils/minio_utils.py similarity index 96% rename from processing/src/utils/minio_utils.py rename to processing/processing/utils/minio_utils.py index 2568688b6..166fcf5e1 100644 --- a/processing/src/utils/minio_utils.py +++ b/processing/processing/utils/minio_utils.py @@ -1,8 +1,8 @@ from minio import Minio from minio.credentials import AWSConfigProvider, EnvAWSProvider, IamAwsProvider -from src.config import settings -from src.utils.logger import get_logger +from processing.config import settings +from processing.utils.logger import get_logger logger = get_logger(__name__) diff --git a/processing/src/utils/utils.py b/processing/processing/utils/utils.py similarity index 95% rename from processing/src/utils/utils.py rename to processing/processing/utils/utils.py index 829087826..b9753cd50 100644 --- a/processing/src/utils/utils.py +++ b/processing/processing/utils/utils.py @@ -4,10 +4,10 @@ from cache import AsyncTTL from sqlalchemy.orm import Session -from src import db, schema -from src.config import settings -from src.utils.aiohttp_utils import send_request -from src.utils.logger import get_log_exception_msg, get_logger +from processing import db, schema +from processing.config import settings +from processing.utils.aiohttp_utils import send_request +from 
processing.utils.logger import get_log_exception_msg, get_logger logger = get_logger(__name__) T = TypeVar("T") diff --git a/processing/src/db/__init__.py b/processing/src/db/__init__.py deleted file mode 100644 index 037de0603..000000000 --- a/processing/src/db/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -import src.db.models -import src.db.service # noqa: F401 diff --git a/processing/tests/conftest.py b/processing/tests/conftest.py index 7b00f2178..578bf6709 100644 --- a/processing/tests/conftest.py +++ b/processing/tests/conftest.py @@ -10,8 +10,8 @@ from alembic import command from alembic.config import Config -from src.config import settings -from src.db.service import get_test_db_url +from processing.config import settings +from processing.db.service import get_test_db_url pytest_plugins = ["docker_compose"] diff --git a/processing/tests/test_assets_status.py b/processing/tests/test_assets_status.py index 6e4fe04f7..c6258786e 100644 --- a/processing/tests/test_assets_status.py +++ b/processing/tests/test_assets_status.py @@ -2,8 +2,8 @@ import pytest -from src.config import settings -from src.tasks import PreprocessingTask +from processing.config import settings +from processing.tasks import PreprocessingTask @pytest.mark.skip @@ -24,7 +24,7 @@ def mock_preprocessing_task(): @pytest.mark.asyncio @pytest.mark.parametrize("status", ["failed", "in_progress", "preprocessed"]) async def test_send_status_to_assets(mock_preprocessing_task, status): - with patch("src.tasks.send_request") as mock: + with patch("processing.tasks.send_request") as mock: await mock_preprocessing_task.send_status_to_assets(status) mock.assert_awaited_once_with( method="PUT", diff --git a/processing/tests/test_text_merge.py b/processing/tests/test_text_merge.py index 0722150e1..512bd6c9e 100644 --- a/processing/tests/test_text_merge.py +++ b/processing/tests/test_text_merge.py @@ -1,6 +1,6 @@ from unittest.mock import patch -from src.schema import ( +from processing.schema import ( AnnotationData, MatchedPage, Page, @@ -8,7 +8,7 @@ PageSize, Input, ) -from src.text_merge import ( +from processing.text_merge import ( convert_points_to_pixels, match_page, download_files, @@ -223,7 +223,7 @@ def test_stitch_boxes(self): ), ] - @patch("src.text_merge.MinioCommunicator", return_value=MC()) + @patch("processing.text_merge.MinioCommunicator", return_value=MC()) def test_download(self, _1, tmp_path): request_data = AnnotationData( file="some_path/some_file.pdf", diff --git a/processing/tests/test_utils/test_utils.py b/processing/tests/test_utils/test_utils.py index 6ff15dfc7..4f8422c58 100644 --- a/processing/tests/test_utils/test_utils.py +++ b/processing/tests/test_utils/test_utils.py @@ -5,8 +5,8 @@ import responses from fastapi import HTTPException -from src.config import settings -from src.utils import utils +from processing.config import settings +from processing.utils import utils class MockResponse: diff --git a/taxonomy/Dockerfile b/taxonomy/Dockerfile index b9cd3ba01..b708b2d3d 100644 --- a/taxonomy/Dockerfile +++ b/taxonomy/Dockerfile @@ -14,7 +14,7 @@ RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/inst # Copy using poetry.lock in case it doesn't exist yet COPY pyproject.toml poetry.lock .env alembic.ini version.txt ./ COPY ./alembic ./alembic -COPY ./app ./app +COPY taxonomy ./app FROM base as build diff --git a/taxonomy/alembic/env.py b/taxonomy/alembic/env.py index ad3916fdd..f75a28ddc 100644 --- a/taxonomy/alembic/env.py +++ b/taxonomy/alembic/env.py @@ -4,7 +4,7 @@ from 
sqlalchemy import engine_from_config, pool from alembic import context # type: ignore -from app.database import SQLALCHEMY_DATABASE_URL, get_test_db_url +from taxonomy.database import SQLALCHEMY_DATABASE_URL, get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -14,7 +14,7 @@ # This line sets up loggers basically. fileConfig(config.config_file_name) -from app.models import Base # noqa E402 +from taxonomy.models import Base # noqa E402 target_metadata = Base.metadata diff --git a/taxonomy/documentation/update_docs.py b/taxonomy/documentation/update_docs.py index d49ee5071..8e10cf8ab 100644 --- a/taxonomy/documentation/update_docs.py +++ b/taxonomy/documentation/update_docs.py @@ -1,6 +1,6 @@ import yaml -from app.main import app +from taxonomy.main import app def str_presenter(dumper, data): diff --git a/taxonomy/app/__init__.py b/taxonomy/taxonomy/__init__.py similarity index 100% rename from taxonomy/app/__init__.py rename to taxonomy/taxonomy/__init__.py diff --git a/taxonomy/app/database.py b/taxonomy/taxonomy/database.py similarity index 100% rename from taxonomy/app/database.py rename to taxonomy/taxonomy/database.py diff --git a/taxonomy/app/errors.py b/taxonomy/taxonomy/errors.py similarity index 100% rename from taxonomy/app/errors.py rename to taxonomy/taxonomy/errors.py diff --git a/taxonomy/app/filters.py b/taxonomy/taxonomy/filters.py similarity index 79% rename from taxonomy/app/filters.py rename to taxonomy/taxonomy/filters.py index e415e80f0..1b2e981e2 100644 --- a/taxonomy/app/filters.py +++ b/taxonomy/taxonomy/filters.py @@ -1,6 +1,6 @@ from filter_lib import create_filter_model -from app.models import Taxon, Taxonomy +from taxonomy.models import Taxon, Taxonomy TaxonFilter = create_filter_model(Taxon, exclude=["tenant"]) TaxonomyFilter = create_filter_model(Taxonomy, exclude=["tenant"]) diff --git a/taxonomy/app/logging_setup.py b/taxonomy/taxonomy/logging_setup.py similarity index 100% rename from taxonomy/app/logging_setup.py rename to taxonomy/taxonomy/logging_setup.py diff --git a/taxonomy/app/main.py b/taxonomy/taxonomy/main.py similarity index 88% rename from taxonomy/app/main.py rename to taxonomy/taxonomy/main.py index c341ee67a..a52726e81 100644 --- a/taxonomy/app/main.py +++ b/taxonomy/taxonomy/main.py @@ -5,7 +5,7 @@ from fastapi import Depends, FastAPI from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from app.errors import ( +from taxonomy.errors import ( CheckFieldError, FieldConstraintError, ForeignKeyError, @@ -21,10 +21,10 @@ no_taxonomy_error_handler, taxon_parent_child_error_handler, ) -from app.tags import TAGS -from app.taxon import resources as taxon_resources -from app.taxonomy import resources as taxonomy_resources -from app.token_dependency import TOKEN +from taxonomy.tags import TAGS +from taxonomy.taxon import resources as taxon_resources +from taxonomy.taxonomy import resources as taxonomy_resources +from taxonomy.token_dependency import TOKEN load_dotenv(find_dotenv()) diff --git a/taxonomy/app/microservice_communication/__init__.py b/taxonomy/taxonomy/microservice_communication/__init__.py similarity index 100% rename from taxonomy/app/microservice_communication/__init__.py rename to taxonomy/taxonomy/microservice_communication/__init__.py diff --git a/taxonomy/app/microservice_communication/search.py b/taxonomy/taxonomy/microservice_communication/search.py similarity index 100% rename from taxonomy/app/microservice_communication/search.py rename to 
taxonomy/taxonomy/microservice_communication/search.py diff --git a/taxonomy/app/models.py b/taxonomy/taxonomy/models.py similarity index 97% rename from taxonomy/app/models.py rename to taxonomy/taxonomy/models.py index 975174316..29b70fb01 100644 --- a/taxonomy/app/models.py +++ b/taxonomy/taxonomy/models.py @@ -14,8 +14,8 @@ from sqlalchemy.orm import relationship, validates from sqlalchemy_utils import Ltree, LtreeType -from app.database import Base -from app.errors import CheckFieldError +from taxonomy.database import Base +from taxonomy.errors import CheckFieldError def default_tree(column_name: str) -> Callable: diff --git a/taxonomy/app/schemas/__init__.py b/taxonomy/taxonomy/schemas/__init__.py similarity index 84% rename from taxonomy/app/schemas/__init__.py rename to taxonomy/taxonomy/schemas/__init__.py index 0e5abc6aa..818b3be2f 100644 --- a/taxonomy/app/schemas/__init__.py +++ b/taxonomy/taxonomy/schemas/__init__.py @@ -1,15 +1,15 @@ -from app.schemas.errors import ( +from taxonomy.schemas.errors import ( BadRequestErrorSchema, ConnectionErrorSchema, NotFoundErrorSchema, ) -from app.schemas.taxon import ( +from taxonomy.schemas.taxon import ( ParentsConcatenateResponseSchema, TaxonBaseSchema, TaxonInputSchema, TaxonResponseSchema, ) -from app.schemas.taxonomy import ( +from taxonomy.schemas.taxonomy import ( CategoryLinkSchema, JobTaxonomySchema, TaxonomyBaseSchema, diff --git a/taxonomy/app/schemas/errors.py b/taxonomy/taxonomy/schemas/errors.py similarity index 100% rename from taxonomy/app/schemas/errors.py rename to taxonomy/taxonomy/schemas/errors.py diff --git a/taxonomy/app/schemas/taxon.py b/taxonomy/taxonomy/schemas/taxon.py similarity index 97% rename from taxonomy/app/schemas/taxon.py rename to taxonomy/taxonomy/schemas/taxon.py index 030225b53..b5d352e09 100644 --- a/taxonomy/app/schemas/taxon.py +++ b/taxonomy/taxonomy/schemas/taxon.py @@ -2,7 +2,7 @@ from pydantic import BaseModel, Field, validator -from app.errors import CheckFieldError +from taxonomy.errors import CheckFieldError class TaxonBaseSchema(BaseModel): diff --git a/taxonomy/app/schemas/taxonomy.py b/taxonomy/taxonomy/schemas/taxonomy.py similarity index 100% rename from taxonomy/app/schemas/taxonomy.py rename to taxonomy/taxonomy/schemas/taxonomy.py diff --git a/taxonomy/app/tags.py b/taxonomy/taxonomy/tags.py similarity index 100% rename from taxonomy/app/tags.py rename to taxonomy/taxonomy/tags.py diff --git a/taxonomy/app/taxon/__init__.py b/taxonomy/taxonomy/taxon/__init__.py similarity index 100% rename from taxonomy/app/taxon/__init__.py rename to taxonomy/taxonomy/taxon/__init__.py diff --git a/taxonomy/app/taxon/resources.py b/taxonomy/taxonomy/taxon/resources.py similarity index 93% rename from taxonomy/app/taxon/resources.py rename to taxonomy/taxonomy/taxon/resources.py index 5eeff7da3..2868993fc 100644 --- a/taxonomy/app/taxon/resources.py +++ b/taxonomy/taxonomy/taxon/resources.py @@ -5,11 +5,11 @@ from sqlalchemy.orm import Session from sqlalchemy_filters.exceptions import BadFilterFormat -from app.database import get_db -from app.errors import NoTaxonError -from app.filters import TaxonFilter -from app.microservice_communication.search import X_CURRENT_TENANT_HEADER -from app.schemas import ( +from taxonomy.database import get_db +from taxonomy.errors import NoTaxonError +from taxonomy.filters import TaxonFilter +from taxonomy.microservice_communication.search import X_CURRENT_TENANT_HEADER +from taxonomy.schemas import ( BadRequestErrorSchema, ConnectionErrorSchema, 
NotFoundErrorSchema, @@ -18,8 +18,8 @@ TaxonInputSchema, TaxonResponseSchema, ) -from app.tags import TAXON_TAG -from app.taxon.services import ( +from taxonomy.tags import TAXON_TAG +from taxonomy.taxon.services import ( add_taxon_db, concatenated_parents_list, delete_taxon_db, diff --git a/taxonomy/app/taxon/services.py b/taxonomy/taxonomy/taxon/services.py similarity index 97% rename from taxonomy/app/taxon/services.py rename to taxonomy/taxonomy/taxon/services.py index ab0a8bc13..2ec097a34 100644 --- a/taxonomy/app/taxon/services.py +++ b/taxonomy/taxonomy/taxon/services.py @@ -7,15 +7,15 @@ from sqlalchemy.orm.query import Query from sqlalchemy_utils import Ltree -from app.errors import CheckFieldError, NoTaxonError, SelfParentError -from app.filters import TaxonFilter -from app.models import Taxon -from app.schemas import ( +from taxonomy.errors import CheckFieldError, NoTaxonError, SelfParentError +from taxonomy.filters import TaxonFilter +from taxonomy.models import Taxon +from taxonomy.schemas import ( ParentsConcatenateResponseSchema, TaxonInputSchema, TaxonResponseSchema, ) -from app.taxonomy.services import get_latest_taxonomy, get_taxonomy +from taxonomy.taxonomy.services import get_latest_taxonomy, get_taxonomy TaxonIdT = str TaxonPathT = str diff --git a/taxonomy/app/taxonomy/__init__.py b/taxonomy/taxonomy/taxonomy/__init__.py similarity index 100% rename from taxonomy/app/taxonomy/__init__.py rename to taxonomy/taxonomy/taxonomy/__init__.py diff --git a/taxonomy/app/taxonomy/resources.py b/taxonomy/taxonomy/taxonomy/resources.py similarity index 97% rename from taxonomy/app/taxonomy/resources.py rename to taxonomy/taxonomy/taxonomy/resources.py index 48f03e761..954ccf69d 100644 --- a/taxonomy/app/taxonomy/resources.py +++ b/taxonomy/taxonomy/taxonomy/resources.py @@ -5,11 +5,11 @@ from sqlalchemy.orm import Session from sqlalchemy_filters.exceptions import BadFilterFormat -from app.database import get_db -from app.filters import TaxonomyFilter -from app.logging_setup import LOGGER -from app.microservice_communication.search import X_CURRENT_TENANT_HEADER -from app.schemas import ( +from taxonomy.database import get_db +from taxonomy.filters import TaxonomyFilter +from taxonomy.logging_setup import LOGGER +from taxonomy.microservice_communication.search import X_CURRENT_TENANT_HEADER +from taxonomy.schemas import ( BadRequestErrorSchema, CategoryLinkSchema, ConnectionErrorSchema, @@ -19,8 +19,8 @@ TaxonomyInputSchema, TaxonomyResponseSchema, ) -from app.tags import TAXONOMY_TAG -from app.taxonomy.services import ( +from taxonomy.tags import TAXONOMY_TAG +from taxonomy.taxonomy.services import ( batch_latest_taxonomies, batch_versioned_taxonomies, bulk_create_relations_with_categories, diff --git a/taxonomy/app/taxonomy/services.py b/taxonomy/taxonomy/taxonomy/services.py similarity index 97% rename from taxonomy/app/taxonomy/services.py rename to taxonomy/taxonomy/taxonomy/services.py index 68a36c83f..3daf7ec0e 100644 --- a/taxonomy/app/taxonomy/services.py +++ b/taxonomy/taxonomy/taxonomy/services.py @@ -4,10 +4,10 @@ from sqlalchemy import and_, desc, null, or_ from sqlalchemy.orm import Query, Session -from app.errors import CheckFieldError -from app.filters import TaxonomyFilter -from app.models import AssociationTaxonomyCategory, Taxonomy -from app.schemas import ( +from taxonomy.errors import CheckFieldError +from taxonomy.filters import TaxonomyFilter +from taxonomy.models import AssociationTaxonomyCategory, Taxonomy +from taxonomy.schemas import ( 
CategoryLinkSchema, JobTaxonomySchema, TaxonomyBaseSchema, diff --git a/taxonomy/app/token_dependency.py b/taxonomy/taxonomy/token_dependency.py similarity index 100% rename from taxonomy/app/token_dependency.py rename to taxonomy/taxonomy/token_dependency.py diff --git a/taxonomy/tests/conftest.py b/taxonomy/tests/conftest.py index 8f582d43d..a06acc5de 100644 --- a/taxonomy/tests/conftest.py +++ b/taxonomy/tests/conftest.py @@ -15,13 +15,13 @@ from alembic import command from alembic.config import Config -from app.database import SQLALCHEMY_DATABASE_URL, Base, get_db, get_test_db_url -from app.main import app -from app.models import Taxon, Taxonomy -from app.schemas import CategoryLinkSchema, TaxonInputSchema, TaxonomyInputSchema -from app.taxon import services as taxon_services -from app.taxonomy import services as taxonomy_services -from app.token_dependency import TOKEN +from taxonomy.database import SQLALCHEMY_DATABASE_URL, Base, get_db, get_test_db_url +from taxonomy.main import app +from taxonomy.models import Taxon, Taxonomy +from taxonomy.schemas import CategoryLinkSchema, TaxonInputSchema, TaxonomyInputSchema +from taxonomy.taxon import services as taxon_services +from taxonomy.taxonomy import services as taxonomy_services +from taxonomy.token_dependency import TOKEN from tests.override_app_dependency import TEST_TENANTS, override diff --git a/taxonomy/tests/test_taxon_crud.py b/taxonomy/tests/test_taxon_crud.py index 482704386..c5b4d4271 100644 --- a/taxonomy/tests/test_taxon_crud.py +++ b/taxonomy/tests/test_taxon_crud.py @@ -4,7 +4,7 @@ import pytest -from app.models import Taxon +from taxonomy.models import Taxon from tests.override_app_dependency import TEST_HEADER TAXON_PATH = "/taxons" diff --git a/taxonomy/tests/test_taxonomy_router.py b/taxonomy/tests/test_taxonomy_router.py index 2efdcc8ef..6d6ce325a 100644 --- a/taxonomy/tests/test_taxonomy_router.py +++ b/taxonomy/tests/test_taxonomy_router.py @@ -2,9 +2,9 @@ import pytest -from app.models import Taxonomy -from app.schemas import CategoryLinkSchema -from app.taxonomy import services +from taxonomy.models import Taxonomy +from taxonomy.schemas import CategoryLinkSchema +from taxonomy.taxonomy import services from tests.override_app_dependency import TEST_HEADER, TEST_TENANTS diff --git a/users/Dockerfile b/users/Dockerfile index 332f5db44..898991d7e 100644 --- a/users/Dockerfile +++ b/users/Dockerfile @@ -3,7 +3,7 @@ FROM ${base_image} as build WORKDIR /opt/users_filter -COPY src /opt/users_filter/src +COPY users /opt/users_filter/src COPY requirements.txt /opt/users_filter COPY .env /opt/users_filter @@ -22,7 +22,7 @@ CMD pytest -vvv FROM sonarsource/sonar-scanner-cli:4.6 AS sonar -COPY src /sonar/src +COPY users /sonar/src COPY tests /sonar/tests COPY sonar-project.properties /sonar/sonar-project.properties diff --git a/users/tests/keycloak/test_query.py b/users/tests/keycloak/test_query.py index cb1ead1a7..bf9084421 100644 --- a/users/tests/keycloak/test_query.py +++ b/users/tests/keycloak/test_query.py @@ -1,16 +1,16 @@ -"""Testing src/keycloak/query.py.""" +"""Testing users/keycloak/query.py.""" import json from unittest.mock import patch, create_autospec import pytest -import src.keycloak.query as query -import src.keycloak.schemas as schemas +import users.keycloak.query as query +import users.keycloak.schemas as schemas @pytest.fixture def request_mock(): - with patch("src.keycloak.query.aiohttp.request") as mock: + with patch("users.keycloak.query.aiohttp.request") as mock: yield mock diff --git 
a/users/tests/keycloak/test_schemas.py b/users/tests/keycloak/test_schemas.py index 47a513dfe..87026a1d1 100644 --- a/users/tests/keycloak/test_schemas.py +++ b/users/tests/keycloak/test_schemas.py @@ -1,5 +1,5 @@ -import src.keycloak.utils as kc_utils -import src.keycloak.schemas as kc_schemas +import users.keycloak.utils as kc_utils +import users.keycloak.schemas as kc_schemas import pytest user_1 = kc_schemas.User(username="user", id="1") diff --git a/users/tests/keycloak/test_utils.py b/users/tests/keycloak/test_utils.py index 8e88add38..69bccc9f9 100644 --- a/users/tests/keycloak/test_utils.py +++ b/users/tests/keycloak/test_utils.py @@ -1,5 +1,5 @@ -import src.keycloak.utils as kc_utils -from src.schemas import Users +import users.keycloak.utils as kc_utils +from users.schemas import Users import pytest diff --git a/users/tests/test_main.py b/users/tests/test_main.py index e34462a31..ea0cbe15e 100644 --- a/users/tests/test_main.py +++ b/users/tests/test_main.py @@ -6,8 +6,8 @@ from fastapi.testclient import TestClient from tenant_dependency import TenantData -import src.keycloak.schemas as kc_schemas -from src.main import app, check_authorization, tenant +import users.keycloak.schemas as kc_schemas +from users.main import app, check_authorization, tenant client = TestClient(app) @@ -140,7 +140,7 @@ def test_check_authorization_role_is_right(mock_tenant_data): check_authorization(token=mock_tenant_data, role="role") -@patch("src.keycloak.query.get_token_v2", return_value=token_schema) +@patch("users.keycloak.query.get_token_v2", return_value=token_schema) def test_login_body(token_schema): response = client.post( "/token", @@ -153,7 +153,7 @@ def test_login_body(token_schema): assert response.json() == token_representation -@patch("src.keycloak.query.get_token_v2", return_value=token_schema) +@patch("users.keycloak.query.get_token_v2", return_value=token_schema) @pytest.mark.parametrize( ("request_body", "status_code"), [ @@ -208,7 +208,7 @@ def test_login_status_code(token_schema, request_body, status_code): assert response.status_code == status_code -@patch("src.keycloak.query.get_user", return_value=user_1) +@patch("users.keycloak.query.get_user", return_value=user_1) class TestGetUserGWT: def test_get_user_jwt_body(self, mock_user, user_representation): response = client.get("/users/current") @@ -221,7 +221,7 @@ def test_get_user_jwt_status_code(self, mock_user): assert response.status_code == 200 -@patch("src.keycloak.query.get_user", return_value=user_1) +@patch("users.keycloak.query.get_user", return_value=user_1) class TestGetUser: def test_get_user_body(self, mock_user, user_representation): response = client.get("/users/user-id") @@ -238,7 +238,7 @@ def test_get_user_info_from_token_introspection( mocked_token1, mocked_token1_data ): with patch( - "src.keycloak.query.introspect_token", return_value=mocked_token1_data + "users.keycloak.query.introspect_token", return_value=mocked_token1_data ): response = client.get( "/users/current_v2", @@ -252,7 +252,7 @@ def test_get_user_info_from_token_introspection( mock_all_groups = [group_1, group_2] -@patch("src.keycloak.query.get_groups", return_value=mock_all_groups) +@patch("users.keycloak.query.get_groups", return_value=mock_all_groups) class TestGetTenants: def test_get_tenants_body(self, mock_groups): response = client.get("/tenants") @@ -263,8 +263,8 @@ def test_get_tenants_status_code(self, mock_groups): assert response.status_code == 200 -@patch("src.keycloak.query.create_group", return_value=None) 
-@patch("src.s3.create_bucket", return_value=None) +@patch("users.keycloak.query.create_group", return_value=None) +@patch("users.s3.create_bucket", return_value=None) class TestCreateTenant: def test_create_tenant_body(self, mock_group, mock_bucket): response = client.post("/tenants?tenant=tenant") @@ -291,10 +291,10 @@ def test_create_tenant_status_code( assert response.status_code == response_status_code -@patch("src.keycloak.query.get_groups", return_value=mock_all_groups) -@patch("src.keycloak.query.get_user", return_value=user_1) -@patch("src.keycloak.schemas.User.add_tenant", return_value=None) -@patch("src.keycloak.query.update_user", return_value=None) +@patch("users.keycloak.query.get_groups", return_value=mock_all_groups) +@patch("users.keycloak.query.get_user", return_value=user_1) +@patch("users.keycloak.schemas.User.add_tenant", return_value=None) +@patch("users.keycloak.query.update_user", return_value=None) class TestAddUserToTenant: @pytest.mark.parametrize( ("tenant", "expected_result"), @@ -335,8 +335,8 @@ def test_add_user_to_tenant2( assert response.status_code == expected_result -@patch("src.keycloak.query.get_user", return_value=user_1) -@patch("src.keycloak.query.update_user", return_value=None) +@patch("users.keycloak.query.get_user", return_value=user_1) +@patch("users.keycloak.query.update_user", return_value=None) @pytest.mark.parametrize( ("tenant", "expected_result"), [ @@ -350,8 +350,8 @@ def test_remove_user_from_tenant_body( assert response.json() == expected_result -@patch("src.keycloak.query.get_user", return_value=user_1) -@patch("src.keycloak.query.update_user", return_value=None) +@patch("users.keycloak.query.get_user", return_value=user_1) +@patch("users.keycloak.query.update_user", return_value=None) @pytest.mark.parametrize( ("tenant", "expected_result"), [ @@ -365,9 +365,9 @@ def test_remove_user_from_tenant_status_code( assert response.status_code == 200 -@patch("src.keycloak.query.get_users_v2", return_value=mock_all_users) +@patch("users.keycloak.query.get_users_v2", return_value=mock_all_users) @patch( - "src.keycloak.query.get_users_by_role", return_value=mock_users_with_role + "users.keycloak.query.get_users_by_role", return_value=mock_users_with_role ) class TestUsersSearch: @pytest.mark.parametrize("request_body", [{}, {"filters": []}]) @@ -769,9 +769,9 @@ def test_filter_users_by_all_filters_when_user_does_not_exist_body2( mock_users = [user] -@patch("src.keycloak.query.create_user", return_value=None) -@patch("src.keycloak.query.get_users_v2", return_value=mock_all_users) -@patch("src.keycloak.query.execute_action_email", return_value=None) +@patch("users.keycloak.query.create_user", return_value=None) +@patch("users.keycloak.query.get_users_v2", return_value=mock_all_users) +@patch("users.keycloak.query.execute_action_email", return_value=None) class TestUserRegistration: def test_user_registration_body(self, user, mock_all_users, action_email): response = client.post("/users/registration?email=mail@mail.ru") @@ -802,10 +802,10 @@ def test_get_idp_names_and_SSOauth_links( mocked_admin_auth_data, mocked_identity_providers_data ): with patch( - "src.keycloak.query.get_master_realm_auth_data", + "users.keycloak.query.get_master_realm_auth_data", return_value=mocked_admin_auth_data, ), patch( - "src.keycloak.query.get_identity_providers_data", + "users.keycloak.query.get_identity_providers_data", return_value=mocked_identity_providers_data, ): response = client.get("/identity_providers_data") diff --git a/users/tests/test_schemas.py 
b/users/tests/test_schemas.py index 8dccb2e15..c3fb39be1 100644 --- a/users/tests/test_schemas.py +++ b/users/tests/test_schemas.py @@ -1,4 +1,4 @@ -from src.schemas import Users +from users.schemas import Users def test_users_schemas(): diff --git a/users/tests/test_utils.py b/users/tests/test_utils.py index 04b445465..2c13f4cb8 100644 --- a/users/tests/test_utils.py +++ b/users/tests/test_utils.py @@ -1,7 +1,7 @@ from unittest.mock import patch import pytest -from src import utils +from users import utils def test_extract_idp_data_needed(): @@ -53,5 +53,5 @@ def test_extract_idp_data_needed(): ("prefix", "expected"), (("", "tenant"), ("prefix", "prefix-tenant")) ) def test_bucket_dependency(prefix: str, expected: str) -> None: - with patch("src.config.S3_PREFIX", prefix): + with patch("users.config.S3_PREFIX", prefix): assert utils.get_bucket_name("tenant") == expected diff --git a/users/src/__init__.py b/users/users/__init__.py similarity index 100% rename from users/src/__init__.py rename to users/users/__init__.py diff --git a/users/src/config.py b/users/users/config.py similarity index 100% rename from users/src/config.py rename to users/users/config.py diff --git a/users/src/keycloak/__init__.py b/users/users/keycloak/__init__.py similarity index 100% rename from users/src/keycloak/__init__.py rename to users/users/keycloak/__init__.py diff --git a/users/src/keycloak/query.py b/users/users/keycloak/query.py similarity index 98% rename from users/src/keycloak/query.py rename to users/users/keycloak/query.py index a5f753f28..0b34284d4 100644 --- a/users/src/keycloak/query.py +++ b/users/users/keycloak/query.py @@ -1,11 +1,11 @@ from typing import Any, Dict, List, TypedDict, Union import aiohttp -import src.config as config -import src.keycloak.resources as resources -import src.keycloak.schemas as schemas +import users.config as config +import users.keycloak.resources as resources +import users.keycloak.schemas as schemas from fastapi import HTTPException, status -from src import logger +from users import logger class AuthData(TypedDict): diff --git a/users/src/keycloak/resources.py b/users/users/keycloak/resources.py similarity index 96% rename from users/src/keycloak/resources.py rename to users/users/keycloak/resources.py index a6b334893..bf07f4c94 100644 --- a/users/src/keycloak/resources.py +++ b/users/users/keycloak/resources.py @@ -4,7 +4,7 @@ from string import Template from urllib.parse import urljoin -from src.config import KEYCLOAK_ENDPOINT +from users.config import KEYCLOAK_ENDPOINT def join_paths(*args: str) -> str: diff --git a/users/src/keycloak/schemas.py b/users/users/keycloak/schemas.py similarity index 100% rename from users/src/keycloak/schemas.py rename to users/users/keycloak/schemas.py diff --git a/users/src/keycloak/utils.py b/users/users/keycloak/utils.py similarity index 78% rename from users/src/keycloak/utils.py rename to users/users/keycloak/utils.py index 46e85bf91..b2b77b835 100644 --- a/users/src/keycloak/utils.py +++ b/users/users/keycloak/utils.py @@ -1,7 +1,7 @@ from typing import Any, Dict, List, Optional, Union -import src.keycloak.schemas as schemas -from src.schemas import Users +import users.keycloak.schemas as schemas +from users.schemas import Users def create_filters(users: Users) -> Dict[str, Union[str, None]]: diff --git a/users/src/logger.py b/users/users/logger.py similarity index 100% rename from users/src/logger.py rename to users/users/logger.py diff --git a/users/src/main.py b/users/users/main.py similarity index 97% rename from 
users/src/main.py rename to users/users/main.py index 34be85d1f..122587c09 100644 --- a/users/src/main.py +++ b/users/users/main.py @@ -2,24 +2,24 @@ import aiohttp import pydantic -import src.config as conf -import src.keycloak.query as kc_query -import src.keycloak.schemas as kc_schemas -import src.keycloak.utils as kc_utils +import users.config as conf +import users.keycloak.query as kc_query +import users.keycloak.schemas as kc_schemas +import users.keycloak.utils as kc_utils from aiohttp.web_exceptions import HTTPException as AIOHTTPException from apscheduler.schedulers.background import BackgroundScheduler from email_validator import EmailNotValidError, validate_email from fastapi import Depends, FastAPI, Header, HTTPException, Query, Request from fastapi.responses import JSONResponse from fastapi.security import OAuth2PasswordRequestForm -from src import s3, utils -from src.config import ( +from users import s3, utils +from users.config import ( KEYCLOAK_ROLE_ADMIN, KEYCLOAK_USERS_PUBLIC_KEY, ROOT_PATH, ) -from src.logger import Logger -from src.schemas import Users +from users.logger import Logger +from users.schemas import Users from tenant_dependency import TenantData, get_tenant_info from urllib3.exceptions import MaxRetryError diff --git a/users/src/s3.py b/users/users/s3.py similarity index 98% rename from users/src/s3.py rename to users/users/s3.py index 3f138b541..9f5d32d23 100644 --- a/users/src/s3.py +++ b/users/users/s3.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Optional from minio import Minio, credentials -from src import config, logger +from users import config, logger class S3Providers(str, enum.Enum): diff --git a/users/src/schemas.py b/users/users/schemas.py similarity index 100% rename from users/src/schemas.py rename to users/users/schemas.py diff --git a/users/src/utils.py b/users/users/utils.py similarity index 98% rename from users/src/utils.py rename to users/users/utils.py index d7b980f84..7a19410b0 100644 --- a/users/src/utils.py +++ b/users/users/utils.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional from minio import Minio -from src import config +from users import config def extract_idp_data_needed( From a88b4fedde11d40c6a9a1a6050ad5194e1f2ae4c Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 13:41:14 +0400 Subject: [PATCH 07/22] fix kafka config --- dev_runner/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/dev_runner/docker-compose.yml b/dev_runner/docker-compose.yml index f7478f6fe..a67eecdc2 100644 --- a/dev_runner/docker-compose.yml +++ b/dev_runner/docker-compose.yml @@ -63,6 +63,7 @@ services: environment: KAFKA_ADVERTISED_HOST_NAME: localhost KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 # KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'false' depends_on: - zookeeper From 67e78018ff3407b487374c3b853e4c813f583a30 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 16:46:40 +0400 Subject: [PATCH 08/22] feat: add readme --- dev_runner/README.md | 46 ++++++++++++++++ dev_runner/collect_requirements.sh | 21 ++++++- dev_runner/pyproject.toml | 88 ++++++++++++++++-------------- 3 files changed, 112 insertions(+), 43 deletions(-) diff --git a/dev_runner/README.md b/dev_runner/README.md index e69de29bb..e8c2751f6 100644 --- a/dev_runner/README.md +++ b/dev_runner/README.md @@ -0,0 +1,46 @@ +# Local Dev Runner + +This is a subproject for BadgerDoc to run a local development environment. 
+
+## How to Use
+
+### Install Python Dependencies
+Use poetry to install dependencies:
+```bash
+poetry install
+```
+
+If you have problems with dependencies, you can try to update them. First, install the [poetry export plugin](https://pypi.org/project/poetry-plugin-export/),
+and then run the following command:
+```bash
+./collect_requirements.sh
+```
+
+
+On Windows and macOS you may need one extra package:
+```bash
+pip install python-magic-bin
+```
+And on macOS additionally:
+```bash
+brew install libmagic
+```
+
+### External Dependencies
+There are several external dependencies; to run them, use docker-compose:
+```bash
+docker-compose up
+```
+
+### Run BadgerDoc
+
+> TODO: DB migrations
+
+To run all the services, run the following command:
+```bash
+python start.py
+```
+Or you can run only the services you need (see help for more information):
+```bash
+python start.py annotation users
+```
\ No newline at end of file
diff --git a/dev_runner/collect_requirements.sh b/dev_runner/collect_requirements.sh index e46894716..a01a6164f 100755 --- a/dev_runner/collect_requirements.sh +++ b/dev_runner/collect_requirements.sh @@ -8,13 +8,15 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) POETRY_SERVICES=(search annotation convert models processing taxonomy) PIPENV_SERVICES=(assets) PIP_SERVICES=(jobs pipelines scheduler users) -DUPLICATED_DEPENDENCIES=(starlette fastapi aiohttp sqlalchemy_utils sqlalchemy aiosignal alembic anyio asgiref attrs boto3 botocore cachetools certifi cffi charset-normalizer click colorama cryptography h11 idna importlib-metadata importlib-resources jmespath mako markupsafe pillow psycopg2-binary pycparser pydantic pyjwt python-dotenv pyyaml requests s3transfer setuptools typing-extensions urllib3 uvicorn yarl zipp frozenlist minio multidict sniffio aiokafka websocket-client) +DUPLICATED_DEPENDENCIES=(starlette fastapi aiohttp sqlalchemy_utils sqlalchemy aiosignal alembic) +PACKAGES_TO_KEEP=(dnspython pytz pytz-deprecation-shim requests-oauthlib rfc3986 sqlalchemy-filters tzdata tzlocal) collect_poetry_dependencies() { for poetry_service in "${POETRY_SERVICES[@]}"; do cd "$ROOT_DIR/$poetry_service" || exit # ensure that you have https://pypi.org/project/poetry-plugin-export/ installed poetry export -f requirements.txt --without-hashes | cut -d \; -f 1 >> "$TMP_REQUIREMENTS_FILE" + cd "$SCRIPT_DIR" || exit done } @@ -22,6 +24,7 @@ collect_pipenv_dependencies() { for pipenv_service in "${PIPENV_SERVICES[@]}"; do cd "$ROOT_DIR/$pipenv_service" || exit pipenv requirements | tail -n +2 | cut -d \; -f 1 >> "$TMP_REQUIREMENTS_FILE" + cd "$SCRIPT_DIR" || exit done } @@ -31,21 +34,35 @@ collect_pip_dependencies() { for pip_service in "${PIP_SERVICES[@]}"; do cd "$ROOT_DIR/$pip_service" || exit if [ -f requirements.txt ]; then cat requirements.txt | cut -d \; -f 1 >> "$TMP_REQUIREMENTS_FILE" fi + cd "$SCRIPT_DIR" || exit done } + +# remove all dependencies +cd "$SCRIPT_DIR" || exit +all_packages=$(poetry show | cut -d' ' -f1) +for package in "${PACKAGES_TO_KEEP[@]}"; do + all_packages=$(echo "$all_packages" | grep -v "$package") +done +echo $all_packages | xargs poetry remove + + +# collect new dependencies collect_poetry_dependencies collect_pipenv_dependencies collect_pip_dependencies + cd "$SCRIPT_DIR" || exit requirementes=$(cat "$TMP_REQUIREMENTS_FILE") for dependency in "${DUPLICATED_DEPENDENCIES[@]}"; do requirementes=$(echo "$requirementes" | grep -v "$dependency") done + echo $requirementes | xargs poetry -v add for dependency in "${DUPLICATED_DEPENDENCIES[@]}"; do
poetry add "$dependency"=="*" done -poetry add ../lib/tenants ../lib/filter_lib python-magic-bin +poetry add ../lib/tenants ../lib/filter_lib diff --git a/dev_runner/pyproject.toml b/dev_runner/pyproject.toml index 2a6c08993..774cd1f8d 100644 --- a/dev_runner/pyproject.toml +++ b/dev_runner/pyproject.toml @@ -8,21 +8,63 @@ packages = [{include = "dev_runner"}] [tool.poetry.dependencies] python = "^3.9" +requests-oauthlib = "1.3.1" +rfc3986 = {version = "1.5.0", extras = ["idna2008"]} aiocache = "0.11.1" +aiokafka = "0.7.2" +asgiref = "3.5.0" async-timeout = "4.0.2" +attrs = "22.2.0" +boto3 = "1.20.54" +botocore = "1.23.54" +certifi = "2021.10.8" +cffi = "1.15.1" +charset-normalizer = "2.0.12" +click = "8.0.3" +colorama = "0.4.6" +cryptography = "36.0.0" elasticsearch = {version = "7.13.4", extras = ["async"]} +frozenlist = "1.3.3" +h11 = "0.13.0" +idna = "3.3" +jmespath = "0.10.0" kafka-python = "2.0.2" +multidict = "6.0.4" +pycparser = "2.21" +pydantic = "1.8.2" python-dateutil = "2.8.2" +python-dotenv = "0.19.1" +pyyaml = "6.0" +s3transfer = "0.5.1" six = "1.16.0" +typing-extensions = "4.1.1" +urllib3 = ">=1.26.8,<1.27.0" +uvicorn = "0.15.0" +yarl = "1.8.2" +cachetools = "5.2.0" +importlib-metadata = "4.11.0" +importlib-resources = "5.4.0" +mako = "1.1.6" +markupsafe = "2.0.1" +psycopg2-binary = "2.9.1" +requests = "2.26.0" +zipp = "3.7.0" +anyio = "3.5.0" +boto3-stubs = "1.26.64" +botocore-stubs = "1.29.64" chardet = "4.0.0" +minio = "7.1.0" mypy-extensions = "0.4.4" pdfminer-six = "20200517" pdfplumber = "0.5.28" +pillow = "9.0.1" pycryptodome = "3.17" pymupdf-fonts = "1.0.5" pymupdf = "1.21.1" +sniffio = "1.2.0" sortedcontainers = "2.4.0" types-awscrt = "0.16.4" +types-s3transfer = "0.6.0.post5" wand = "0.6.11" bcrypt = "4.0.1" google-auth = "2.15.0" @@ -31,11 +73,16 @@ oauthlib = "3.2.2" paramiko = "2.12.0" pyasn1-modules = "0.2.8" pyasn1 = "0.4.8" +pyjwt = {version = "2.3.0", extras = ["crypto"]} pynacl = "1.5.0" python-multipart = "0.0.5" rsa = "4.9" +setuptools = ">=60.5.0,<60.6.0" +websocket-client = "1.4.2" async-cache = "1.1.1" numpy = "1.24.1" +types-requests = "2.28.11.7" +types-urllib3 = "1.26.25.4" httpcore = "0.16.2" httpx = "0.23.1" exceptiongroup = "1.0.4" @@ -47,7 +94,6 @@ pytest = "7.2.0" python-magic = "0.4.25" tomli = "2.0.1" sqlalchemy-utils = "*" -pyjwt = "*" email-validator = "1.1.3" apscheduler = "3.9.1" starlette = "*" @@ -56,51 +102,11 @@ aiohttp = "*" sqlalchemy = "*" aiosignal = "*" alembic = "*" -anyio = "*" -asgiref = "*" -attrs = "*" -boto3 = "*" -botocore = "*" -cachetools = "*" -certifi = "*" -cffi = "*" -charset-normalizer = "*" -click = "*" -colorama = "*" -cryptography = "*" -h11 = "*" -idna = "*" -importlib-metadata = "*" -importlib-resources = "*" -jmespath = "*" -mako = "*" -markupsafe = "*" -pillow = "*" -psycopg2-binary = "*" -pycparser = "*" -pydantic = "*" -python-dotenv = "*" -pyyaml = "*" -requests = "*" -s3transfer = "*" -setuptools = "*" -typing-extensions = "*" -urllib3 = "*" -uvicorn = "*" -yarl = "*" -zipp = "*" -frozenlist = "*" -minio = "*" -multidict = "*" -sniffio = "*" -aiokafka = "*" -websocket-client = "*" tenant-dependency = {path = "../lib/tenants"} filter-lib = {path = "../lib/filter_lib"} python-magic-bin = "^0.4.14" - [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" From 3fec8ce33dceb82e7130f4855c1316e4e248dc30 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 17:06:01 +0400 Subject: [PATCH 09/22] refactor: formatting --- annotation/.pre-commit-config.yaml | 2 +- 
annotation/tests/test_annotators_overall_load.py | 3 ++- annotation/tests/test_assets_communication.py | 12 +++++++----- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/annotation/.pre-commit-config.yaml b/annotation/.pre-commit-config.yaml index 9e7cf1fb3..947a49c76 100644 --- a/annotation/.pre-commit-config.yaml +++ b/annotation/.pre-commit-config.yaml @@ -1,7 +1,7 @@ fail_fast: true repos: - repo: https://github.com/pycqa/isort - rev: 5.9.2 + rev: 5.12.0 hooks: - id: isort args: diff --git a/annotation/tests/test_annotators_overall_load.py b/annotation/tests/test_annotators_overall_load.py index c10a891ee..972e14d77 100644 --- a/annotation/tests/test_annotators_overall_load.py +++ b/annotation/tests/test_annotators_overall_load.py @@ -465,7 +465,8 @@ def test_overall_load_after_distribution( monkeypatch, prepare_db_for_overall_load ): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication." + "get_response", Mock(return_value=[{"id": 3, "pages": 4}]), ) response = client.post( diff --git a/annotation/tests/test_assets_communication.py b/annotation/tests/test_assets_communication.py index 1cb4e8dd4..fa51d52c9 100644 --- a/annotation/tests/test_assets_communication.py +++ b/annotation/tests/test_assets_communication.py @@ -2,6 +2,9 @@ import pytest import responses +from fastapi import HTTPException +from requests import ConnectionError, RequestException, Timeout + from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ASSETS_URL, @@ -10,9 +13,6 @@ get_file_path_and_bucket, get_files_info, ) -from fastapi import HTTPException -from requests import ConnectionError, RequestException, Timeout - from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, TEST_TOKEN FILES = [ @@ -126,7 +126,8 @@ def test_get_file_names( monkeypatch, file_ids, parsed_response, expected_result ): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication." + "get_response", Mock(return_value=parsed_response), ) @@ -215,7 +216,8 @@ def test_get_files_info( expected_result, ): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication." 
+ "get_response", Mock(return_value=mocked_files), ) for i, dataset_id in enumerate(dataset_ids): From d83d88c5e42e0dfb59e89fbd86d64f965b76dc8d Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 17:06:50 +0400 Subject: [PATCH 10/22] refactor: formatting --- convert/.pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert/.pre-commit-config.yaml b/convert/.pre-commit-config.yaml index 9e7cf1fb3..947a49c76 100644 --- a/convert/.pre-commit-config.yaml +++ b/convert/.pre-commit-config.yaml @@ -1,7 +1,7 @@ fail_fast: true repos: - repo: https://github.com/pycqa/isort - rev: 5.9.2 + rev: 5.12.0 hooks: - id: isort args: From 041eb16c838d224efa6d1322f83c091be56512e0 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 17:09:33 +0400 Subject: [PATCH 11/22] refactor: formatting --- lib/filter_lib/.pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/filter_lib/.pre-commit-config.yaml b/lib/filter_lib/.pre-commit-config.yaml index 60a8973cf..289d36f82 100644 --- a/lib/filter_lib/.pre-commit-config.yaml +++ b/lib/filter_lib/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/pycqa/isort - rev: 5.9.1 + rev: 5.12.0 hooks: - id: isort args: From 62ed314cc78fe598a6379110d933d4e89f77b826 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 17:13:44 +0400 Subject: [PATCH 12/22] refactor: formatting --- annotation/alembic/env.py | 8 +- ...72a5043_add_categories_editor_url_data_.py | 8 +- ..._alter_categories_id_integer_to_varchar.py | 28 +-- ...3136551008d8_agreement_metrics_relation.py | 4 +- .../36bff2d016f7_expand_validationschema.py | 16 +- .../versions/3a083a1fbba0_first_revision.py | 24 +-- .../versions/4272d0a43ff1_agreement_score.py | 8 +- .../66cd6054c2d0_add_categories_tree.py | 8 +- ...8e6343_add_extensive_coverage_parameter.py | 6 +- .../7cc1ed83c309_compare_agreement_scores.py | 4 +- ...6b8ebe84_remove_annotateddoc_constraint.py | 12 +- ..._category_change_editor_data_attributes.py | 4 +- .../cf633ca94498_add_statuses_to_job.py | 4 +- ...963_drop_is_link_add_type_to_categories.py | 16 +- annotation/annotation/annotations/main.py | 60 ++---- .../annotation/annotations/resources.py | 76 +++----- annotation/annotation/categories/resources.py | 21 +- annotation/annotation/categories/services.py | 50 ++--- annotation/annotation/distribution/main.py | 74 ++----- .../annotation/distribution/resources.py | 32 ++- annotation/annotation/errors.py | 19 +- annotation/annotation/jobs/resources.py | 71 +++---- annotation/annotation/jobs/services.py | 43 ++-- annotation/annotation/main.py | 20 +- .../assets_communication.py | 17 +- .../jobs_communication.py | 9 +- .../microservice_communication/search.py | 4 +- annotation/annotation/models.py | 33 ++-- annotation/annotation/schemas/annotations.py | 36 +--- annotation/annotation/schemas/categories.py | 7 +- annotation/annotation/schemas/jobs.py | 19 +- annotation/annotation/schemas/tasks.py | 31 +-- annotation/annotation/tasks/resources.py | 74 +++---- annotation/annotation/tasks/services.py | 72 ++----- annotation/annotation/tasks/validation.py | 38 ++-- annotation/annotation/utils.py | 2 +- annotation/documentation/update_docs.py | 5 +- annotation/tests/conftest.py | 49 ++--- .../tests/test_annotators_overall_load.py | 53 ++--- annotation/tests/test_assets_communication.py | 14 +- annotation/tests/test_category_crud.py | 183 +++++------------- annotation/tests/test_cross_validation.py | 4 
+- annotation/tests/test_delete_batch_tasks.py | 8 +- annotation/tests/test_distribution.py | 12 +- annotation/tests/test_finish_task.py | 35 +--- .../tests/test_get_accumulated_revisions.py | 4 +- ..._get_annotation_for_particular_revision.py | 8 +- annotation/tests/test_get_child_categories.py | 8 +- annotation/tests/test_get_job.py | 12 +- annotation/tests/test_get_job_files.py | 10 +- .../tests/test_get_jobs_info_by_files.py | 4 +- annotation/tests/test_get_pages_info.py | 5 +- annotation/tests/test_get_revisions.py | 4 +- .../test_get_revisions_without_annotation.py | 4 +- annotation/tests/test_job_categories.py | 23 +-- annotation/tests/test_microservices_search.py | 8 +- annotation/tests/test_post.py | 40 +--- annotation/tests/test_post_annotation.py | 90 +++------ annotation/tests/test_post_job.py | 13 +- .../tests/test_post_unassgined_files.py | 8 +- annotation/tests/test_start_job.py | 16 +- annotation/tests/test_tasks_crud_cr.py | 115 +++-------- annotation/tests/test_tasks_crud_ud.py | 13 +- annotation/tests/test_update_job.py | 38 +--- annotation/tests/test_validation.py | 14 +- assets/alembic/env.py | 11 +- .../versions/afa33cc83d57_new_fields.py | 10 +- assets/assets/db/models.py | 19 +- assets/assets/db/service.py | 49 ++--- assets/assets/db/utils.py | 2 +- assets/assets/routers/bonds_router.py | 13 +- assets/assets/routers/datasets_router.py | 21 +- assets/assets/routers/files_router.py | 18 +- assets/assets/routers/minio_router.py | 33 +--- assets/assets/routers/s3_router.py | 8 +- assets/assets/utils/common_utils.py | 13 +- assets/assets/utils/convert_service_utils.py | 9 +- assets/assets/utils/minio_utils.py | 41 +--- assets/assets/utils/s3_utils.py | 17 +- assets/tests/conftest.py | 32 +-- assets/tests/test_helpers.py | 10 +- assets/tests/test_main.py | 89 +++------ assets/tests/test_utils.py | 20 +- .../minio_service/minio_service/minio_api.py | 24 +-- common/minio_service/setup.py | 52 ++--- common/model_api/example/__init__.py | 2 +- common/model_api/example/__main__.py | 17 +- common/model_api/model_api/common/models.py | 4 +- common/model_api/model_api/preprocessing.py | 12 +- .../model_api/model_api/storage_exchange.py | 8 +- common/model_api/model_api/utils.py | 12 +- common/model_api/tests/test_api.py | 12 +- common/model_api/tests/test_preprocessing.py | 12 +- common/model_api/tests/test_smoke.py | 10 +- .../page_rendering/page_rendering.py | 4 +- convert/convert/coco_export/convert.py | 24 +-- convert/convert/coco_import/convert.py | 16 +- convert/convert/coco_import/import_service.py | 11 +- convert/convert/config.py | 9 +- .../badgerdoc_format/annotation_converter.py | 13 +- .../annotation_converter_practic.py | 8 +- .../badgerdoc_format/pdf_renderer.py | 14 +- .../badgerdoc_format/plain_text_converter.py | 4 +- .../badgerdoc_to_label_studio_use_case.py | 19 +- .../label_studio_to_badgerdoc_use_case.py | 88 +++------ .../labelstudio_format/label_studio_format.py | 15 +- .../models/bd_tokens_model.py | 1 + convert/convert/models/coco.py | 4 +- convert/convert/routers/coco.py | 17 +- convert/convert/routers/label_studio.py | 9 +- convert/convert/routers/text.py | 7 +- convert/convert/utils/json_utils.py | 12 +- convert/convert/utils/render_pdf_page.py | 13 +- convert/convert/utils/s3_utils.py | 29 ++- .../tests/test_label_studio/test_export.py | 15 +- .../tests/test_label_studio/test_import.py | 8 +- .../test_label_studio/test_text_wrapper.py | 5 +- .../dev_runner/runners/annotation_runner.py | 5 +- .../dev_runner/runners/assets_runner.py | 7 +- 
dev_runner/dev_runner/runners/base_runner.py | 23 ++- .../dev_runner/runners/convert_runner.py | 7 +- .../dev_runner/runners/scheduler_runner.py | 7 +- dev_runner/dev_runner/runners/users_runner.py | 5 +- dev_runner/start.py | 14 +- jobs/alembic/env.py | 12 +- jobs/alembic/versions/3f5b2d199d38_.py | 4 +- jobs/alembic/versions/7511c6790067_.py | 12 +- jobs/alembic/versions/9229e70d2791_.py | 12 +- ...add_start_manual_job_automatically_flag.py | 12 +- ...0dd492b17f_add_extensive_coverage_param.py | 10 +- jobs/jobs/create_job_funcs.py | 8 +- jobs/jobs/db_service.py | 4 +- jobs/jobs/main.py | 7 +- jobs/jobs/schemas.py | 64 ++---- jobs/jobs/utils.py | 31 +-- jobs/tests/conftest.py | 11 +- ...t_ExtractionWithAnnotationJob_workflows.py | 33 +--- .../test_args_validation.py | 36 +--- .../test_change_job-proxy_to_annotation.py | 12 +- .../test_API_functions/test_change_job.py | 25 +-- .../test_API_functions/test_create_job.py | 8 +- .../test_other_API_functions.py | 16 +- .../test_API_functions/test_search_jobs.py | 18 +- jobs/tests/test_db.py | 12 +- jobs/tests/test_utils.py | 40 +--- lib/filter_lib/src/dict_parser.py | 4 +- lib/filter_lib/src/enum_generator.py | 4 +- lib/filter_lib/src/query_modificator.py | 28 +-- lib/filter_lib/src/schema_generator.py | 8 +- lib/filter_lib/tests/test_dict_parser.py | 12 +- lib/filter_lib/tests/test_enum_generator.py | 12 +- lib/filter_lib/tests/test_pagination.py | 4 +- lib/filter_lib/tests/test_query_modifier.py | 8 +- lib/filter_lib/tests/test_schema_generator.py | 10 +- lib/filter_lib/usage_example/app.py | 13 +- lib/tenants/src/dependency.py | 24 +-- lib/tenants/tests/conftest.py | 16 +- lib/tenants/tests/test_dependency_rs256.py | 4 +- lib/tenants/tests/test_schema.py | 4 +- models/alembic/env.py | 10 +- ...4fd362de_add_description_field_to_model.py | 4 +- .../5c3092bc3517_add_columns_to_basement.py | 8 +- ...a_added_archive_field_to_training_model.py | 4 +- .../versions/683f401ed33e_create_tables.py | 8 +- .../826680104247_pod_limits_column.py | 7 +- .../8fd15e9edd28_pod_cpu_limits_change.py | 6 +- ...eff4c79fd3_modify_basement_and_training.py | 4 +- ...add_latest_and_version_columns_to_model.py | 4 +- ...85a73c2_set_basement_concurrency_limits.py | 6 +- ...9f68f00d4_add_field_type_to_table_model.py | 4 +- models/models/colab_ssh_utils.py | 9 +- models/models/crud.py | 14 +- models/models/errors.py | 16 +- models/models/routers/basements_routers.py | 9 +- .../models/routers/deployed_models_routers.py | 5 +- models/models/routers/models_routers.py | 46 ++--- models/models/routers/training_routers.py | 25 +-- models/models/schemas.py | 23 +-- models/models/utils.py | 15 +- models/tests/conftest.py | 8 +- models/tests/test_basement_routers.py | 18 +- models/tests/test_colab_start_training.py | 28 +-- models/tests/test_crud.py | 8 +- models/tests/test_models_routers.py | 16 +- models/tests/test_schemas.py | 16 +- models/tests/test_trainings_routers.py | 10 +- models/tests/test_utils.py | 21 +- models/tests/utils.py | 4 +- pipelines/alembic/env.py | 10 +- ...5e65cf34b_fix_default_type_to_inference.py | 8 +- pipelines/alembic/versions/29f072fb5c9c_.py | 4 +- .../alembic/versions/5fd9d1fdcf5b_init.py | 4 +- ..._add_original_pipeline_id_and_is_latest.py | 6 +- ...69_add_type_description_and_summary_to_.py | 8 +- ...aebbddd8_change_pipeline_version_to_int.py | 14 +- ...dd_parent_step_and_tenant_to_execution_.py | 19 +- pipelines/pipelines/app.py | 25 +-- pipelines/pipelines/config.py | 3 +- pipelines/pipelines/db/logger.py | 23 +-- 
pipelines/pipelines/db/models.py | 16 +- pipelines/pipelines/db/service.py | 37 +--- pipelines/pipelines/execution.py | 71 +++---- pipelines/pipelines/http_utils.py | 9 +- pipelines/pipelines/kafka_utils.py | 5 +- pipelines/pipelines/pipeline_runner.py | 8 +- pipelines/pipelines/result_processing.py | 19 +- pipelines/pipelines/schemas.py | 27 +-- pipelines/pipelines/service_token.py | 8 +- pipelines/tests/conftest.py | 8 +- pipelines/tests/db/test_logger.py | 6 +- pipelines/tests/db/test_service.py | 49 ++--- pipelines/tests/test_app.py | 12 +- pipelines/tests/test_execution.py | 32 +-- pipelines/tests/test_http_utils.py | 4 +- pipelines/tests/test_result_processing.py | 34 +--- pipelines/tests/test_schemas.py | 16 +- pipelines/tests/testing_data.py | 3 +- processing/alembic/env.py | 12 +- .../processing/health_check_easy_ocr.py | 13 +- processing/processing/main.py | 23 +-- processing/processing/schema.py | 4 +- .../processing/send_preprocess_results.py | 6 +- processing/processing/tasks.py | 15 +- processing/processing/text_merge.py | 5 +- .../processing/third_party_code/table.py | 12 +- processing/processing/utils/aiohttp_utils.py | 9 +- processing/processing/utils/logger.py | 3 +- processing/processing/utils/minio_utils.py | 11 +- processing/processing/utils/utils.py | 15 +- .../tests/integration/test_integration.py | 32 +-- processing/tests/test_text_merge.py | 12 +- processing/tests/test_utils/test_utils.py | 25 +-- scheduler/alembic/env.py | 11 +- scheduler/alembic/versions/0cadbdb7f0ea_.py | 7 +- scheduler/alembic/versions/449be82736bd_.py | 5 +- scheduler/scheduler/app.py | 5 +- scheduler/scheduler/db/models.py | 8 +- scheduler/scheduler/db/service.py | 17 +- scheduler/scheduler/heartbeat.py | 19 +- scheduler/scheduler/runner.py | 9 +- scheduler/tests/test_heartbeat.py | 8 +- scheduler/tests/test_service.py | 4 +- search/search/config.py | 4 +- search/search/es.py | 21 +- search/search/harvester.py | 16 +- search/search/main.py | 19 +- search/search/schemas/facets.py | 39 +--- search/search/schemas/pieces.py | 45 ++--- search/tests/conftest.py | 16 +- search/tests/test_facets.py | 18 +- search/tests/test_get.py | 19 +- search/tests/test_harvester.py | 19 +- search/tests/test_indexation_endpoint.py | 12 +- search/tests/test_pieces.py | 42 +--- ...ecbed_add_association_taxonomy_category.py | 8 +- .../versions/bdea8a93cafe_first_revision.py | 16 +- taxonomy/documentation/update_docs.py | 5 +- taxonomy/taxonomy/errors.py | 4 +- taxonomy/taxonomy/schemas/taxon.py | 5 +- taxonomy/taxonomy/schemas/taxonomy.py | 8 +- taxonomy/taxonomy/taxon/services.py | 29 +-- taxonomy/taxonomy/taxonomy/resources.py | 29 +-- taxonomy/taxonomy/taxonomy/services.py | 13 +- taxonomy/tests/conftest.py | 20 +- taxonomy/tests/test_taxon_crud.py | 58 ++---- users/tests/keycloak/test_query.py | 8 +- users/tests/test_main.py | 72 ++----- users/users/config.py | 4 +- users/users/keycloak/query.py | 28 +-- users/users/keycloak/resources.py | 4 +- users/users/keycloak/schemas.py | 2 + users/users/main.py | 40 ++-- 272 files changed, 1513 insertions(+), 3533 deletions(-) diff --git a/annotation/alembic/env.py b/annotation/alembic/env.py index 79acffc7c..19c1480c4 100644 --- a/annotation/alembic/env.py +++ b/annotation/alembic/env.py @@ -1,11 +1,11 @@ import os from logging.config import fileConfig +from annotation.database import SQLALCHEMY_DATABASE_URL +from annotation.utils import get_test_db_url from sqlalchemy import engine_from_config, pool from alembic import context # type: ignore -from annotation.database 
import SQLALCHEMY_DATABASE_URL -from annotation.utils import get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -30,9 +30,7 @@ if not os.getenv("USE_TEST_DB"): config.set_main_option("sqlalchemy.url", SQLALCHEMY_DATABASE_URL) else: - config.set_main_option( - "sqlalchemy.url", get_test_db_url(SQLALCHEMY_DATABASE_URL) - ) + config.set_main_option("sqlalchemy.url", get_test_db_url(SQLALCHEMY_DATABASE_URL)) def run_migrations_offline(): diff --git a/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py b/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py index f48b2e818..5dac3a82e 100644 --- a/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py +++ b/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py @@ -18,14 +18,10 @@ def upgrade(): - op.add_column( - "categories", sa.Column("editor_url", sa.VARCHAR(), nullable=True) - ) + op.add_column("categories", sa.Column("editor_url", sa.VARCHAR(), nullable=True)) op.add_column( "categories", - sa.Column( - "data_attributes", postgresql.ARRAY(sa.VARCHAR()), nullable=True - ), + sa.Column("data_attributes", postgresql.ARRAY(sa.VARCHAR()), nullable=True), ) diff --git a/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py b/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py index e01797b0b..896fd34be 100644 --- a/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py +++ b/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py @@ -20,9 +20,7 @@ def upgrade(): - op.drop_constraint( - "categories_parent_fkey", "categories", type_="foreignkey" - ) + op.drop_constraint("categories_parent_fkey", "categories", type_="foreignkey") op.drop_constraint( "association_jobs_categories_category_id_fkey", "association_jobs_categories", @@ -85,18 +83,14 @@ def upgrade(): new_column_name="parent", server_default="null", ) - op.alter_column( - "categories", "id_temp", new_column_name="id", nullable=False - ) + op.alter_column("categories", "id_temp", new_column_name="id", nullable=False) op.alter_column( "association_jobs_categories", "category_id_temp", new_column_name="category_id", nullable=False, ) - op.create_check_constraint( - "is_not_self_parent", "categories", "id != parent" - ) + op.create_check_constraint("is_not_self_parent", "categories", "id != parent") op.create_index( op.f("ix_categories_parent"), "categories", @@ -114,9 +108,7 @@ def check_exist_sequence(): def clear_tables(): conn = op.get_bind() inspector = Inspector.from_engine(conn) - tables = [ - data[0] for data in inspector.get_sorted_table_and_fkc_names()[-2::-1] - ] + tables = [data[0] for data in inspector.get_sorted_table_and_fkc_names()[-2::-1]] tables.remove("alembic_version") for table in tables: conn.execute(f"DELETE FROM {table}") @@ -126,9 +118,7 @@ def downgrade(): clear_tables() if not check_exist_sequence(): op.execute(sa.schema.CreateSequence(categories_id_seq)) - op.drop_constraint( - "categories_parent_fkey", "categories", type_="foreignkey" - ) + op.drop_constraint("categories_parent_fkey", "categories", type_="foreignkey") op.drop_constraint( "association_jobs_categories_category_id_fkey", "association_jobs_categories", @@ -185,18 +175,14 @@ def downgrade(): "parent_temp", new_column_name="parent", ) - op.alter_column( - "categories", "id_temp", new_column_name="id", nullable=False - ) + 
op.alter_column("categories", "id_temp", new_column_name="id", nullable=False) op.alter_column( "association_jobs_categories", "category_id_temp", new_column_name="category_id", nullable=False, ) - op.create_check_constraint( - "is_not_self_parent", "categories", "id != parent" - ) + op.create_check_constraint("is_not_self_parent", "categories", "id != parent") op.create_index( op.f("ix_categories_parent"), "categories", diff --git a/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py b/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py index 2fbb85886..bb3a3ebb0 100644 --- a/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py +++ b/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py @@ -34,9 +34,7 @@ def downgrade(): nullable=False, ), sa.Column("job_id", sa.INTEGER(), autoincrement=False, nullable=False), - sa.Column( - "task_id", sa.INTEGER(), autoincrement=False, nullable=False - ), + sa.Column("task_id", sa.INTEGER(), autoincrement=False, nullable=False), sa.Column( "agreement_score", postgresql.JSONB(astext_type=sa.Text()), diff --git a/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py b/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py index 44927752b..a36f49140 100644 --- a/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py +++ b/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py @@ -75,9 +75,7 @@ def upgrade(): ondelete="cascade", ) - op.drop_constraint( - JOB_OWNER_JOB_ID_FK, "association_job_owner", type_="foreignkey" - ) + op.drop_constraint(JOB_OWNER_JOB_ID_FK, "association_job_owner", type_="foreignkey") op.drop_constraint( JOB_OWNER_USER_ID_FK, "association_job_owner", type_="foreignkey" ) @@ -148,9 +146,7 @@ def downgrade(): Remove validation_only job type and rollback fk constraints """ - op.alter_column( - "jobs", "validation_type", type_=sa.VARCHAR(), server_default=None - ) + op.alter_column("jobs", "validation_type", type_=sa.VARCHAR(), server_default=None) op.execute("DROP TYPE validation_type;") op.execute("DELETE FROM jobs " "WHERE validation_type = 'validation_only'") @@ -165,9 +161,7 @@ def downgrade(): ) op.drop_constraint(TASKS_JOB_ID_FK, "tasks", type_="foreignkey") - op.create_foreign_key( - TASKS_JOB_ID_FK, "tasks", "jobs", ["job_id"], ["job_id"] - ) + op.create_foreign_key(TASKS_JOB_ID_FK, "tasks", "jobs", ["job_id"], ["job_id"]) op.drop_constraint( JOB_VALIDATOR_USER_ID_FK, @@ -197,9 +191,7 @@ def downgrade(): op.drop_constraint( JOB_OWNER_USER_ID_FK, "association_job_owner", type_="foreignkey" ) - op.drop_constraint( - JOB_OWNER_JOB_ID_FK, "association_job_owner", type_="foreignkey" - ) + op.drop_constraint(JOB_OWNER_JOB_ID_FK, "association_job_owner", type_="foreignkey") op.create_foreign_key( JOB_OWNER_USER_ID_FK, "association_job_owner", diff --git a/annotation/alembic/versions/3a083a1fbba0_first_revision.py b/annotation/alembic/versions/3a083a1fbba0_first_revision.py index 0020e61d2..6ec1fd6bb 100644 --- a/annotation/alembic/versions/3a083a1fbba0_first_revision.py +++ b/annotation/alembic/versions/3a083a1fbba0_first_revision.py @@ -29,9 +29,7 @@ def upgrade(): if "annotators" not in tables: op.create_table( "annotators", - sa.Column( - "user_id", postgresql.UUID(as_uuid=True), nullable=False - ), + sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False), sa.Column("default_load", sa.INTEGER(), nullable=False), sa.PrimaryKeyConstraint("user_id"), ) # Here and below this 'if' condition creates 
tables only for new db @@ -48,9 +46,7 @@ def upgrade(): nullable=True, ), sa.Column("is_link", sa.BOOLEAN(), nullable=False), - sa.ForeignKeyConstraint( - ["parent"], ["categories.id"], ondelete="cascade" - ), + sa.ForeignKeyConstraint(["parent"], ["categories.id"], ondelete="cascade"), sa.PrimaryKeyConstraint("id"), ) op.create_check_constraint( @@ -91,12 +87,8 @@ def upgrade(): ), sa.Column("file_id", sa.INTEGER(), nullable=False), sa.Column("job_id", sa.INTEGER(), nullable=False), - sa.Column( - "pages", postgresql.JSON(astext_type=sa.Text()), nullable=False - ), - sa.Column( - "validated", postgresql.ARRAY(sa.INTEGER()), nullable=False - ), + sa.Column("pages", postgresql.JSON(astext_type=sa.Text()), nullable=False), + sa.Column("validated", postgresql.ARRAY(sa.INTEGER()), nullable=False), sa.Column("tenant", sa.VARCHAR(), nullable=False), sa.CheckConstraint( '("user" IS NULL AND pipeline IS NOT NULL) OR ' @@ -111,9 +103,7 @@ def upgrade(): if "association_job_annotator" not in tables: op.create_table( "association_job_annotator", - sa.Column( - "user_id", postgresql.UUID(as_uuid=True), nullable=False - ), + sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False), sa.Column("job_id", sa.INTEGER(), nullable=False), sa.ForeignKeyConstraint( ["job_id"], @@ -160,9 +150,7 @@ def upgrade(): sa.Column("file_id", sa.INTEGER(), nullable=False), sa.Column("pages", postgresql.ARRAY(sa.INTEGER()), nullable=False), sa.Column("job_id", sa.INTEGER(), nullable=False), - sa.Column( - "user_id", postgresql.UUID(as_uuid=True), nullable=False - ), + sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False), sa.Column("is_validation", sa.BOOLEAN(), nullable=False), sa.Column( "status", diff --git a/annotation/alembic/versions/4272d0a43ff1_agreement_score.py b/annotation/alembic/versions/4272d0a43ff1_agreement_score.py index b48c75b06..91ea17310 100644 --- a/annotation/alembic/versions/4272d0a43ff1_agreement_score.py +++ b/annotation/alembic/versions/4272d0a43ff1_agreement_score.py @@ -21,9 +21,7 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.create_table( "agreement_score", - sa.Column( - "annotator_id", postgresql.UUID(as_uuid=True), nullable=False - ), + sa.Column("annotator_id", postgresql.UUID(as_uuid=True), nullable=False), sa.Column("job_id", sa.INTEGER(), nullable=False), sa.Column("task_id", sa.INTEGER(), nullable=False), sa.Column( @@ -35,9 +33,7 @@ def upgrade(): ["annotator_id"], ["users.user_id"], ), - sa.ForeignKeyConstraint( - ["job_id"], ["jobs.job_id"], ondelete="cascade" - ), + sa.ForeignKeyConstraint(["job_id"], ["jobs.job_id"], ondelete="cascade"), sa.ForeignKeyConstraint(["task_id"], ["tasks.id"], ondelete="cascade"), sa.PrimaryKeyConstraint("task_id"), ) diff --git a/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py b/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py index 58c611a5f..539b230cb 100644 --- a/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py +++ b/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py @@ -21,9 +21,7 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.add_column( "categories", - sa.Column( - "tree", sqlalchemy_utils.types.ltree.LtreeType(), nullable=True - ), + sa.Column("tree", sqlalchemy_utils.types.ltree.LtreeType(), nullable=True), ) op.create_index( "index_tree", @@ -37,8 +35,6 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - op.drop_index( - "index_tree", table_name="categories", postgresql_using="gist" - ) + op.drop_index("index_tree", table_name="categories", postgresql_using="gist") op.drop_column("categories", "tree") # ### end Alembic commands ### diff --git a/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py b/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py index 08d6dc0be..d7c36853a 100644 --- a/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py +++ b/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py @@ -6,9 +6,9 @@ """ import sqlalchemy as sa +from annotation.models import ValidationSchema from alembic import op -from annotation.models import ValidationSchema # revision identifiers, used by Alembic. revision = "71095b8e6343" @@ -25,9 +25,7 @@ def upgrade(): for v in enum_keys_to_add: op.execute(f"ALTER TYPE {enum_name} ADD VALUE '{v}'") - op.add_column( - "jobs", sa.Column("extensive_coverage", sa.INTEGER(), nullable=True) - ) + op.add_column("jobs", sa.Column("extensive_coverage", sa.INTEGER(), nullable=True)) # ### end Alembic commands ### diff --git a/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py b/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py index a96504e7c..5ceae3194 100644 --- a/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py +++ b/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py @@ -23,9 +23,7 @@ def upgrade() -> None: sa.Column("task_from", sa.INTEGER(), nullable=False), sa.Column("task_to", sa.INTEGER(), nullable=False), sa.Column("agreement_metric", sa.FLOAT(), nullable=False), - sa.ForeignKeyConstraint( - ["task_from"], ["tasks.id"], ondelete="cascade" - ), + sa.ForeignKeyConstraint(["task_from"], ["tasks.id"], ondelete="cascade"), sa.ForeignKeyConstraint(["task_to"], ["tasks.id"], ondelete="cascade"), sa.PrimaryKeyConstraint("task_from", "task_to"), ) diff --git a/annotation/alembic/versions/89276b8ebe84_remove_annotateddoc_constraint.py b/annotation/alembic/versions/89276b8ebe84_remove_annotateddoc_constraint.py index 190041aa1..06e90ecd3 100644 --- a/annotation/alembic/versions/89276b8ebe84_remove_annotateddoc_constraint.py +++ b/annotation/alembic/versions/89276b8ebe84_remove_annotateddoc_constraint.py @@ -19,9 +19,7 @@ def upgrade(): op.drop_constraint( "annotated_docs_task_id_fkey", "annotated_docs", type_="foreignkey" ) - op.drop_constraint( - "annotated_docs_user_fkey", "annotated_docs", type_="foreignkey" - ) + op.drop_constraint("annotated_docs_user_fkey", "annotated_docs", type_="foreignkey") op.create_foreign_key( None, "annotated_docs", @@ -41,18 +39,14 @@ def upgrade(): def downgrade(): - op.execute( - 'DELETE FROM annotated_docs WHERE "user" IS NULL AND pipeline IS NULL' - ) + op.execute('DELETE FROM annotated_docs WHERE "user" IS NULL AND pipeline IS NULL') op.create_check_constraint( "annotated_docs_check", "annotated_docs", '("user" IS NULL AND pipeline IS NOT NULL) OR ' '("user" IS NOT NULL AND pipeline IS NULL)', ) - op.drop_constraint( - "annotated_docs_user_fkey", "annotated_docs", type_="foreignkey" - ) + op.drop_constraint("annotated_docs_user_fkey", "annotated_docs", type_="foreignkey") op.drop_constraint( "annotated_docs_task_id_fkey", "annotated_docs", type_="foreignkey" ) diff --git a/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py b/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py index 
c16f0523e..bf4d9ffe6 100644 --- a/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py +++ b/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py @@ -35,9 +35,7 @@ def downgrade(): op.alter_column( "categories", "data_attributes", - existing_type=postgresql.ARRAY( - postgresql.JSONB(astext_type=sa.Text()) - ), + existing_type=postgresql.ARRAY(postgresql.JSONB(astext_type=sa.Text())), type_=postgresql.ARRAY(sa.VARCHAR()), existing_nullable=True, ) diff --git a/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py b/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py index f26495237..26b9a59dc 100644 --- a/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py +++ b/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py @@ -27,9 +27,7 @@ class JobStatusEnumSchema(str, Enum): def upgrade(): - job_status = postgresql.ENUM( - JobStatusEnumSchema, name="jobstatusenumschema" - ) + job_status = postgresql.ENUM(JobStatusEnumSchema, name="jobstatusenumschema") job_status.create(op.get_bind(), checkfirst=True) op.add_column( "jobs", diff --git a/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py b/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py index 52b44dd70..8b135495d 100644 --- a/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py +++ b/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py @@ -26,9 +26,7 @@ class CategoryTypeSchema(str, Enum): def upgrade(): - category_type = postgresql.ENUM( - CategoryTypeSchema, name="categorytypeschema" - ) + category_type = postgresql.ENUM(CategoryTypeSchema, name="categorytypeschema") category_type.create(op.get_bind(), checkfirst=True) op.add_column( "categories", @@ -38,22 +36,16 @@ def upgrade(): type_=category_type, ), ) - op.execute( - "UPDATE categories SET type = CAST ('box' AS categorytypeschema)" - ) + op.execute("UPDATE categories SET type = CAST ('box' AS categorytypeschema)") op.alter_column("categories", "type", nullable=False) op.drop_index("ix_categories_is_link", table_name="categories") op.drop_column("categories", "is_link") def downgrade(): - op.add_column( - "categories", sa.Column("is_link", sa.BOOLEAN(), nullable=True) - ) + op.add_column("categories", sa.Column("is_link", sa.BOOLEAN(), nullable=True)) op.execute("UPDATE categories SET is_link = 'false'") op.alter_column("categories", "is_link", nullable=False) - op.create_index( - "ix_categories_is_link", "categories", ["is_link"], unique=False - ) + op.create_index("ix_categories_is_link", "categories", ["is_link"], unique=False) op.drop_column("categories", "type") op.execute("DROP TYPE categorytypeschema;") diff --git a/annotation/annotation/annotations/main.py b/annotation/annotation/annotations/main.py index 20e067f03..37cf961b4 100644 --- a/annotation/annotation/annotations/main.py +++ b/annotation/annotation/annotations/main.py @@ -6,14 +6,6 @@ from uuid import UUID import boto3 -from dotenv import find_dotenv, load_dotenv -from fastapi import HTTPException -from kafka import KafkaProducer -from kafka.errors import KafkaError -from sqlalchemy import asc -from sqlalchemy.exc import IntegrityError -from sqlalchemy.orm import Session - from annotation import logger from annotation.kafka_client import KAFKA_BOOTSTRAP_SERVER, KAFKA_SEARCH_TOPIC from annotation.kafka_client import producers as kafka_producers @@ -25,6 +17,13 @@ ParticularRevisionSchema, RevisionLink, ) 
+from dotenv import find_dotenv, load_dotenv +from fastapi import HTTPException +from kafka import KafkaProducer +from kafka.errors import KafkaError +from sqlalchemy import asc +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session load_dotenv(find_dotenv()) ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL") @@ -135,9 +134,7 @@ def upload_pages_to_minio( for page in pages: json_page = json.dumps(page.dict()) path_to_object = f"{s3_path}/{pages_sha[str(page.page_num)]}.json" - upload_json_to_minio( - json_page, path_to_object, bucket_name, s3_resource - ) + upload_json_to_minio(json_page, path_to_object, bucket_name, s3_resource) def upload_json_to_minio( @@ -237,9 +234,7 @@ def create_manifest_json( manifest = row_to_dict(doc) redundant_keys = ("task_id", "file_id", "tenant", "categories") manifest = { - key: value - for key, value in manifest.items() - if key not in redundant_keys + key: value for key, value in manifest.items() if key not in redundant_keys } manifest["pages"] = all_pages manifest["validated"] = list(validated) @@ -254,9 +249,7 @@ def create_manifest_json( ] manifest_json = json.dumps(manifest) - upload_json_to_minio( - manifest_json, manifest_path, bucket_name, s3_resource - ) + upload_json_to_minio(manifest_json, manifest_path, bucket_name, s3_resource) def construct_annotated_doc( @@ -422,8 +415,7 @@ def check_docs_identity( latest_doc is not None and latest_doc.pages == new_doc.pages and set(latest_doc.validated) == new_doc.validated - and set(latest_doc.failed_validation_pages) - == new_doc.failed_validation_pages + and set(latest_doc.failed_validation_pages) == new_doc.failed_validation_pages and latest_doc.categories == new_doc.categories ) @@ -555,9 +547,7 @@ def find_all_revisions_pages( } """ pages = {} - revisions = [ - AnnotatedDocSchema.from_orm(revision) for revision in revisions - ] + revisions = [AnnotatedDocSchema.from_orm(revision) for revision in revisions] for revision in revisions: revision.pages = { int(key): value @@ -606,9 +596,7 @@ def find_latest_revision_pages( } """ pages = {} - revisions = [ - AnnotatedDocSchema.from_orm(revision) for revision in revisions - ] + revisions = [AnnotatedDocSchema.from_orm(revision) for revision in revisions] for revision in revisions: revision.pages = { int(key): value @@ -867,9 +855,7 @@ def accumulate_pages_info( all_annotated.update(revision.pages) for status, attr in attr_map.items(): - latest_status.update( - {int(i): status for i in getattr(revision, attr)} - ) + latest_status.update({int(i): status for i in getattr(revision, attr)}) # if there is specific revision, where we need to stop, # we will stop here @@ -917,9 +903,7 @@ def accumulate_pages_info( annotated_list = all_annotated if with_page_hash: - annotated = { - str(page): all_annotated[str(page)] for page in annotated_list - } + annotated = {str(page): all_annotated[str(page)] for page in annotated_list} else: annotated = set(map(int, annotated_list)) @@ -975,8 +959,7 @@ def check_task_pages( for array_name, pgs in error_mapping.items(): if pgs: err_msg += ( - f"Pages {pgs} from {array_name} array " - "do not belong to the task. " + f"Pages {pgs} from {array_name} array " "do not belong to the task. 
" ) if err_msg: @@ -995,9 +978,7 @@ def _init_search_annotation_producer(): ) return producer except KafkaError as error: # KafkaError is parent of all kafka errors - logger_.warning( - f"Error occurred during kafka producer creating: {error}" - ) + logger_.warning(f"Error occurred during kafka producer creating: {error}") def add_search_annotation_producer() -> KafkaProducer: @@ -1006,13 +987,10 @@ def add_search_annotation_producer() -> KafkaProducer: return search_annotation_producer -def send_annotation_kafka_message( - job_id: int, file_id: int, tenant: str -) -> None: +def send_annotation_kafka_message(job_id: int, file_id: int, tenant: str) -> None: # if startup failed, try to recreate it search_annotation_producer = ( - kafka_producers.get("search_annotation") - or add_search_annotation_producer() + kafka_producers.get("search_annotation") or add_search_annotation_producer() ) if search_annotation_producer: search_annotation_producer.send( diff --git a/annotation/annotation/annotations/resources.py b/annotation/annotation/annotations/resources.py index be016ca11..0a43e065a 100644 --- a/annotation/annotation/annotations/resources.py +++ b/annotation/annotation/annotations/resources.py @@ -1,17 +1,14 @@ from typing import Dict, List, Optional, Set from uuid import UUID -from fastapi import APIRouter, Depends, HTTPException, Path, Query, status -from sqlalchemy import and_, desc -from sqlalchemy.orm import Session -from tenant_dependency import TenantData - from annotation.database import get_db from annotation.errors import NoSuchRevisionsError from annotation.microservice_communication.assets_communication import ( get_file_path_and_bucket, ) -from annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.microservice_communication.search import ( + X_CURRENT_TENANT_HEADER, +) from annotation.schemas import ( AnnotatedDocSchema, BadRequestErrorSchema, @@ -25,6 +22,10 @@ ) from annotation.tags import ANNOTATION_TAG, JOBS_TAG, REVISION_TAG from annotation.tasks import update_task_status +from fastapi import APIRouter, Depends, HTTPException, Path, Query, status +from sqlalchemy import and_, desc +from sqlalchemy.orm import Session +from tenant_dependency import TenantData from ..models import AnnotatedDoc, File, Job, ManualAnnotationTask from ..token_dependency import TOKEN @@ -87,8 +88,7 @@ def post_annotation_by_user( if doc.user is None: raise HTTPException( status_code=400, - detail="Field user should not be null, " - "when saving annotation by user.", + detail="Field user should not be null, " "when saving annotation by user.", ) check_null_fields(doc) @@ -115,9 +115,7 @@ def post_annotation_by_user( f"User_id associated with task: [{task.user_id}].", ) - if not task.is_validation and ( - doc.validated or doc.failed_validation_pages - ): + if not task.is_validation and (doc.validated or doc.failed_validation_pages): raise HTTPException( status_code=400, detail="This task is for annotation. 
" @@ -329,11 +327,7 @@ def get_jobs_by_file_id( x_current_tenant: str = X_CURRENT_TENANT_HEADER, db: Session = Depends(get_db), ): - db_file = ( - db.query(File) - .filter_by(file_id=file_id, tenant=x_current_tenant) - .first() - ) + db_file = db.query(File).filter_by(file_id=file_id, tenant=x_current_tenant).first() if not db_file: raise HTTPException( status_code=404, @@ -348,10 +342,7 @@ def get_jobs_by_file_id( .distinct(AnnotatedDoc.job_id, AnnotatedDoc.pipeline) .all() ) - return [ - {"job_id": job.job_id, "is_manual": not bool(job.pipeline)} - for job in jobs - ] + return [{"job_id": job.job_id, "is_manual": not bool(job.pipeline)} for job in jobs] @router.get( @@ -383,10 +374,7 @@ def get_latest_revision_by_user( if user_id: filters.append(AnnotatedDoc.user == user_id) revisions = ( - db.query(AnnotatedDoc) - .filter(and_(*filters)) - .order_by(AnnotatedDoc.date) - .all() + db.query(AnnotatedDoc).filter(and_(*filters)).order_by(AnnotatedDoc.date).all() ) pages = find_latest_revision_pages(revisions, page_numbers) if not pages: @@ -417,8 +405,7 @@ def get_annotations_up_to_given_revision( user_id: Optional[UUID] = Query( None, example="1843c251-564b-4c2f-8d42-c61fdac369a1", - description="Required in case job validation type is extensive_" - "coverage", + description="Required in case job validation type is extensive_" "coverage", ), ): job: Job = db.query(Job).filter(Job.job_id == job_id).first() @@ -435,10 +422,7 @@ def get_annotations_up_to_given_revision( if job.validation_type == ValidationSchema.extensive_coverage: filters.append(AnnotatedDoc.user.in_((user_id, None))) revisions = ( - db.query(AnnotatedDoc) - .filter(*filters) - .order_by(AnnotatedDoc.date.asc()) - .all() + db.query(AnnotatedDoc).filter(*filters).order_by(AnnotatedDoc.date.asc()).all() ) if not revisions: @@ -453,14 +437,19 @@ def get_annotations_up_to_given_revision( links_json=[], ) - validated, failed, annotated, _, categories, required_revision = ( - accumulate_pages_info( - task_pages=[], - revisions=revisions, - stop_revision=revision, - specific_pages=page_numbers, - with_page_hash=True, - ) + ( + validated, + failed, + annotated, + _, + categories, + required_revision, + ) = accumulate_pages_info( + task_pages=[], + revisions=revisions, + stop_revision=revision, + specific_pages=page_numbers, + with_page_hash=True, ) # if revision with given id (hash) was not found, # response with empty revision will be returned @@ -535,8 +524,7 @@ def get_annotation_for_given_revision( responses={ 500: {"model": ConnectionErrorSchema}, }, - summary="Get all users revisions (or pipeline revision) " - "for particular pages.", + summary="Get all users revisions (or pipeline revision) " "for particular pages.", tags=[REVISION_TAG, ANNOTATION_TAG], ) def get_all_revisions( @@ -547,8 +535,7 @@ def get_all_revisions( user_id: Optional[UUID] = Query( None, example="1843c251-564b-4c2f-8d42-c61fdac369a1", - description="Required in case job validation type is extensive_" - "coverage", + description="Required in case job validation type is extensive_" "coverage", ), db: Session = Depends(get_db), ): @@ -566,10 +553,7 @@ def get_all_revisions( if job.validation_type == ValidationSchema.extensive_coverage: filters.append(AnnotatedDoc.user.in_((user_id, None))) revisions = ( - db.query(AnnotatedDoc) - .filter(and_(*filters)) - .order_by(AnnotatedDoc.date) - .all() + db.query(AnnotatedDoc).filter(and_(*filters)).order_by(AnnotatedDoc.date).all() ) pages = find_all_revisions_pages(revisions, page_numbers) if not pages: diff --git 
a/annotation/annotation/categories/resources.py b/annotation/annotation/categories/resources.py index 40f411677..3a2b11d35 100644 --- a/annotation/annotation/categories/resources.py +++ b/annotation/annotation/categories/resources.py @@ -1,14 +1,11 @@ from typing import List, Union -from fastapi import APIRouter, Depends, HTTPException, Path, Response, status -from filter_lib import Page -from sqlalchemy.orm import Session -from sqlalchemy_filters.exceptions import BadFilterFormat - from annotation.database import get_db from annotation.errors import NoSuchCategoryError from annotation.filters import CategoryFilter -from annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.microservice_communication.search import ( + X_CURRENT_TENANT_HEADER, +) from annotation.schemas import ( BadRequestErrorSchema, CategoryBaseSchema, @@ -19,6 +16,10 @@ SubCategoriesOutSchema, ) from annotation.tags import CATEGORIES_TAG +from fastapi import APIRouter, Depends, HTTPException, Path, Response, status +from filter_lib import Page +from sqlalchemy.orm import Session +from sqlalchemy_filters.exceptions import BadFilterFormat from .services import ( add_category_db, @@ -72,9 +73,7 @@ def fetch_category( x_current_tenant: str = X_CURRENT_TENANT_HEADER, ) -> CategoryResponseSchema: category_db = fetch_category_db(db, category_id, x_current_tenant) - category_response = insert_category_tree( - db, category_db, tenant=x_current_tenant - ) + category_response = insert_category_tree(db, category_db, tenant=x_current_tenant) return category_response @@ -151,9 +150,7 @@ def update_category( """ Updates category by id and returns updated category. """ - category_db = update_category_db( - db, category_id, query.dict(), x_current_tenant - ) + category_db = update_category_db(db, category_id, query.dict(), x_current_tenant) if not category_db: raise NoSuchCategoryError("Cannot update category parameters") return response_object_from_db(category_db) diff --git a/annotation/annotation/categories/services.py b/annotation/annotation/categories/services.py index c81adbb2b..44cd3cc85 100644 --- a/annotation/annotation/categories/services.py +++ b/annotation/annotation/categories/services.py @@ -1,14 +1,6 @@ import uuid from typing import Dict, List, Optional, Set, Tuple, Union -from cachetools import TTLCache, cached, keys -from filter_lib import Page, form_query, map_request_to_filter, paginate -from sqlalchemy import and_, null, or_ -from sqlalchemy.event import listens_for -from sqlalchemy.orm import Session -from sqlalchemy.sql.expression import func -from sqlalchemy_utils import Ltree - from annotation import logger as app_logger from annotation.errors import ( CheckFieldError, @@ -23,6 +15,13 @@ CategoryORMSchema, CategoryResponseSchema, ) +from cachetools import TTLCache, cached, keys +from filter_lib import Page, form_query, map_request_to_filter, paginate +from sqlalchemy import and_, null, or_ +from sqlalchemy.event import listens_for +from sqlalchemy.orm import Session +from sqlalchemy.sql.expression import func +from sqlalchemy_utils import Ltree cache = TTLCache(maxsize=128, ttl=300) @@ -114,9 +113,7 @@ def response_object_from_db(category_db: Category) -> CategoryResponseSchema: return CategoryResponseSchema.parse_obj(category_orm) -def fetch_category_parents( - db: Session, category_input: Category -) -> List[Category]: +def fetch_category_parents(db: Session, category_input: Category) -> List[Category]: return ( db.query(Category) 
.filter(Category.tree.ancestor_of(category_input.tree)) @@ -125,9 +122,7 @@ def fetch_category_parents( ) # remove self item from result -def fetch_category_children( - db: Session, category_input: Category -) -> List[Category]: +def fetch_category_children(db: Session, category_input: Category) -> List[Category]: return ( db.query(Category) .filter(Category.tree.descendant_of(category_input.tree)) @@ -152,9 +147,7 @@ def check_unique_category_field( def fetch_category_db(db: Session, category_id: str, tenant: str) -> Category: category = db.query(Category).get(category_id) if not category or category.tenant and category.tenant != tenant: - raise NoSuchCategoryError( - f"Category with id: {category_id} doesn't exist" - ) + raise NoSuchCategoryError(f"Category with id: {category_id} doesn't exist") return category @@ -197,9 +190,7 @@ def recursive_subcategory_search( if child_ids: child_categories.update(child_ids) for child_id in child_ids: - recursive_subcategory_search( - db, child_id, root_id, child_categories - ) + recursive_subcategory_search(db, child_id, root_id, child_categories) return child_categories @@ -269,9 +260,7 @@ def _get_leaves( return leaves -def _extract_category( - path: str, categories: Dict[str, Category] -) -> List[Category]: +def _extract_category(path: str, categories: Dict[str, Category]) -> List[Category]: return [categories[node] for node in path.split(".")[0:-1]] @@ -292,8 +281,7 @@ def _get_parents( uniq_cats = uniq_cats.union({tree.path for tree in cat.tree}) category_to_object = { - cat.id: cat - for cat in fetch_bunch_categories_db(db, uniq_cats, tenant, job_id) + cat.id: cat for cat in fetch_bunch_categories_db(db, uniq_cats, tenant, job_id) } for path in uniq_pathes: @@ -321,9 +309,7 @@ def _compose_response( { **CategoryORMSchema.from_orm(cat).dict(), "is_leaf": leaves.get(cat.id, False), - "parents": converted_parents.get(cat.tree.path, []) - if cat.tree - else [], + "parents": converted_parents.get(cat.tree.path, []) if cat.tree else [], } ) for cat in categories @@ -357,9 +343,7 @@ def filter_category_db( tenant: str, job_id: Optional[int] = None, ) -> Page[Union[CategoryResponseSchema, str, dict]]: - child_categories, pagination = _get_child_categories( - db, request, tenant, job_id - ) + child_categories, pagination = _get_child_categories(db, request, tenant, job_id) if request.filters and "distinct" in [ item.operator.value for item in request.filters @@ -407,9 +391,7 @@ def update_category_db( ) ex_parent_id = category.parent new_parent_id = update_query["parent"] - parent_db = ( - db.query(Category).get(new_parent_id) if new_parent_id else None - ) + parent_db = db.query(Category).get(new_parent_id) if new_parent_id else None if parent_db and parent_db.tenant not in [tenant, None]: raise ForeignKeyError("Category with this id doesn't exist.") diff --git a/annotation/annotation/distribution/main.py b/annotation/annotation/distribution/main.py index e94fd17b3..a2267b55d 100644 --- a/annotation/annotation/distribution/main.py +++ b/annotation/annotation/distribution/main.py @@ -45,8 +45,6 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple, Union from uuid import UUID -from sqlalchemy.orm import Session - from annotation.jobs import create_user, read_user from annotation.microservice_communication.assets_communication import ( FilesForDistribution, @@ -54,6 +52,7 @@ from annotation.models import File, User from annotation.schemas import TaskStatusEnumSchema, ValidationSchema from annotation.tasks import create_tasks as 
create_db_tasks +from sqlalchemy.orm import Session MAX_PAGES = 50 @@ -100,12 +99,8 @@ def distribute( db.flush() annotated_files_pages = {} # no pages distributed for annotation yet - annotators = [ - x.__dict__ for x in annotators if x.default_load # type: ignore - ] - validators = [ - x.__dict__ for x in validators if x.default_load # type: ignore - ] + annotators = [x.__dict__ for x in annotators if x.default_load] # type: ignore + validators = [x.__dict__ for x in validators if x.default_load] # type: ignore if annotators: if ( validation_type == ValidationSchema.extensive_coverage @@ -137,9 +132,7 @@ def distribute( tasks.extend(annotation_tasks) if validation_type == ValidationSchema.cross: annotated_files_pages = find_annotated_pages(tasks) - job_validators = choose_validators_users( - validation_type, annotators, validators - ) + job_validators = choose_validators_users(validation_type, annotators, validators) if job_validators: validation_tasks = distribute_tasks( annotated_files_pages, @@ -201,9 +194,7 @@ def distribute_tasks_extensively( user_can_take_pages = min(len(pages), user_can_take_pages) pages_not_seen_by_user = sorted( set(pages).difference( - users_seen_pages[annotators[0]["user_id"]][ - file["file_id"] - ] + users_seen_pages[annotators[0]["user_id"]][file["file_id"]] ) ) @@ -223,9 +214,9 @@ def distribute_tasks_extensively( "deadline": deadline, } ) - users_seen_pages[annotators[0]["user_id"]][ - file["file_id"] - ].update(set(pages_for_user)) + users_seen_pages[annotators[0]["user_id"]][file["file_id"]].update( + set(pages_for_user) + ) pages = sorted(set(pages).difference(set(pages_for_user))) annotators[0]["pages_number"] -= len(pages_for_user) if annotators[0]["pages_number"] == 0: @@ -360,9 +351,7 @@ def find_users_share_loads( """ quantity = len(users) for user in users: - average_pages_deviation = ( - users_overall_load - user["overall_load"] * quantity - ) + average_pages_deviation = users_overall_load - user["overall_load"] * quantity average_deviation_coefficient = ( average_pages_deviation / (users_overall_load * quantity) if users_overall_load @@ -371,15 +360,11 @@ def find_users_share_loads( pages_deviation = average_deviation_coefficient * average_job_pages user_deviation_pages = average_job_pages + pages_deviation user["share_load"] = ( - user_deviation_pages / all_job_pages_sum - if all_job_pages_sum - else 1 + user_deviation_pages / all_job_pages_sum if all_job_pages_sum else 1 ) default_load_part = user["default_load"] / users_default_load user["share_load"] *= default_load_part - all_annotators_share_load = sum( - annotator["share_load"] for annotator in users - ) + all_annotators_share_load = sum(annotator["share_load"] for annotator in users) return all_annotators_share_load @@ -404,9 +389,7 @@ def distribute_whole_files( files_to_distribute = [ item for item in files if item["file_id"] not in annotated_files ] - files_for_task = find_equal_files( - files_to_distribute, user["pages_number"] - ) + files_for_task = find_equal_files(files_to_distribute, user["pages_number"]) create_tasks( tasks, files_for_task, @@ -416,9 +399,7 @@ def distribute_whole_files( tasks_status, deadline, ) - files_for_task = find_small_files( - files_to_distribute, user["pages_number"] - ) + files_for_task = find_small_files(files_to_distribute, user["pages_number"]) create_tasks( tasks, files_for_task, @@ -447,8 +428,7 @@ def find_files_for_task( file_for_task = next( x for x in files - if x["pages_number"] == pages - and x["file_id"] not in distributed_files + if 
x["pages_number"] == pages and x["file_id"] not in distributed_files ) files_for_task.append(file_for_task) distributed_files.append(file_for_task["file_id"]) @@ -592,9 +572,7 @@ def distribute_annotation_partial_files( annotators[0]["pages_number"] -= 1 if pages: full_tasks = len(pages) // MAX_PAGES - tasks_number = ( - full_tasks + 1 if len(pages) % MAX_PAGES else full_tasks - ) + tasks_number = full_tasks + 1 if len(pages) % MAX_PAGES else full_tasks for times in range(tasks_number): annotation_tasks.append( { @@ -657,9 +635,7 @@ def filter_validation_files_pages( if validator["pages_number"] > 0 else 0 ) - files_all_pages[file_id].difference_update( - files_for_validation[file_id] - ) + files_all_pages[file_id].difference_update(files_for_validation[file_id]) return files_for_validation @@ -689,9 +665,7 @@ def create_partial_validation_tasks( for file_id, pages in validation_files_pages.items(): if pages: full_tasks = len(pages) // MAX_PAGES - tasks_number = ( - full_tasks + 1 if len(pages) % MAX_PAGES else full_tasks - ) + tasks_number = full_tasks + 1 if len(pages) % MAX_PAGES else full_tasks for times in range(tasks_number): validation_tasks.append( { @@ -811,9 +785,7 @@ def check_file_distribution( add_unassigned_file(files_to_distribute, file_id, pages_number) else: # file was partially distributed - unassigned_pages = find_unassigned_pages( - distributed_pages, pages_number - ) + unassigned_pages = find_unassigned_pages(distributed_pages, pages_number) add_unassigned_file( files_to_distribute, file_id, @@ -822,17 +794,11 @@ def check_file_distribution( ) -def find_unassigned_pages( - assigned_pages: list, pages_amount: int -) -> List[int]: +def find_unassigned_pages(assigned_pages: list, pages_amount: int) -> List[int]: """ Get all pages, that were not distributed. 
""" - return [ - page - for page in range(1, pages_amount + 1) - if page not in assigned_pages - ] + return [page for page in range(1, pages_amount + 1) if page not in assigned_pages] def add_unassigned_file( diff --git a/annotation/annotation/distribution/resources.py b/annotation/annotation/distribution/resources.py index 141b40680..916d5049c 100644 --- a/annotation/annotation/distribution/resources.py +++ b/annotation/annotation/distribution/resources.py @@ -1,10 +1,5 @@ from typing import List -from fastapi import APIRouter, Depends, Path, status -from sqlalchemy import and_ -from sqlalchemy.orm import Session -from tenant_dependency import TenantData - from annotation.database import get_db from annotation.distribution import ( distribute, @@ -21,7 +16,9 @@ get_files_info, prepare_files_for_distribution, ) -from annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.microservice_communication.search import ( + X_CURRENT_TENANT_HEADER, +) from annotation.models import File, Job, User from annotation.schemas import ( BadRequestErrorSchema, @@ -31,6 +28,10 @@ ) from annotation.tags import TASKS_TAG from annotation.token_dependency import TOKEN +from fastapi import APIRouter, Depends, Path, status +from sqlalchemy import and_ +from sqlalchemy.orm import Session +from tenant_dependency import TenantData router = APIRouter( prefix="/distribution", @@ -67,15 +68,12 @@ def post_tasks( task_file_ids = {task_file["file_id"] for task_file in files} job_files = [ file_db[0] - for file_db in db.query(File.file_id) - .filter(File.job_id == job_id) - .all() + for file_db in db.query(File.file_id).filter(File.job_id == job_id).all() ] files_beyond_job = task_file_ids.difference(job_files) if files_beyond_job: raise FieldConstraintError( - f"Files with ids {files_beyond_job} are not assigned to " - f"job {job_id}" + f"Files with ids {files_beyond_job} are not assigned to " f"job {job_id}" ) annotators = ( db.query(User) @@ -99,9 +97,7 @@ def post_tasks( .all() ) validator_ids = {user.user_id for user in validators} - users_beyond_job = task_info.user_ids.difference( - annotator_ids.union(validator_ids) - ) + users_beyond_job = task_info.user_ids.difference(annotator_ids.union(validator_ids)) if users_beyond_job: raise FieldConstraintError( f"Users with ids {users_beyond_job} are not assigned to " @@ -133,8 +129,7 @@ def post_tasks( responses={ 400: {"model": BadRequestErrorSchema}, }, - summary="Distribute all remaining unassigned " - "files and pages for given job_id.", + summary="Distribute all remaining unassigned " "files and pages for given job_id.", ) def post_tasks_for_unassigned_files( job_id: int = Path(..., example=3), @@ -147,10 +142,7 @@ def post_tasks_for_unassigned_files( annotation_files_to_distribute, validation_files_to_distribute, ) = find_unassigned_files(job.files) - if ( - not annotation_files_to_distribute - and not validation_files_to_distribute - ): + if not annotation_files_to_distribute and not validation_files_to_distribute: return [] annotation_files_to_distribute = prepare_files_for_distribution( annotation_files_to_distribute diff --git a/annotation/annotation/errors.py b/annotation/annotation/errors.py index 9b9cc839e..6158aa56e 100644 --- a/annotation/annotation/errors.py +++ b/annotation/annotation/errors.py @@ -1,10 +1,9 @@ +from annotation import logger as app_logger from botocore.exceptions import BotoCoreError, ClientError from fastapi.requests import Request from fastapi.responses import JSONResponse from sqlalchemy.exc import 
DBAPIError, SQLAlchemyError -from annotation import logger as app_logger - logger = app_logger.Logger @@ -52,9 +51,7 @@ def __init__(self, exc: str): self.exc = exc -def no_such_revisions_error_handler( - request: Request, exc: NoSuchRevisionsError -): +def no_such_revisions_error_handler(request: Request, exc: NoSuchRevisionsError): return JSONResponse( status_code=404, content={"detail": "Cannot find such revision(s)."}, @@ -75,9 +72,7 @@ def no_such_category_error_handler(request: Request, exc: NoSuchCategoryError): ) -def category_unique_field_error_handler( - request: Request, exc: CheckFieldError -): +def category_unique_field_error_handler(request: Request, exc: CheckFieldError): return JSONResponse( status_code=400, content={"detail": f"Field constraint error. {exc.message}"}, @@ -119,9 +114,7 @@ def minio_no_such_bucket_error_handler(request: Request, exc: ClientError): ) -def field_constraint_error_handler( - request: Request, exc: FieldConstraintError -): +def field_constraint_error_handler(request: Request, exc: FieldConstraintError): return JSONResponse( status_code=400, content={"detail": f"Error: {exc.message}"}, @@ -135,9 +128,7 @@ def enum_validation_error_handler(request: Request, exc: EnumValidationError): ) -def category_parent_child_error_handler( - request: Request, exc: SelfParentError -): +def category_parent_child_error_handler(request: Request, exc: SelfParentError): return JSONResponse( status_code=400, content={"detail": f"Self parent error. {exc.message}"}, diff --git a/annotation/annotation/jobs/resources.py b/annotation/annotation/jobs/resources.py index dd93c36dc..e0b5f757d 100644 --- a/annotation/annotation/jobs/resources.py +++ b/annotation/annotation/jobs/resources.py @@ -1,34 +1,22 @@ from typing import Dict, List, Optional, Set, Union from uuid import UUID -from fastapi import ( - APIRouter, - Depends, - HTTPException, - Path, - Query, - Response, - status, -) -from filter_lib import Page -from sqlalchemy import and_ -from sqlalchemy.orm import Session -from sqlalchemy.sql.expression import func, or_ -from sqlalchemy_filters.exceptions import BadFilterFormat -from tenant_dependency import TenantData - import annotation.categories.services from annotation import logger as app_logger from annotation.categories import fetch_bunch_categories_db from annotation.database import get_db from annotation.distribution import distribute from annotation.filters import CategoryFilter -from annotation.microservice_communication.assets_communication import get_files_info +from annotation.microservice_communication.assets_communication import ( + get_files_info, +) from annotation.microservice_communication.jobs_communication import ( JobUpdateException, update_job_status, ) -from annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.microservice_communication.search import ( + X_CURRENT_TENANT_HEADER, +) from annotation.schemas import ( BadRequestErrorSchema, CategoryResponseSchema, @@ -48,6 +36,21 @@ ) from annotation.tags import FILES_TAG, JOBS_TAG from annotation.token_dependency import TOKEN +from fastapi import ( + APIRouter, + Depends, + HTTPException, + Path, + Query, + Response, + status, +) +from filter_lib import Page +from sqlalchemy import and_ +from sqlalchemy.orm import Session +from sqlalchemy.sql.expression import func, or_ +from sqlalchemy_filters.exceptions import BadFilterFormat +from tenant_dependency import TenantData from ..models import ( AnnotatedDoc, @@ -111,12 +114,8 @@ def post_job( ) 
db.add_all(new_users) db_users = saved_users + new_users - annotators = [ - user for user in db_users if user.user_id in job_info.annotators - ] - validators = [ - user for user in db_users if user.user_id in job_info.validators - ] + annotators = [user for user in db_users if user.user_id in job_info.annotators] + validators = [user for user in db_users if user.user_id in job_info.validators] owners = [user for user in db_users if user.user_id in job_info.owners] categories = fetch_bunch_categories_db( db, job_info.categories, x_current_tenant, root_parents=True @@ -160,9 +159,7 @@ def post_job( tenant=x_current_tenant, job_id=job_id, pages_number=f["pages_number"], - distributed_annotating_pages=list( - range(1, f["pages_number"] + 1) - ), + distributed_annotating_pages=list(range(1, f["pages_number"] + 1)), annotated_pages=list(range(1, f["pages_number"] + 1)), status=FileStatusEnumSchema.pending, ) @@ -354,10 +351,8 @@ def get_unassigned_files( job_undistributed_files = job_files.filter( or_( - func.cardinality(File.distributed_annotating_pages) - != File.pages_number, - func.cardinality(File.distributed_validating_pages) - != File.pages_number, + func.cardinality(File.distributed_annotating_pages) != File.pages_number, + func.cardinality(File.distributed_validating_pages) != File.pages_number, ), ) @@ -423,9 +418,7 @@ def start_job( job status to In Progress. """ job = get_job(db, job_id, x_current_tenant) - annotation_tasks = ( - db.query(ManualAnnotationTask).filter_by(job_id=job_id).all() - ) + annotation_tasks = db.query(ManualAnnotationTask).filter_by(job_id=job_id).all() if not annotation_tasks: raise HTTPException( status_code=404, @@ -494,10 +487,7 @@ def get_users_for_job( .filter(User.job_annotators.any(job_id=job_id)) .all() ) - return [ - {"id": user.user_id, "overall_load": user.overall_load} - for user in users - ] + return [{"id": user.user_id, "overall_load": user.overall_load} for user in users] # Get categories for job_id, each entity requires children/parents @@ -586,10 +576,7 @@ def get_jobs_info_by_files( db, file_ids, x_current_tenant, token.token ) - return { - file_id: grouped_by_file_jobs_info.get(file_id, []) - for file_id in file_ids - } + return {file_id: grouped_by_file_jobs_info.get(file_id, []) for file_id in file_ids} @router.get( diff --git a/annotation/annotation/jobs/services.py b/annotation/annotation/jobs/services.py index def19d8d4..5ec0fa108 100644 --- a/annotation/annotation/jobs/services.py +++ b/annotation/annotation/jobs/services.py @@ -2,18 +2,20 @@ from typing import Any, DefaultDict, Dict, List, Optional, Set, Tuple, Union from uuid import UUID -from filter_lib import Page, form_query, map_request_to_filter, paginate -from pydantic import ValidationError -from sqlalchemy import and_, desc, not_ -from sqlalchemy.orm import Session, query -from sqlalchemy.orm.attributes import InstrumentedAttribute - from annotation.categories import fetch_bunch_categories_db from annotation.categories.services import response_object_from_db from annotation.database import Base -from annotation.errors import EnumValidationError, FieldConstraintError, WrongJobError -from annotation.microservice_communication.assets_communication import get_files_info -from annotation.microservice_communication.jobs_communication import get_job_names +from annotation.errors import ( + EnumValidationError, + FieldConstraintError, + WrongJobError, +) +from annotation.microservice_communication.assets_communication import ( + get_files_info, +) +from 
annotation.microservice_communication.jobs_communication import ( + get_job_names, +) from annotation.models import ( Category, File, @@ -33,11 +35,14 @@ TaskStatusEnumSchema, ValidationSchema, ) +from filter_lib import Page, form_query, map_request_to_filter, paginate +from pydantic import ValidationError +from sqlalchemy import and_, desc, not_ +from sqlalchemy.orm import Session, query +from sqlalchemy.orm.attributes import InstrumentedAttribute -def update_inner_job_status( - db: Session, job_id: int, status: JobStatusEnumSchema -): +def update_inner_job_status(db: Session, job_id: int, status: JobStatusEnumSchema): """Updates job status in db""" db.query(Job).filter(Job.job_id == job_id).update({"status": status}) @@ -124,9 +129,7 @@ def get_job_attributes_for_post( return job_attributes -def check_annotators( - annotators: Set[UUID], validation_type: ValidationSchema -) -> None: +def check_annotators(annotators: Set[UUID], validation_type: ValidationSchema) -> None: annotators_validation_mapping = { ValidationSchema.cross: ( len(annotators) < CROSS_MIN_ANNOTATORS_NUMBER, @@ -151,9 +154,7 @@ def check_annotators( raise FieldConstraintError(error_message) -def check_validators( - validators: Set[UUID], validation_type: ValidationSchema -) -> None: +def check_validators(validators: Set[UUID], validation_type: ValidationSchema) -> None: validators_validation_mapping = { ValidationSchema.cross: ( validators, @@ -162,8 +163,7 @@ def check_validators( ), ValidationSchema.hierarchical: ( not validators, - "If the validation type is hierarchical, validators should " - "be provided.", + "If the validation type is hierarchical, validators should " "be provided.", ), ValidationSchema.validation_only: ( not validators, @@ -278,8 +278,7 @@ def find_users(db: Session, users_ids: Set[UUID]): saved_users = db.query(User).filter(User.user_id.in_(users_ids)).all() saved_users_ids = {user.user_id for user in saved_users} new_users = [ - User(user_id=user_id) - for user_id in users_ids.difference(saved_users_ids) + User(user_id=user_id) for user_id in users_ids.difference(saved_users_ids) ] return saved_users, new_users diff --git a/annotation/annotation/main.py b/annotation/annotation/main.py index dc2db2d2b..d907b1bb7 100644 --- a/annotation/annotation/main.py +++ b/annotation/annotation/main.py @@ -1,12 +1,7 @@ import os import pathlib -from botocore.exceptions import BotoCoreError, ClientError -from dotenv import find_dotenv, load_dotenv -from fastapi import Depends, FastAPI -from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from starlette.requests import Request - +from annotation import logger as app_logger from annotation.annotations import resources as annotations_resources from annotation.categories import resources as categories_resources from annotation.distribution import resources as distribution_resources @@ -36,12 +31,16 @@ wrong_job_error_handler, ) from annotation.jobs import resources as jobs_resources -from annotation import logger as app_logger from annotation.metadata import resources as metadata_resources from annotation.revisions import resources as revision_resources from annotation.tags import TAGS from annotation.tasks import resources as task_resources from annotation.token_dependency import TOKEN +from botocore.exceptions import BotoCoreError, ClientError +from dotenv import find_dotenv, load_dotenv +from fastapi import Depends, FastAPI +from sqlalchemy.exc import DBAPIError, SQLAlchemyError +from starlette.requests import Request load_dotenv(find_dotenv()) @@ -77,7 
+76,8 @@ async def catch_exceptions_middleware(request: Request, call_next): logger.exception(exception) raise exception -app.middleware('http')(catch_exceptions_middleware) + +app.middleware("http")(catch_exceptions_middleware) app.include_router(annotations_resources.router) app.include_router(task_resources.router) app.include_router(distribution_resources.router) @@ -89,9 +89,7 @@ async def catch_exceptions_middleware(request: Request, call_next): app.add_exception_handler( AgreementScoreServiceException, agreement_score_service_error_handler ) -app.add_exception_handler( - NoSuchRevisionsError, no_such_revisions_error_handler -) +app.add_exception_handler(NoSuchRevisionsError, no_such_revisions_error_handler) app.add_exception_handler(CheckFieldError, category_unique_field_error_handler) app.add_exception_handler(EnumValidationError, enum_validation_error_handler) app.add_exception_handler(FieldConstraintError, field_constraint_error_handler) diff --git a/annotation/annotation/microservice_communication/assets_communication.py b/annotation/annotation/microservice_communication/assets_communication.py index bf0b52eb1..6c1d1c3ac 100644 --- a/annotation/annotation/microservice_communication/assets_communication.py +++ b/annotation/annotation/microservice_communication/assets_communication.py @@ -2,9 +2,6 @@ from typing import Dict, List, Optional, Set, Tuple, Union import requests -from dotenv import find_dotenv, load_dotenv -from requests import ConnectionError, RequestException, Timeout - from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, @@ -12,6 +9,8 @@ get_response, raise_request_exception, ) +from dotenv import find_dotenv, load_dotenv +from requests import ConnectionError, RequestException, Timeout load_dotenv(find_dotenv()) ASSETS_FILES_URL = os.environ.get("ASSETS_FILES_URL") @@ -54,13 +53,9 @@ def get_files_info( for f in dataset_files_info if f["id"] not in files ] - files.update( - {dataset_file["file_id"] for dataset_file in dataset_pages_info} - ) + files.update({dataset_file["file_id"] for dataset_file in dataset_pages_info}) datasets_pages_info.extend(dataset_pages_info) - return prepare_files_for_distribution( - files_pages_info + datasets_pages_info - ) + return prepare_files_for_distribution(files_pages_info + datasets_pages_info) def get_dataset_info(dataset_id: int, tenant: str, token: str) -> List[dict]: @@ -80,9 +75,7 @@ def get_dataset_info(dataset_id: int, tenant: str, token: str) -> List[dict]: return dataset_files_info.json() -def get_file_names( - file_ids: List[int], tenant: str, token: str -) -> Dict[int, str]: +def get_file_names(file_ids: List[int], tenant: str, token: str) -> Dict[int, str]: """ Return dict of file_id and its name for provided file_ids. 
diff --git a/annotation/annotation/microservice_communication/jobs_communication.py b/annotation/annotation/microservice_communication/jobs_communication.py index 2c031ddb6..b62689203 100644 --- a/annotation/annotation/microservice_communication/jobs_communication.py +++ b/annotation/annotation/microservice_communication/jobs_communication.py @@ -2,15 +2,14 @@ from typing import Dict, List, Union import requests -from dotenv import find_dotenv, load_dotenv -from requests import RequestException - from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, get_response, ) +from dotenv import find_dotenv, load_dotenv +from requests import RequestException load_dotenv(find_dotenv()) JOBS_SEARCH_URL = os.environ.get("JOBS_SEARCH_URL") @@ -40,9 +39,7 @@ def update_job_status(callback_url: str, status: str, tenant: str, token: str): raise JobUpdateException(exc) -def get_job_names( - job_ids: List[int], tenant: str, token: str -) -> Dict[int, str]: +def get_job_names(job_ids: List[int], tenant: str, token: str) -> Dict[int, str]: """ Return dict of job_id and its name for provided job_ids from jobs microservice. diff --git a/annotation/annotation/microservice_communication/search.py b/annotation/annotation/microservice_communication/search.py index 6d942f44c..f139a985f 100644 --- a/annotation/annotation/microservice_communication/search.py +++ b/annotation/annotation/microservice_communication/search.py @@ -137,9 +137,7 @@ def construct_search_params(page: int, ids: List[int]): } -def get_response( - ids: List[int], url: str, tenant: str, token: str -) -> List[dict]: +def get_response(ids: List[int], url: str, tenant: str, token: str) -> List[dict]: """ Request from jobs or assets microservices all elements, that have provided ids. 
diff --git a/annotation/annotation/models.py b/annotation/annotation/models.py index 5f8f40536..bf4905150 100644 --- a/annotation/annotation/models.py +++ b/annotation/annotation/models.py @@ -1,6 +1,18 @@ from datetime import datetime from typing import Callable +from annotation.database import Base +from annotation.errors import CheckFieldError +from annotation.schemas import ( + DEFAULT_LOAD, + AnnotationStatisticsEventEnumSchema, + CategoryTypeSchema, + FileStatusEnumSchema, + JobStatusEnumSchema, + JobTypeEnumSchema, + TaskStatusEnumSchema, + ValidationSchema, +) from sqlalchemy import ( BOOLEAN, FLOAT, @@ -21,19 +33,6 @@ from sqlalchemy.orm import relationship, validates from sqlalchemy_utils import Ltree, LtreeType -from annotation.database import Base -from annotation.errors import CheckFieldError -from annotation.schemas import ( - DEFAULT_LOAD, - AnnotationStatisticsEventEnumSchema, - CategoryTypeSchema, - FileStatusEnumSchema, - JobStatusEnumSchema, - JobTypeEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) - association_job_annotator = Table( "association_job_annotator", Base.metadata, @@ -90,9 +89,7 @@ class AnnotatedDoc(Base): revision = Column(VARCHAR, primary_key=True) file_id = Column(INTEGER, primary_key=True) job_id = Column(INTEGER, primary_key=True) - user = Column( - UUID(as_uuid=True), ForeignKey("users.user_id", ondelete="SET NULL") - ) + user = Column(UUID(as_uuid=True), ForeignKey("users.user_id", ondelete="SET NULL")) pipeline = Column(INTEGER) date = Column(TIMESTAMP, server_default=func.now(), nullable=False) pages = Column(JSON, nullable=False, server_default="{}") @@ -286,9 +283,7 @@ class ManualAnnotationTask(Base): job_id = Column( INTEGER, ForeignKey("jobs.job_id", ondelete="cascade"), nullable=False ) - user_id = Column( - UUID(as_uuid=True), ForeignKey("users.user_id"), nullable=False - ) + user_id = Column(UUID(as_uuid=True), ForeignKey("users.user_id"), nullable=False) is_validation = Column(BOOLEAN, nullable=False) status = Column( ENUM(TaskStatusEnumSchema), diff --git a/annotation/annotation/schemas/annotations.py b/annotation/annotation/schemas/annotations.py index f483b46ab..1dde6d64d 100644 --- a/annotation/annotation/schemas/annotations.py +++ b/annotation/annotation/schemas/annotations.py @@ -7,9 +7,7 @@ class PageSchema(BaseModel): page_num: int = Field(..., ge=1, example=2) - size: Dict[str, float] = Field( - ..., example={"width": 10.2, "height": 123.34} - ) + size: Dict[str, float] = Field(..., example={"width": 10.2, "height": 123.34}) objs: List[dict] = Field( ..., example=[ @@ -43,12 +41,8 @@ class PageSchema(BaseModel): class PageOutSchema(PageSchema): - revision: str = Field( - ..., example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69" - ) - user_id: Optional[UUID] = Field( - ..., example="c1c76433-5bfb-4c4a-a5b5-93c66fbfe376" - ) + revision: str = Field(..., example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69") + user_id: Optional[UUID] = Field(..., example="c1c76433-5bfb-4c4a-a5b5-93c66fbfe376") pipeline: Optional[int] = Field(..., example=2) date: datetime = Field(..., example="2021-10-19 01:01:01") is_validated: bool = Field(default=False, example=False) @@ -67,16 +61,12 @@ class ParticularRevisionSchema(BaseModel): revision: Optional[str] = Field( ..., example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69" ) - user: Optional[UUID] = Field( - ..., example="c7311267-fdfd-4ef1-be44-160d3dd819ca" - ) + user: Optional[UUID] = Field(..., example="c7311267-fdfd-4ef1-be44-160d3dd819ca") pipeline: Optional[int] = Field(..., example=1) 
date: Optional[datetime] = Field(..., example="2021-10-19 01:01:01") pages: List[PageSchema] validated: Optional[List[int]] = Field(None, ge=1, example=[2]) - failed_validation_pages: Optional[List[int]] = Field( - None, ge=1, example=[] - ) + failed_validation_pages: Optional[List[int]] = Field(None, ge=1, example=[]) similar_revisions: Optional[List[RevisionLink]] = Field(None) categories: Optional[Set[str]] = Field(None, example=["1", "2"]) links_json: Optional[List[dict]] = Field(None, example={}) @@ -86,15 +76,11 @@ class DocForSaveSchema(BaseModel): base_revision: Optional[str] = Field( None, example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69" ) - user: Optional[UUID] = Field( - None, example="b0ac6d8c-7b31-4570-a634-c92b07c9e566" - ) + user: Optional[UUID] = Field(None, example="b0ac6d8c-7b31-4570-a634-c92b07c9e566") pipeline: Optional[int] = Field(None, example=1) pages: Optional[List[PageSchema]] = Field(None) validated: Optional[Set[int]] = Field(None, ge=1, example={1, 2, 10}) - failed_validation_pages: Optional[Set[int]] = Field( - None, ge=1, example={3, 4} - ) + failed_validation_pages: Optional[Set[int]] = Field(None, ge=1, example={3, 4}) similar_revisions: Optional[List[RevisionLink]] = Field(None) categories: Optional[Set[str]] = Field(None, example=["1", "2"]) links_json: Optional[List[dict]] = Field(None, example={}) @@ -162,12 +148,8 @@ def pages_for_save_check(cls, values): class AnnotatedDocSchema(BaseModel): - revision: str = Field( - ..., example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69" - ) - user: Optional[UUID] = Field( - ..., example="0b0ea570-e4e8-4664-84ac-dd1122471fc5" - ) + revision: str = Field(..., example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69") + user: Optional[UUID] = Field(..., example="0b0ea570-e4e8-4664-84ac-dd1122471fc5") pipeline: Optional[int] = Field(..., example=1) date: datetime = Field(..., example="2021-10-19 01:01:01") file_id: int = Field(..., example=1) diff --git a/annotation/annotation/schemas/categories.py b/annotation/annotation/schemas/categories.py index edd68ea39..0e5d37313 100644 --- a/annotation/annotation/schemas/categories.py +++ b/annotation/annotation/schemas/categories.py @@ -1,9 +1,8 @@ from enum import Enum from typing import List, Optional -from pydantic import BaseModel, Field, validator - from annotation.errors import CheckFieldError +from pydantic import BaseModel, Field, validator class CategoryTypeSchema(str, Enum): @@ -62,9 +61,7 @@ class SubCategoriesOutSchema(BaseModel): class CategoryORMSchema(CategoryInputSchema): - metadata: Optional[dict] = Field( - None, example={"color": "blue"}, alias="metadata_" - ) + metadata: Optional[dict] = Field(None, example={"color": "blue"}, alias="metadata_") class Config: orm_mode = True diff --git a/annotation/annotation/schemas/jobs.py b/annotation/annotation/schemas/jobs.py index b6aa939af..03342393c 100644 --- a/annotation/annotation/schemas/jobs.py +++ b/annotation/annotation/schemas/jobs.py @@ -65,9 +65,7 @@ class JobInfoSchema(BaseModel): is_auto_distribution: bool = Field(default=False, example=False) categories: Set[str] = Field(..., example={"1", "2"}) deadline: Optional[datetime] = Field(None, example="2021-10-19 01:01:01") - job_type: JobTypeEnumSchema = Field( - ..., example=JobTypeEnumSchema.ExtractionJob - ) + job_type: JobTypeEnumSchema = Field(..., example=JobTypeEnumSchema.ExtractionJob) extensive_coverage: int = Field( 1, example=1, @@ -80,12 +78,9 @@ def check_files_and_datasets(cls, values): """ files, datasets = values.get("files"), values.get("datasets") 
job_type = values.get("job_type") - if ( - not files and not datasets - ) and job_type != JobTypeEnumSchema.ImportJob: + if (not files and not datasets) and job_type != JobTypeEnumSchema.ImportJob: raise ValueError( - "Fields files and datasets should " - "not be empty at the same time." + "Fields files and datasets should " "not be empty at the same time." ) return values @@ -163,9 +158,7 @@ def check_categories(cls, values): job_type = values.get("job_type") categories = values.get("categories") if job_type != JobTypeEnumSchema.ImportJob and not categories: - raise ValueError( - "There should be not less than one category provided" - ) + raise ValueError("There should be not less than one category provided") return values @@ -203,9 +196,7 @@ class FileStatusEnumSchema(str, Enum): class FileInfoSchema(BaseModel): id: int = Field(..., example=1) - status: FileStatusEnumSchema = Field( - ..., example=FileStatusEnumSchema.pending - ) + status: FileStatusEnumSchema = Field(..., example=FileStatusEnumSchema.pending) class JobFilesInfoSchema(BaseModel): diff --git a/annotation/annotation/schemas/tasks.py b/annotation/annotation/schemas/tasks.py index 5bfd7d9a3..b13a22fcd 100644 --- a/annotation/annotation/schemas/tasks.py +++ b/annotation/annotation/schemas/tasks.py @@ -33,18 +33,14 @@ class TaskStatusSchema(BaseModel): class ManualAnnotationTaskInSchema(BaseModel): file_id: int = Field(..., example=2) - pages: Set[int] = Field( - ..., ge=1, min_items=1, example={1, 2, 3} - ) # type: ignore + pages: Set[int] = Field(..., ge=1, min_items=1, example={1, 2, 3}) # type: ignore job_id: int = Field(..., example=3) user_id: UUID = Field(..., example="4e9c5839-f63b-49c8-b918-614b87813e53") is_validation: bool = Field(default=False, example=False) deadline: Optional[datetime] = Field(None, example="2021-10-19 01:01:01") -class ManualAnnotationTaskSchema( - ManualAnnotationTaskInSchema, TaskStatusSchema -): +class ManualAnnotationTaskSchema(ManualAnnotationTaskInSchema, TaskStatusSchema): class Config: orm_mode = True @@ -60,9 +56,7 @@ class UserSchema(BaseModel): class ExpandedManualAnnotationTaskSchema(TaskStatusSchema): - pages: Set[int] = Field( - ..., ge=1, min_items=1, example={1, 2, 3} - ) # type: ignore + pages: Set[int] = Field(..., ge=1, min_items=1, example={1, 2, 3}) # type: ignore user: UserSchema is_validation: bool = Field(default=False, example=False) deadline: Optional[datetime] = Field(None, example="2021-10-19 01:01:01") @@ -93,8 +87,7 @@ def both_fields_not_empty_check(cls, values): files, datasets = values.get("files"), values.get("datasets") if not files and not datasets: raise ValueError( - "Fields files and datasets should " - "not be empty at the same time." + "Fields files and datasets should " "not be empty at the same time." 
) return values @@ -117,9 +110,7 @@ class ValidationEndSchema(BaseModel): class TaskPatchSchema(BaseModel): file_id: Optional[int] = Field(None, example=2) - pages: Optional[Set[int]] = Field( - None, ge=1, min_items=1, example={1, 2, 3} - ) + pages: Optional[Set[int]] = Field(None, ge=1, min_items=1, example={1, 2, 3}) job_id: Optional[int] = Field(None, example=3) user_id: Optional[UUID] = Field( None, example="4e9c5839-f63b-49c8-b918-614b87813e53" @@ -145,9 +136,7 @@ class Config: class AgreementScoreServiceInput(BaseModel): - annotator_id: UUID = Field( - ..., example="f0474853-f733-41c0-b897-90b788b822e3" - ) + annotator_id: UUID = Field(..., example="f0474853-f733-41c0-b897-90b788b822e3") job_id: int = Field(..., example=1) task_id: int = Field(..., example=1) s3_file_path: str = Field(..., example="files/1/1.pdf") @@ -157,9 +146,7 @@ class AgreementScoreServiceInput(BaseModel): class ExportTaskStatsInput(BaseModel): - user_ids: List[UUID] = Field( - ..., example=["e20af190-0f05-4cd8-ad51-811bfb19ad71"] - ) + user_ids: List[UUID] = Field(..., example=["e20af190-0f05-4cd8-ad51-811bfb19ad71"]) date_from: datetime = Field(..., example="2020-12-20 01:01:01") date_to: Optional[datetime] = Field(None, example="2025-12-20 01:01:01") @@ -170,9 +157,7 @@ class ResponseScore(BaseModel): class AgreementScoreServiceResponse(BaseModel): - annotator_id: UUID = Field( - ..., example="f0474853-f733-41c0-b897-90b788b822e3" - ) + annotator_id: UUID = Field(..., example="f0474853-f733-41c0-b897-90b788b822e3") job_id: int = Field(..., example=1) task_id: int = Field(..., example=1) agreement_score: List[ResponseScore] = Field(...) diff --git a/annotation/annotation/tasks/resources.py b/annotation/annotation/tasks/resources.py index 497fdf1f7..067f11d22 100644 --- a/annotation/annotation/tasks/resources.py +++ b/annotation/annotation/tasks/resources.py @@ -5,25 +5,6 @@ from uuid import UUID import dotenv -from fastapi import ( - APIRouter, - Body, - Depends, - Header, - HTTPException, - Path, - Query, - Response, - status, -) -from fastapi.responses import JSONResponse, StreamingResponse -from filter_lib import Page -from sqlalchemy import and_, not_ -from sqlalchemy.exc import IntegrityError, SQLAlchemyError -from sqlalchemy.orm import Session -from sqlalchemy_filters.exceptions import BadFilterFormat -from tenant_dependency import TenantData - from annotation.annotations import accumulate_pages_info, row_to_dict from annotation.database import get_db from annotation.filters import TaskFilter @@ -38,7 +19,9 @@ update_user_overall_load, ) from annotation.logger import Logger -from annotation.microservice_communication.assets_communication import get_file_names +from annotation.microservice_communication.assets_communication import ( + get_file_names, +) from annotation.microservice_communication.jobs_communication import ( JobUpdateException, update_job_status, @@ -75,6 +58,24 @@ create_validation_tasks, ) from annotation.token_dependency import TOKEN +from fastapi import ( + APIRouter, + Body, + Depends, + Header, + HTTPException, + Path, + Query, + Response, + status, +) +from fastapi.responses import JSONResponse, StreamingResponse +from filter_lib import Page +from sqlalchemy import and_, not_ +from sqlalchemy.exc import IntegrityError, SQLAlchemyError +from sqlalchemy.orm import Session +from sqlalchemy_filters.exceptions import BadFilterFormat +from tenant_dependency import TenantData from ..models import File, Job, ManualAnnotationTask from .services import ( @@ -123,8 +124,7 @@ def 
_prepare_expanded_tasks_response( user_logins = get_user_logins(tasks, tenant, token) except GetUserInfoAccessDenied: Logger.info( - "Trying to get users logins with non-admin jwt. " - "Getting empty dict" + "Trying to get users logins with non-admin jwt. " "Getting empty dict" ) user_logins = {} @@ -298,9 +298,7 @@ def get_task( if not annotation_task: return JSONResponse( status_code=404, - content={ - "detail": "Task with id {0} was not found.".format(task_id) - }, + content={"detail": "Task with id {0} was not found.".format(task_id)}, ) annotation_task = _prepare_expanded_tasks_response( db, @@ -316,14 +314,11 @@ def get_task( @router.get( "", status_code=status.HTTP_200_OK, - response_model=Dict[ - str, Union[int, List[ExpandedManualAnnotationTaskSchema]] - ], + response_model=Dict[str, Union[int, List[ExpandedManualAnnotationTaskSchema]]], responses={ 404: {"model": NotFoundErrorSchema}, }, - summary="Get a list of manual annotation tasks based " - "on search parameters.", + summary="Get a list of manual annotation tasks based " "on search parameters.", ) def get_tasks( file_id: Optional[int] = Query(None, example=5), @@ -332,9 +327,7 @@ def get_tasks( None, example="2016a913-47f2-417d-afdb-032165b9330d" ), deadline: Optional[datetime] = Query(None, example="2021-10-19 01:01:01"), - task_status: Optional[str] = Query( - None, example=TaskStatusEnumSchema.ready - ), + task_status: Optional[str] = Query(None, example=TaskStatusEnumSchema.ready), pagination_page_size: Optional[int] = Query(50, gt=0, le=100, example=25), pagination_start_page: Optional[int] = Query(1, gt=0, example=1), db: Session = Depends(get_db), @@ -487,8 +480,7 @@ def update_task( if task.status != TaskStatusEnumSchema.pending: raise HTTPException( status_code=400, - detail="Error: only tasks in 'Pending' status could " - "be updated", + detail="Error: only tasks in 'Pending' status could " "be updated", ) task_info_dict = row_to_dict(task) @@ -526,9 +518,7 @@ def update_task( if old_task_file: recalculate_file_pages(db, old_task_file) - if not ( - old_task_job_id == task.job_id and old_task_file_id == task.file_id - ): + if not (old_task_job_id == task.job_id and old_task_file_id == task.file_id): update_files(db, [row_to_dict(task)], task.job_id) db.flush() @@ -785,9 +775,7 @@ def finish_task( # if there is user for annotation # param will be True, otherwise False - annotation_user = bool( - validation_info.annotation_user_for_failed_pages is not None - ) + annotation_user = bool(validation_info.annotation_user_for_failed_pages is not None) # if there is user for validation # param will be True, otherwise False @@ -986,9 +974,7 @@ def finish_task( status_code=500, detail=f"Error: connection error ({exc.exc_info})", ) - update_inner_job_status( - db, task.job_id, JobStatusEnumSchema.finished - ) + update_inner_job_status(db, task.job_id, JobStatusEnumSchema.finished) # store metrics in db save_agreement_metrics(db=db, scores=compared_score) diff --git a/annotation/annotation/tasks/services.py b/annotation/annotation/tasks/services.py index 8b0744ff2..7a6c84443 100644 --- a/annotation/annotation/tasks/services.py +++ b/annotation/annotation/tasks/services.py @@ -6,12 +6,6 @@ import dotenv import pydantic -from fastapi import HTTPException -from filter_lib import Page, form_query, map_request_to_filter, paginate -from sqlalchemy import and_, asc, text -from sqlalchemy.orm import Session -from tenant_dependency import TenantData - from annotation.errors import CheckFieldError, FieldConstraintError from 
annotation.filters import TaskFilter from annotation.jobs import update_files, update_user_overall_load @@ -40,6 +34,11 @@ TaskStatusEnumSchema, ValidationSchema, ) +from fastapi import HTTPException +from filter_lib import Page, form_query, map_request_to_filter, paginate +from sqlalchemy import and_, asc, text +from sqlalchemy.orm import Session +from tenant_dependency import TenantData dotenv.load_dotenv(dotenv.find_dotenv()) AGREEMENT_SCORE_MIN_MATCH = float(os.getenv("AGREEMENT_SCORE_MIN_MATCH")) @@ -68,10 +67,7 @@ def validate_users_info( for new/updated task. Raises FieldConstraintError in case of any validation fails. """ - if ( - validation_type == ValidationSchema.cross - and task_info["is_validation"] - ): + if validation_type == ValidationSchema.cross and task_info["is_validation"]: check_cross_annotating_pages(db, task_info) if task_info["is_validation"]: job_task_validator = ( @@ -80,9 +76,7 @@ def validate_users_info( if validation_type == ValidationSchema.cross else association_job_validator ) - .filter_by( - user_id=task_info["user_id"], job_id=task_info["job_id"] - ) + .filter_by(user_id=task_info["user_id"], job_id=task_info["job_id"]) .first() ) if not job_task_validator: @@ -93,9 +87,7 @@ def validate_users_info( else: job_task_annotator = ( db.query(association_job_annotator) - .filter_by( - user_id=task_info["user_id"], job_id=task_info["job_id"] - ) + .filter_by(user_id=task_info["user_id"], job_id=task_info["job_id"]) .first() ) if not job_task_annotator: @@ -208,9 +200,7 @@ def validate_user_actions( ) -def create_annotation_task( - db: Session, annotation_task: ManualAnnotationTaskInSchema -): +def create_annotation_task(db: Session, annotation_task: ManualAnnotationTaskInSchema): annotation_task = ManualAnnotationTask(**annotation_task.dict()) db.add(annotation_task) @@ -255,9 +245,7 @@ def filter_tasks_db( filter_query = db.query(ManualAnnotationTask).filter( ManualAnnotationTask.jobs.has(tenant=tenant) ) - filter_args = map_request_to_filter( - request.dict(), ManualAnnotationTask.__name__ - ) + filter_args = map_request_to_filter(request.dict(), ManualAnnotationTask.__name__) task_query, pagination = form_query(filter_args, filter_query) return paginate(task_query.all(), pagination) @@ -314,9 +302,7 @@ def finish_validation_task(db: Session, task: ManualAnnotationTask) -> None: ManualAnnotationTask.file_id == task.file_id, ManualAnnotationTask.is_validation.is_(True), ).with_for_update().update( - { - ManualAnnotationTask.status: TaskStatusEnumSchema.finished # noqa: E501 - }, + {ManualAnnotationTask.status: TaskStatusEnumSchema.finished}, # noqa: E501 synchronize_session="fetch", ) db.commit() @@ -361,13 +347,9 @@ def get_task_revisions( if int(key) in task_pages } revision.failed_validation_pages = [ - page - for page in revision.failed_validation_pages - if page in task_pages - ] - revision.validated = [ - page for page in revision.validated if page in task_pages + page for page in revision.failed_validation_pages if page in task_pages ] + revision.validated = [page for page in revision.validated if page in task_pages] return [ revision @@ -382,9 +364,7 @@ def get_task_revisions( ] -def get_task_info( - db: Session, task_id: int, tenant: str -) -> ManualAnnotationTask: +def get_task_info(db: Session, task_id: int, tenant: str) -> ManualAnnotationTask: return ( db.query(ManualAnnotationTask) .filter( @@ -418,9 +398,7 @@ def unblock_validation_tasks( ManualAnnotationTask.pages.contained_by(annotated_file_pages), ) ) - .update( - {"status": 
TaskStatusEnumSchema.ready}, synchronize_session=False - ) + .update({"status": TaskStatusEnumSchema.ready}, synchronize_session=False) ) @@ -448,9 +426,7 @@ def add_task_stats_record( stats_db.updated = datetime.utcnow() else: if stats.event_type == "closed": - raise CheckFieldError( - "Attribute event_type can not start from closed." - ) + raise CheckFieldError("Attribute event_type can not start from closed.") stats_db = AnnotationStatistics(task_id=task_id, **stats.dict()) db.add(stats_db) @@ -487,9 +463,7 @@ def create_export_csv( "file_id": stat.task.file_id, "pages": stat.task.pages, "time_start": stat.created.isoformat(), - "time_finish": ( - stat.updated.isoformat() if stat.updated else None - ), + "time_finish": (stat.updated.isoformat() if stat.updated else None), "agreement_score": [ { "task_from": metric.task_from, @@ -564,9 +538,7 @@ def evaluate_agreement_score( ) for task_in in tasks_intersection_pages ] - agreement_scores: List[ - AgreementScoreServiceResponse - ] = get_agreement_score( + agreement_scores: List[AgreementScoreServiceResponse] = get_agreement_score( agreement_scores_input=agreement_scores_input, tenant=tenant, token=token.token, @@ -617,9 +589,7 @@ def compare_agreement_scores( get_unique_scores(task_from_id, scores, unique_scores) # check is every annotator reached min match score and return result - agreement_reached: bool = all( - map(lambda a: a.score >= min_match, unique_scores) - ) + agreement_reached: bool = all(map(lambda a: a.score >= min_match, unique_scores)) metrics: List[TaskMetric] = list( sorted( map( @@ -638,9 +608,7 @@ def compare_agreement_scores( ) -def save_agreement_metrics( - db: Session, scores: AgreementScoreComparingResult -) -> None: +def save_agreement_metrics(db: Session, scores: AgreementScoreComparingResult) -> None: metrics: List[AgreementMetrics] = [ AgreementMetrics( task_from=el.task_from_id, diff --git a/annotation/annotation/tasks/validation.py b/annotation/annotation/tasks/validation.py index c04082c53..a77d78e91 100644 --- a/annotation/annotation/tasks/validation.py +++ b/annotation/annotation/tasks/validation.py @@ -3,10 +3,6 @@ from typing import Dict, List, Optional, Set, Union from uuid import UUID -from fastapi import HTTPException -from sqlalchemy import and_, asc, null, or_ -from sqlalchemy.orm import Session - from annotation.distribution import prepare_response from annotation.microservice_communication.assets_communication import ( FilesForDistribution, @@ -17,6 +13,9 @@ TaskStatusEnumSchema, ValidationSchema, ) +from fastapi import HTTPException +from sqlalchemy import and_, asc, null, or_ +from sqlalchemy.orm import Session from .services import create_tasks @@ -117,9 +116,7 @@ def create_tasks_initial_users( Create validation tasks with 'pending' status automatically. """ # revisions for job_id and file_id, made by annotators - annotators_revisions = get_annotators_revisions( - db, file_id, job.job_id, task_id - ) + annotators_revisions = get_annotators_revisions(db, file_id, job.job_id, task_id) # find annotators, who made annotation for each page initial_annotators = find_initial_annotators(annotators_revisions, failed) # create tasks for annotation with 'ready' status @@ -157,12 +154,8 @@ def create_annotation_tasks_specific_user( Create validation tasks with 'pending' status automatically. 
""" # check, that string is valid uuid - annotation_user_for_failed_pages = check_uuid( - annotation_user_for_failed_pages - ) - check_user_job_action( - db, annotation_user_for_failed_pages, job.job_id, False - ) + annotation_user_for_failed_pages = check_uuid(annotation_user_for_failed_pages) + check_user_job_action(db, annotation_user_for_failed_pages, job.job_id, False) # create annotation task for specific user with 'ready' status # and tasks for validation with 'pending' status prepare_response( @@ -223,9 +216,7 @@ def _find_annotators_for_failed_pages( for revision in revisions: rev_pages = set(map(int, revision.pages)) # take unique pages - for page in rev_pages.intersection( - pages - ): # take only failed by val pages + for page in rev_pages.intersection(pages): # take only failed by val pages pages_user[page] = revision.user if None in pages_user.values(): @@ -382,9 +373,7 @@ def create_validation_tasks_specific_user( validation_user_for_reannotated_pages ) - check_user_job_action( - db, validation_user_for_reannotated_pages, job.job_id, False - ) + check_user_job_action(db, validation_user_for_reannotated_pages, job.job_id, False) if ( validator_id == validation_user_for_reannotated_pages @@ -444,8 +433,7 @@ def check_user_job_action( if not check_user_job_belonging(db, user_id, job_id, only_owner=True): raise HTTPException( status_code=400, - detail="Only owner may not request " - "validation of edited pages.", + detail="Only owner may not request " "validation of edited pages.", ) else: if not check_user_job_belonging(db, user_id, job_id, only_owner=False): @@ -472,12 +460,8 @@ def check_user_job_belonging( if not only_owner: filters.extend( [ - and_( - Job.annotators.any(user_id=user_id), Job.job_id == job_id - ), - and_( - Job.validators.any(user_id=user_id), Job.job_id == job_id - ), + and_(Job.annotators.any(user_id=user_id), Job.job_id == job_id), + and_(Job.validators.any(user_id=user_id), Job.job_id == job_id), ] ) return bool(db.query(User).filter(or_(*filters)).first()) diff --git a/annotation/annotation/utils.py b/annotation/annotation/utils.py index 2251e4f75..ea1853ede 100644 --- a/annotation/annotation/utils.py +++ b/annotation/annotation/utils.py @@ -7,6 +7,6 @@ def get_test_db_url(main_db_url: str) -> str: postgresql+psycopg2://admin:admin@host:5432/test_db """ main_db_url_split = main_db_url.split("/") - main_db_url_split[-1] = 'test_db' + main_db_url_split[-1] = "test_db" result = "/".join(main_db_url_split) return result diff --git a/annotation/documentation/update_docs.py b/annotation/documentation/update_docs.py index d49ee5071..9f8c70153 100644 --- a/annotation/documentation/update_docs.py +++ b/annotation/documentation/update_docs.py @@ -1,13 +1,10 @@ import yaml - from app.main import app def str_presenter(dumper, data): if "\n" in data: - return dumper.represent_scalar( - "tag:yaml.org,2002:str", data, style="|" - ) + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") return dumper.represent_scalar("tag:yaml.org,2002:str", data) diff --git a/annotation/tests/conftest.py b/annotation/tests/conftest.py index 4e171e140..7c0c4cf6d 100644 --- a/annotation/tests/conftest.py +++ b/annotation/tests/conftest.py @@ -253,9 +253,7 @@ def use_temp_env_var(): @pytest.fixture(scope="module") def db_session(setup_test_db): - session_local = sessionmaker( - autocommit=False, autoflush=False, bind=engine - ) + session_local = sessionmaker(autocommit=False, autoflush=False, bind=engine) session = session_local() yield session @@ -451,9 +449,7 @@ def 
prepare_moto_s3_for_get_revisions(): @pytest.fixture(name="expected_latest_revisions", scope="module") def load_expected_latest_revisions(): - with open( - "tests/fixtures/expected_latest_revisions.json", "r" - ) as json_file: + with open("tests/fixtures/expected_latest_revisions.json", "r") as json_file: json_data = json.load(json_file) return json_data @@ -592,16 +588,12 @@ def prepare_db_for_finish_task_check_deleted_annotators(db_session): @pytest.fixture def prepare_db_categories_same_names(db_session): - category_tenant = Category( - id="1", tenant=TEST_TENANT, name="Title", type="box" - ) + category_tenant = Category(id="1", tenant=TEST_TENANT, name="Title", type="box") category_common = Category(id="2", tenant=None, name="Table", type="box") category_other_tenant = Category( id="3", tenant="other_tenant", name="Title", type="box" ) - add_objects( - db_session, (category_tenant, category_common, category_other_tenant) - ) + add_objects(db_session, (category_tenant, category_common, category_other_tenant)) yield db_session @@ -610,9 +602,7 @@ def prepare_db_categories_same_names(db_session): @pytest.fixture def prepare_db_categories_different_names(db_session): - category_tenant = Category( - id="1", tenant=TEST_TENANT, name="Title", type="box" - ) + category_tenant = Category(id="1", tenant=TEST_TENANT, name="Title", type="box") category_common = Category(id="2", tenant=None, name="Table", type="box") category_other_tenant = Category( id="3", tenant="other_tenant", name="Header", type="box" @@ -739,10 +729,7 @@ def prepare_db_for_cr_task(db_session): @pytest.fixture(scope="module") def prepare_db_update_stats(prepare_db_for_cr_task): for task_id in [ - id_ - for (id_,) in prepare_db_for_cr_task.query( - ManualAnnotationTask.id - ).all() + id_ for (id_,) in prepare_db_for_cr_task.query(ManualAnnotationTask.id).all() ]: add_task_stats_record( db=prepare_db_for_cr_task, @@ -757,10 +744,7 @@ def prepare_db_update_stats(prepare_db_for_cr_task): @pytest.fixture(scope="module") def prepare_db_update_stats_already_updated(prepare_db_update_stats): for task_id in [ - id_ - for (id_,) in prepare_db_update_stats.query( - ManualAnnotationTask.id - ).all() + id_ for (id_,) in prepare_db_update_stats.query(ManualAnnotationTask.id).all() ]: add_task_stats_record( db=prepare_db_update_stats, @@ -868,9 +852,7 @@ def prepare_db_for_get_next_task(db_session): @pytest.fixture(scope="function") def prepare_db_for_batch_delete_tasks(db_session): add_objects(db_session, [DELETE_BATCH_TASKS_JOB]) - add_objects( - db_session, (DELETE_BATCH_TASKS_FILE, DELETE_BATCH_TASKS_ANNOTATOR) - ) + add_objects(db_session, (DELETE_BATCH_TASKS_FILE, DELETE_BATCH_TASKS_ANNOTATOR)) db_session.bulk_insert_mappings(ManualAnnotationTask, DIFF_STATUSES_TASKS) db_session.commit() @@ -885,10 +867,7 @@ def minio_particular_revision(): s3_resource = boto3.resource("s3", region_name=DEFAULT_REGION) s3_resource.create_bucket(Bucket=TEST_TENANT) - path = ( - f"{S3_START_PATH}/{PART_REV_DOC.job_id}/" - f"{PART_REV_DOC.file_id}/" - ) + path = f"{S3_START_PATH}/{PART_REV_DOC.job_id}/" f"{PART_REV_DOC.file_id}/" s3_resource.Bucket(TEST_TENANT).put_object( Body=json.dumps(PART_REV_PAGES[0]), @@ -1104,9 +1083,7 @@ def db_get_unassigned_files(db_session): @pytest.fixture def db_validation_end(db_session): add_objects(db_session, [validation.JOBS[0]]) - add_objects( - db_session, validation.FILES + validation.TASKS + validation.DOCS - ) + add_objects(db_session, validation.FILES + validation.TASKS + validation.DOCS) 
update_annotators_overall_load(db_session, validation.ANNOTATORS) yield db_session @@ -1127,9 +1104,9 @@ def prepare_db_find_annotators_for_failed_pages(db_validation_end): ManualAnnotationTask.user_id == annotator_for_delete ).delete(synchronize_session=False) db_validation_end.commit() - db_validation_end.query(User).filter( - User.user_id == annotator_for_delete - ).delete(synchronize_session=False) + db_validation_end.query(User).filter(User.user_id == annotator_for_delete).delete( + synchronize_session=False + ) db_validation_end.commit() yield db_validation_end diff --git a/annotation/tests/test_annotators_overall_load.py b/annotation/tests/test_annotators_overall_load.py index 972e14d77..1ecb0b22d 100644 --- a/annotation/tests/test_annotators_overall_load.py +++ b/annotation/tests/test_annotators_overall_load.py @@ -96,9 +96,7 @@ annotators=[user for user in OVERALL_LOAD_USERS[:3]], validation_type=ValidationSchema.cross, is_auto_distribution=False, - categories=[ - Category(id="123", name="Title", type=CategoryTypeSchema.box) - ], + categories=[Category(id="123", name="Title", type=CategoryTypeSchema.box)], deadline="2021-10-19T01:01:01", tenant=TEST_TENANT, ), @@ -111,9 +109,7 @@ validation_type=ValidationSchema.hierarchical, files=[TASK_FILES_OVERALL_LOAD[0]], is_auto_distribution=False, - categories=[ - Category(id="125", name="Paragraph", type=CategoryTypeSchema.box) - ], + categories=[Category(id="125", name="Paragraph", type=CategoryTypeSchema.box)], deadline="2021-10-19T01:01:01", tenant=TEST_TENANT, ), @@ -125,9 +121,7 @@ validation_type=ValidationSchema.cross, files=[TASK_FILES_OVERALL_LOAD[2]], is_auto_distribution=False, - categories=[ - Category(id="126", name="Abstract", type=CategoryTypeSchema.box) - ], + categories=[Category(id="126", name="Abstract", type=CategoryTypeSchema.box)], deadline="2021-10-19T01:01:01", tenant=TEST_TENANT, ), # job for task distribution for particular job @@ -139,9 +133,7 @@ validation_type=ValidationSchema.hierarchical, files=[TASK_FILES_OVERALL_LOAD[5]], is_auto_distribution=False, - categories=[ - Category(id="127", name="Abstract", type=CategoryTypeSchema.box) - ], + categories=[Category(id="127", name="Abstract", type=CategoryTypeSchema.box)], deadline="2021-10-19T01:01:01", tenant=TEST_TENANT, ), @@ -153,9 +145,7 @@ owners=[OVERALL_LOAD_USERS[14]], validation_type=ValidationSchema.hierarchical, is_auto_distribution=True, - categories=[ - Category(id="128", name="Abstract", type=CategoryTypeSchema.box) - ], + categories=[Category(id="128", name="Abstract", type=CategoryTypeSchema.box)], deadline="2022-10-19T01:01:01", tenant=TEST_TENANT, status=JobStatusEnumSchema.in_progress, @@ -374,9 +364,7 @@ def test_overall_load_after_update_task( ) assert response.status_code == 200 - for user_id, expected_overall_load in zip( - users_id, expected_overall_loads - ): + for user_id, expected_overall_load in zip(users_id, expected_overall_loads): user = prepare_db_for_overall_load.query(User).get(user_id) assert user.overall_load == expected_overall_load @@ -407,13 +395,9 @@ def test_overall_load_after_delete_batch_tasks(prepare_db_for_overall_load): OVERALL_LOAD_CREATED_TASKS[5].user_id, ] expected_overall_loads = [4, 0] - response = client.delete( - CRUD_TASKS_PATH, json=[4, 6], headers=TEST_HEADERS - ) + response = client.delete(CRUD_TASKS_PATH, json=[4, 6], headers=TEST_HEADERS) assert response.status_code == 204 - for user_id, expected_overall_load in zip( - user_ids, expected_overall_loads - ): + for user_id, expected_overall_load in 
zip(user_ids, expected_overall_loads): user = prepare_db_for_overall_load.query(User).get(user_id) assert user.overall_load == expected_overall_load @@ -430,11 +414,7 @@ def test_overall_load_after_delete_batch_tasks(prepare_db_for_overall_load): ), ( # validator with pages for reannotation 7, - { - "annotation_user_for_failed_pages": OVERALL_LOAD_USERS[ - 4 - ].user_id - }, + {"annotation_user_for_failed_pages": OVERALL_LOAD_USERS[4].user_id}, [OVERALL_LOAD_USERS[5].user_id, OVERALL_LOAD_USERS[4].user_id], [1, 6], ), @@ -453,29 +433,22 @@ def test_overall_load_after_finish_task( headers=TEST_HEADERS, ) assert response.status_code == 200 - for user_id, expected_overall_load in zip( - users_id, expected_overall_loads - ): + for user_id, expected_overall_load in zip(users_id, expected_overall_loads): user = prepare_db_for_overall_load.query(User).get(user_id) assert user.overall_load == expected_overall_load @mark.integration -def test_overall_load_after_distribution( - monkeypatch, prepare_db_for_overall_load -): +def test_overall_load_after_distribution(monkeypatch, prepare_db_for_overall_load): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication." - "get_response", + "annotation.microservice_communication.assets_communication." "get_response", Mock(return_value=[{"id": 3, "pages": 4}]), ) response = client.post( "/distribution", json=OVERALL_LOAD_NEW_TASKS[2], headers=TEST_HEADERS ) assert response.status_code == 201 - user = prepare_db_for_overall_load.query(User).get( - OVERALL_LOAD_USERS[6].user_id - ) + user = prepare_db_for_overall_load.query(User).get(OVERALL_LOAD_USERS[6].user_id) assert user.overall_load == 4 diff --git a/annotation/tests/test_assets_communication.py b/annotation/tests/test_assets_communication.py index fa51d52c9..f9101dfe5 100644 --- a/annotation/tests/test_assets_communication.py +++ b/annotation/tests/test_assets_communication.py @@ -122,12 +122,9 @@ (FILE_IDS, [], {}), ], ) -def test_get_file_names( - monkeypatch, file_ids, parsed_response, expected_result -): +def test_get_file_names(monkeypatch, file_ids, parsed_response, expected_result): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication." - "get_response", + "annotation.microservice_communication.assets_communication." "get_response", Mock(return_value=parsed_response), ) @@ -216,8 +213,7 @@ def test_get_files_info( expected_result, ): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication." - "get_response", + "annotation.microservice_communication.assets_communication." 
"get_response", Mock(return_value=mocked_files), ) for i, dataset_id in enumerate(dataset_ids): @@ -228,9 +224,7 @@ def test_get_files_info( headers=TEST_HEADERS, status=200, ) - actual_result = get_files_info( - file_ids, dataset_ids, TEST_TENANT, TEST_TOKEN - ) + actual_result = get_files_info(file_ids, dataset_ids, TEST_TENANT, TEST_TOKEN) assert actual_result == expected_result diff --git a/annotation/tests/test_category_crud.py b/annotation/tests/test_category_crud.py index b47c98bdb..64f1ad4d8 100644 --- a/annotation/tests/test_category_crud.py +++ b/annotation/tests/test_category_crud.py @@ -161,14 +161,10 @@ def add_for_cascade_delete( parent_id = request.param session = prepare_db_categories_different_names data_1 = prepare_category_body(name="Title1", parent=parent_id) - response_1 = client.post( - CATEGORIES_PATH, json=data_1, headers=TEST_HEADERS - ) + response_1 = client.post(CATEGORIES_PATH, json=data_1, headers=TEST_HEADERS) cat_id_1 = response_1.json()["id"] data_2 = prepare_category_body(name="Title3", parent=cat_id_1) - response_2 = client.post( - CATEGORIES_PATH, json=data_2, headers=TEST_HEADERS - ) + response_2 = client.post(CATEGORIES_PATH, json=data_2, headers=TEST_HEADERS) cat_id_2 = response_2.json()["id"] common_cat = session.query(Category).get("2") session.delete(common_cat) @@ -211,9 +207,7 @@ def test_add_unique_name(prepare_db_categories_different_names, category_name): data = prepare_category_body(name=category_name) response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) assert response.status_code == 201 - assert prepare_expected_result(response.text) == prepare_category_response( - data - ) + assert prepare_expected_result(response.text) == prepare_category_response(data) @mark.integration @@ -235,9 +229,7 @@ def test_add_unique_name_custom_fields( data = prepare_category_body(**field_value_pairs) response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) assert response.status_code == 201 - assert prepare_expected_result(response.text) == prepare_category_response( - data - ) + assert prepare_expected_result(response.text) == prepare_category_response(data) @mark.integration @@ -261,9 +253,7 @@ def test_add_wrong_field_types(db_session, wrong_field, wrong_value): "data_attributes": None, wrong_field: wrong_value, # rewrite default value with parametrized } - response = client.post( - CATEGORIES_PATH, json=wrong_body, headers=TEST_HEADERS - ) + response = client.post(CATEGORIES_PATH, json=wrong_body, headers=TEST_HEADERS) assert response.status_code == 422 @@ -338,9 +328,7 @@ def test_add_id_is_generated(prepare_db_categories_different_names): "category_id", ("1Category123", "second_category", "3rd_category"), ) -def test_add_id_numbers_underscore( - category_id, prepare_db_categories_different_names -): +def test_add_id_numbers_underscore(category_id, prepare_db_categories_different_names): data = prepare_category_body(id_=category_id, name=str(uuid.uuid4())) response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) assert response.status_code == 201 @@ -354,9 +342,7 @@ def test_add_id_numbers_underscore( "category_id", ("1st!-category1", "2nd%category", "3rd:.category"), ) -def test_add_id_special_chars( - category_id, prepare_db_categories_different_names -): +def test_add_id_special_chars(category_id, prepare_db_categories_different_names): data = prepare_category_body(id_=category_id, name=str(uuid.uuid4())) response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) assert 
response.status_code == 400 @@ -372,9 +358,7 @@ def test_add_self_parent(prepare_db_categories_different_names): @mark.integration -@patch( - "annotation.categories.resources.fetch_category_db", side_effect=SQLAlchemyError -) +@patch("annotation.categories.resources.fetch_category_db", side_effect=SQLAlchemyError) def test_get_db_connection_error(prepare_db_categories_same_names): cat_id = 1 response = client.get(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) @@ -388,9 +372,7 @@ def test_get_db_connection_error(prepare_db_categories_same_names): ("3", "100"), # other tenant category and category that doesn't exist ) def test_get_wrong_category(category_id, prepare_db_categories_same_names): - response = client.get( - f"{CATEGORIES_PATH}/{category_id}", headers=TEST_HEADERS - ) + response = client.get(f"{CATEGORIES_PATH}/{category_id}", headers=TEST_HEADERS) assert response.status_code == 404 assert f"Category with id: {category_id} doesn't exist" in response.text @@ -407,13 +389,9 @@ def test_get_allowed_category( category_id, category_name, prepare_db_categories_same_names ): data = prepare_category_body(name=category_name) - response = client.get( - f"{CATEGORIES_PATH}/{category_id}", headers=TEST_HEADERS - ) + response = client.get(f"{CATEGORIES_PATH}/{category_id}", headers=TEST_HEADERS) assert response.status_code == 200 - assert prepare_expected_result(response.text) == prepare_category_response( - data - ) + assert prepare_expected_result(response.text) == prepare_category_response(data) @mark.integration @@ -430,9 +408,7 @@ def test_get_no_tenant_specified(prepare_db_categories_same_names): ) def test_search_db_connection_error(prepare_db_categories_for_filtration): data = prepare_filtration_body() - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) assert response.status_code == 500 assert "Error: connection error" in response.text @@ -448,9 +424,7 @@ def test_search_pagination( data = prepare_filtration_body( page_num=page_num, page_size=page_size, no_filtration=True ) - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) categories = response.json()["data"] pagination = response.json()["pagination"] assert response.status_code == 200 @@ -463,9 +437,7 @@ def test_search_pagination( @mark.integration def test_search_no_filtration(prepare_db_categories_for_filtration): data = prepare_filtration_body(page_size=30, no_filtration=True) - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) categories = response.json()["data"] assert response.status_code == 200 assert len(categories) == 16 @@ -476,13 +448,9 @@ def test_search_no_filtration(prepare_db_categories_for_filtration): "category_id", ("2", "100"), # other tenant category and category that doesn't exist ) -def test_search_wrong_category( - category_id, prepare_db_categories_for_filtration -): +def test_search_wrong_category(category_id, prepare_db_categories_for_filtration): data = prepare_filtration_body(value=category_id) - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) categories = response.json()["data"] total = 
response.json()["pagination"]["total"] assert response.status_code == 200 @@ -505,9 +473,7 @@ def test_search_allowed_categories( ): expected = prepare_category_body(name=category_name) data = prepare_filtration_body(value=category_id) - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) category = response.json()["data"][0] assert response.status_code == 200 @@ -526,9 +492,7 @@ def test_search_filter_gt_lt( operator, value, expected, prepare_db_categories_for_filtration ): data = prepare_filtration_body(operator=operator, value=value) - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) categories = response.json()["data"] assert response.status_code == 200 assert len(categories) == expected @@ -542,12 +506,8 @@ def test_search_filter_gt_lt( def test_search_filter_name_like( operator, value, expected, prepare_db_categories_for_filtration ): - data = prepare_filtration_body( - field="name", operator=operator, value=value - ) - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + data = prepare_filtration_body(field="name", operator=operator, value=value) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) categories = response.json()["data"] assert response.status_code == 200 assert len(categories) == expected @@ -558,12 +518,8 @@ def test_search_filter_name_like( def test_search_filter_ordering( direction, expected, prepare_db_categories_for_filtration ): - data = prepare_filtration_body( - operator="lt", value="5", direction=direction - ) - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + data = prepare_filtration_body(operator="lt", value="5", direction=direction) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) categories = response.json()["data"][0] assert response.status_code == 200 assert categories["id"] == expected @@ -571,12 +527,8 @@ def test_search_filter_ordering( @mark.integration def test_search_filter_distinct_id(prepare_db_categories_for_filtration): - data = prepare_filtration_body( - page_size=30, field="id", operator="distinct" - ) - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + data = prepare_filtration_body(page_size=30, field="id", operator="distinct") + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) result_data = response.json()["data"] assert response.status_code == 200 assert len(result_data) == 16 @@ -592,14 +544,10 @@ def test_search_two_filters_different_distinct_order( second_operator="is_not_null", sorting_field="type", ) - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) first_result_data = response.json()["data"] data = prepare_filtration_body_double_filter(first_operator="is_not_null") - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) second_result_data = response.json()["data"] assert first_result_data == second_result_data @@ -609,9 +557,7 @@ def test_search_two_filters_both_distinct( 
prepare_db_categories_for_distinct_filtration, ): data = prepare_filtration_body_double_filter() - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) result_data = response.json()["data"] assert response.status_code == 200 assert len(result_data) == 3 @@ -620,12 +566,9 @@ def test_search_two_filters_both_distinct( @mark.integration def test_search_categories_400_error(prepare_db_categories_for_filtration): data = prepare_filtration_body(field="parent", operator="distinct") - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) error_message = ( - "SELECT DISTINCT ON expressions must " - "match initial ORDER BY expressions" + "SELECT DISTINCT ON expressions must " "match initial ORDER BY expressions" ) assert response.status_code == 400 assert error_message in response.text @@ -644,9 +587,7 @@ def test_search_wrong_parameters( wrong_parameter, value, prepare_db_categories_for_filtration ): data = prepare_filtration_body(**{wrong_parameter: value}) - response = client.post( - f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS - ) + response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) assert response.status_code == 422 assert "value is not a valid enumeration member" in response.text @@ -710,9 +651,7 @@ def test_update_category_custom_fields( f"{CATEGORIES_PATH}/{cat_id}", json=data, headers=TEST_HEADERS ) assert response.status_code == 200 - assert prepare_expected_result(response.text) == prepare_category_response( - data - ) + assert prepare_expected_result(response.text) == prepare_category_response(data) @mark.integration @@ -743,9 +682,7 @@ def test_update_other_tenant_exist_name(prepare_db_categories_different_names): f"{CATEGORIES_PATH}/{cat_id}", json=data, headers=TEST_HEADERS ) assert response.status_code == 200 - assert prepare_expected_result(response.text) == prepare_category_response( - data - ) + assert prepare_expected_result(response.text) == prepare_category_response(data) @mark.integration @@ -793,15 +730,11 @@ def test_update_other_tenant_parent(prepare_db_categories_different_names): "category_parent", ("2", "4"), # parent from commons and this tenant other category as parent ) -def test_update_allowed_parent( - category_parent, prepare_db_categories_different_names -): +def test_update_allowed_parent(category_parent, prepare_db_categories_different_names): cat_id = "1" data_add = prepare_category_body(name="Footer") data_add["id"] = category_parent - prepare_db_categories_different_names.merge( - Category(**clean_data_for_db(data_add)) - ) + prepare_db_categories_different_names.merge(Category(**clean_data_for_db(data_add))) prepare_db_categories_different_names.commit() data_update = prepare_category_body(parent=category_parent) response = client.put( @@ -819,9 +752,7 @@ def test_update_allowed_parent( ) def test_delete_db_connection_error(prepare_db_categories_same_names): cat_id = "1" - response = client.delete( - f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS - ) + response = client.delete(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) assert response.status_code == 500 assert "Error: connection error" in response.text @@ -836,9 +767,7 @@ def test_delete_wrong_category( prepare_db_categories_same_names, ): cat_id = "100" - response = client.delete( - 
f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS - ) + response = client.delete(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) assert response.status_code == 404 assert "Cannot delete category that doesn't exist" in response.text @@ -846,9 +775,7 @@ def test_delete_wrong_category( @mark.integration def test_delete_common_category(prepare_db_categories_same_names): cat_id = "2" - response = client.delete( - f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS - ) + response = client.delete(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) assert response.status_code == 400 assert "Cannot delete default category" in response.text @@ -856,14 +783,10 @@ def test_delete_common_category(prepare_db_categories_same_names): @mark.integration def test_delete_tenant_category(prepare_db_categories_same_names): cat_id = "1" - response = client.delete( - f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS - ) + response = client.delete(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) assert response.status_code == 204 assert ( - client.get( - f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS - ).status_code + client.get(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS).status_code == 404 ) @@ -873,26 +796,18 @@ def test_delete_tenant_category(prepare_db_categories_same_names): def test_cascade_delete_tenant_parent(add_for_cascade_delete): cat_id = "1" child_1, child_2 = add_for_cascade_delete - response = client.delete( - f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS - ) + response = client.delete(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) assert response.status_code == 204 assert ( - client.get( - f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS - ).status_code + client.get(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS).status_code == 404 ) assert ( - client.get( - f"{CATEGORIES_PATH}/{child_1}", headers=TEST_HEADERS - ).status_code + client.get(f"{CATEGORIES_PATH}/{child_1}", headers=TEST_HEADERS).status_code == 404 ) assert ( - client.get( - f"{CATEGORIES_PATH}/{child_2}", headers=TEST_HEADERS - ).status_code + client.get(f"{CATEGORIES_PATH}/{child_2}", headers=TEST_HEADERS).status_code == 404 ) @@ -903,20 +818,14 @@ def test_cascade_delete_common_parent(add_for_cascade_delete): common_id = "2" child_1, child_2 = add_for_cascade_delete assert ( - client.get( - f"{CATEGORIES_PATH}/{common_id}", headers=TEST_HEADERS - ).status_code + client.get(f"{CATEGORIES_PATH}/{common_id}", headers=TEST_HEADERS).status_code == 404 ) assert ( - client.get( - f"{CATEGORIES_PATH}/{child_1}", headers=TEST_HEADERS - ).status_code + client.get(f"{CATEGORIES_PATH}/{child_1}", headers=TEST_HEADERS).status_code == 404 ) assert ( - client.get( - f"{CATEGORIES_PATH}/{child_2}", headers=TEST_HEADERS - ).status_code + client.get(f"{CATEGORIES_PATH}/{child_2}", headers=TEST_HEADERS).status_code == 404 ) diff --git a/annotation/tests/test_cross_validation.py b/annotation/tests/test_cross_validation.py index e52b85cce..270b3d5c0 100644 --- a/annotation/tests/test_cross_validation.py +++ b/annotation/tests/test_cross_validation.py @@ -381,9 +381,7 @@ def test_cross_distribution_small_files( ), ], ) -def test_cross_partial_files( - annotated_files_pages, files, annotators, expected_tasks -): +def test_cross_partial_files(annotated_files_pages, files, annotators, expected_tasks): assert ( distribute_validation_partial_files( annotated_files_pages, diff --git a/annotation/tests/test_delete_batch_tasks.py b/annotation/tests/test_delete_batch_tasks.py index 3a321ab10..9ee017379 100644 --- 
a/annotation/tests/test_delete_batch_tasks.py +++ b/annotation/tests/test_delete_batch_tasks.py @@ -18,9 +18,7 @@ client = TestClient(app) -DELETE_BATCH_TASKS_ANNOTATOR = User( - user_id="18d3d189-e73a-4680-bfa7-7ba3fe6ebee5" -) +DELETE_BATCH_TASKS_ANNOTATOR = User(user_id="18d3d189-e73a-4680-bfa7-7ba3fe6ebee5") CATEGORIES = [ Category( id="18d3d189e73a4680bfa77ba3fe6ebee5", @@ -125,9 +123,7 @@ def test_delete_batch_tasks_status_codes( prepare_db_for_batch_delete_tasks, tasks_id, job_id, expected_code ): - response = client.delete( - CRUD_TASKS_PATH, json=tasks_id, headers=TEST_HEADERS - ) + response = client.delete(CRUD_TASKS_PATH, json=tasks_id, headers=TEST_HEADERS) assert response.status_code == expected_code check_files_distributed_pages(prepare_db_for_batch_delete_tasks, job_id) diff --git a/annotation/tests/test_distribution.py b/annotation/tests/test_distribution.py index 1043f6d87..87d4eb6e5 100644 --- a/annotation/tests/test_distribution.py +++ b/annotation/tests/test_distribution.py @@ -944,9 +944,7 @@ def test_find_files_for_task(task_pages, expected_files): ), ], ) -def test_distribute_annotation_limit_50_pages( - files, annotators, expected_tasks -): +def test_distribute_annotation_limit_50_pages(files, annotators, expected_tasks): assert ( distribute_tasks( {}, @@ -1003,9 +1001,7 @@ def test_distribute_annotation_limit_50_pages( ], ) @pytest.mark.unittest -def test_distribution_with_extensive_coverage( - files, annotators, extensive_coverage -): +def test_distribution_with_extensive_coverage(files, annotators, extensive_coverage): tasks = distribute_tasks_extensively( files=files, users=annotators, @@ -1075,9 +1071,7 @@ def test_add_unassigned_file( unassigned_pages, expected_result, ): - add_unassigned_file( - files_to_distribute, file_id, pages_number, unassigned_pages - ) + add_unassigned_file(files_to_distribute, file_id, pages_number, unassigned_pages) assert files_to_distribute == expected_result diff --git a/annotation/tests/test_finish_task.py b/annotation/tests/test_finish_task.py index 9b13b6836..7f53ee320 100644 --- a/annotation/tests/test_finish_task.py +++ b/annotation/tests/test_finish_task.py @@ -336,18 +336,14 @@ ] -def check_files_finished_pages( - test_session: Session, job_id: int, tenant: str -): +def check_files_finished_pages(test_session: Session, job_id: int, tenant: str): finished_tasks = test_session.query(ManualAnnotationTask).filter( ManualAnnotationTask.job_id == job_id, ManualAnnotationTask.status == TaskStatusEnumSchema.finished, ) files = test_session.query(File).filter(File.job_id == job_id).all() validation_type = ( - test_session.query(Job.validation_type) - .filter_by(job_id=job_id) - .first() + test_session.query(Job.validation_type).filter_by(job_id=job_id).first() ) for task_file in files: @@ -550,9 +546,7 @@ def test_finish_not_all_tasks_db_contain( status=500, headers=TEST_HEADERS, ) - client.post( - FINISH_TASK_PATH.format(task_id=FINISH_TASK_ID), headers=TEST_HEADERS - ) + client.post(FINISH_TASK_PATH.format(task_id=FINISH_TASK_ID), headers=TEST_HEADERS) task = prepare_db_for_finish_task_status_two_tasks_same_job.query( ManualAnnotationTask ).get(FINISH_TASK_ID) @@ -831,9 +825,7 @@ def test_finish_task_pending_validation_unblocking( headers=TEST_HEADERS, ) session = prepare_db_for_finish_task_change_validation_status - annotation_finish_task = ManualAnnotationTask( - **ANNOTATION_TASKS_TO_FINISH[0] - ) + annotation_finish_task = ManualAnnotationTask(**ANNOTATION_TASKS_TO_FINISH[0]) session.add(annotation_finish_task) 
session.commit() client.post( @@ -879,9 +871,7 @@ def test_finish_tasks_failed_validation_statuses( status=200, headers=TEST_HEADERS, ) - validation_finish_task = ManualAnnotationTask( - **VALIDATION_TASKS_TO_FINISH[0] - ) + validation_finish_task = ManualAnnotationTask(**VALIDATION_TASKS_TO_FINISH[0]) session.add(validation_finish_task) session.commit() client.post( @@ -926,9 +916,7 @@ def test_finish_tasks_reannotation_statuses( status=200, headers=TEST_HEADERS, ) - validation_finish_task = ManualAnnotationTask( - **VALIDATION_TASKS_TO_FINISH[1] - ) + validation_finish_task = ManualAnnotationTask(**VALIDATION_TASKS_TO_FINISH[1]) session.add(validation_finish_task) session.commit() client.post( @@ -950,22 +938,17 @@ def test_finish_task_initial_annotator_deleted( ): session = prepare_db_for_finish_task_check_deleted_annotators session.query(ManualAnnotationTask).filter( - ManualAnnotationTask.id - == FINISH_TASK_CHECK_DELETE_USER_ANNOTATOR_2["id"] + ManualAnnotationTask.id == FINISH_TASK_CHECK_DELETE_USER_ANNOTATOR_2["id"] ).delete() session.commit() - session.query(User).filter( - User.user_id == FINISH_TASK_USER_2.user_id - ).delete() + session.query(User).filter(User.user_id == FINISH_TASK_USER_2.user_id).delete() session.commit() end_task_schema = { "annotation_user_for_failed_pages": "initial", } response = client.post( - FINISH_TASK_PATH.format( - task_id=FINISH_TASK_CHECK_DELETE_USER_VALIDATOR["id"] - ), + FINISH_TASK_PATH.format(task_id=FINISH_TASK_CHECK_DELETE_USER_VALIDATOR["id"]), headers=TEST_HEADERS, json=end_task_schema, ) diff --git a/annotation/tests/test_get_accumulated_revisions.py b/annotation/tests/test_get_accumulated_revisions.py index 840593605..fbf5c26ae 100644 --- a/annotation/tests/test_get_accumulated_revisions.py +++ b/annotation/tests/test_get_accumulated_revisions.py @@ -373,9 +373,7 @@ def test_get_annotation_for_latest_revision_status_codes( ], indirect=["db_errors"], ) -def test_get_annotation_for_latest_revision_db_exceptions( - monkeypatch, db_errors -): +def test_get_annotation_for_latest_revision_db_exceptions(monkeypatch, db_errors): response = client.get( construct_accumulated_revs_path( DOCS[0].job_id, diff --git a/annotation/tests/test_get_annotation_for_particular_revision.py b/annotation/tests/test_get_annotation_for_particular_revision.py index f05b2defe..d1989076e 100644 --- a/annotation/tests/test_get_annotation_for_particular_revision.py +++ b/annotation/tests/test_get_annotation_for_particular_revision.py @@ -132,9 +132,7 @@ def test_get_annotation_for_particular_revision_status_codes( Mock(return_value=minio_particular_revision), ) response = client.get( - construct_part_rev_path( - PART_REV_DOC.job_id, file_id, PART_REV_DOC.revision - ), + construct_part_rev_path(PART_REV_DOC.job_id, file_id, PART_REV_DOC.revision), headers={ HEADER_TENANT: tenant, AUTHORIZATION: f"{BEARER} {TEST_TOKEN}", @@ -153,9 +151,7 @@ def test_get_annotation_for_particular_revision_status_codes( ], indirect=["db_errors"], ) -def test_get_annotation_for_particular_revision_db_exceptions( - monkeypatch, db_errors -): +def test_get_annotation_for_particular_revision_db_exceptions(monkeypatch, db_errors): response = client.get( construct_part_rev_path( PART_REV_DOC.job_id, diff --git a/annotation/tests/test_get_child_categories.py b/annotation/tests/test_get_child_categories.py index f8da46ccc..029f068d8 100644 --- a/annotation/tests/test_get_child_categories.py +++ b/annotation/tests/test_get_child_categories.py @@ -39,9 +39,7 @@ Category(id="4", name="4", parent="2", 
tenant=TEST_TENANT, type=TEST_TYPE), ) -OTHER_TENANT_CHILD_CATEGORY = Category( - id="5", name="5", tenant="other", type=TEST_TYPE -) +OTHER_TENANT_CHILD_CATEGORY = Category(id="5", name="5", tenant="other", type=TEST_TYPE) NOT_EXIST_ID = "100" # Common categories have tree hierarchical structure of ids: @@ -120,9 +118,7 @@ def test_get_child_categories( ["category_id", "tenant"], [("1", "other"), ("5", TEST_TENANT), (NOT_EXIST_ID, TEST_TENANT)], ) -def test_get_wrong_categories( - prepare_db_child_categories, category_id, tenant -): +def test_get_wrong_categories(prepare_db_child_categories, category_id, tenant): response = client.get( f"{CATEGORIES_PATH}/{category_id}/child", headers={ diff --git a/annotation/tests/test_get_job.py b/annotation/tests/test_get_job.py index 785ab0c80..f984563cf 100644 --- a/annotation/tests/test_get_job.py +++ b/annotation/tests/test_get_job.py @@ -185,9 +185,7 @@ def test_get_jobs_by_file_id_sql_connection_error( (FILE_TEST_IDS[0], JOB_TEST_TENANTS[1]), ], ) -def test_get_jobs_by_file_id_404_error( - prepare_db_for_get_job, tenant, file_id -): +def test_get_jobs_by_file_id_404_error(prepare_db_for_get_job, tenant, file_id): response = client.get( f"{ANNOTATION_PATH}/{file_id}", headers={ @@ -233,9 +231,7 @@ def test_get_jobs_by_file_id_404_error( ), ], ) -def test_get_jobs_by_file( - prepare_db_for_get_job, file_id, tenant, expected_response -): +def test_get_jobs_by_file(prepare_db_for_get_job, file_id, tenant, expected_response): response = client.get( f"{ANNOTATION_PATH}/{file_id}", headers={ @@ -260,9 +256,7 @@ def test_get_jobs_name(monkeypatch, prepare_db_for_get_job): 2: "Job2name", 3: "JobNameFromJobsMicroservice", } - result = collect_job_names( - session, job_ids, JOB_TEST_TENANTS[0], TEST_TOKEN - ) + result = collect_job_names(session, job_ids, JOB_TEST_TENANTS[0], TEST_TOKEN) job_name_from_db = session.query(Job.name).filter(Job.job_id == 3).scalar() assert job_name_from_db == "JobNameFromJobsMicroservice" assert result == expected_result diff --git a/annotation/tests/test_get_job_files.py b/annotation/tests/test_get_job_files.py index ceb4788bc..b2de86494 100644 --- a/annotation/tests/test_get_job_files.py +++ b/annotation/tests/test_get_job_files.py @@ -200,9 +200,7 @@ def test_get_job_files_404_error( ), ], ) -def test_get_job_files( - prepare_db_for_get_job_files, job_id, tenant, expected_files -): +def test_get_job_files(prepare_db_for_get_job_files, job_id, tenant, expected_files): response = client.get( GET_JOB_FILES_PATH.format(job_id=job_id), headers={ @@ -237,8 +235,7 @@ def test_get_job_files( "current_page": 1, "page_size": 50, "files": [ - {"id": f.file_id, "status": f.status} - for f in GET_JOB_FILES[:3] + {"id": f.file_id, "status": f.status} for f in GET_JOB_FILES[:3] ], }, ), @@ -271,8 +268,7 @@ def test_get_job_files( "current_page": 1, "page_size": 2, "files": [ - {"id": f.file_id, "status": f.status} - for f in GET_JOB_FILES[:2] + {"id": f.file_id, "status": f.status} for f in GET_JOB_FILES[:2] ], }, ), diff --git a/annotation/tests/test_get_jobs_info_by_files.py b/annotation/tests/test_get_jobs_info_by_files.py index 791f1dd97..4e7b7f4bc 100644 --- a/annotation/tests/test_get_jobs_info_by_files.py +++ b/annotation/tests/test_get_jobs_info_by_files.py @@ -30,9 +30,7 @@ File(file_id=3, tenant=TEST_TENANT, job_id=2, pages_number=5), File(file_id=4, tenant=TEST_TENANT, job_id=2, pages_number=5), ] -FILES_THIRD_JOB = [ - File(file_id=5, tenant=TEST_TENANT, job_id=3, pages_number=5) -] +FILES_THIRD_JOB = [File(file_id=5, 
tenant=TEST_TENANT, job_id=3, pages_number=5)] JOBS = [ # files with ids [1, 2, 3, 6] belong to this job diff --git a/annotation/tests/test_get_pages_info.py b/annotation/tests/test_get_pages_info.py index 5aa92c869..6bff4e0d5 100644 --- a/annotation/tests/test_get_pages_info.py +++ b/annotation/tests/test_get_pages_info.py @@ -129,7 +129,7 @@ failed_validation_pages=[], tenant=TEST_TENANT, task_id=TASKS[0].id, - categories={'some'} + categories={"some"}, ), AnnotatedDoc( revision="2", @@ -197,7 +197,8 @@ def test_accumulate_pages_info(revisions, task_pages, expected_result): def test_accumulate_pages_info_can_extract_categories(): revisions = DOCS_FOR_ACCUMULATE_PAGES_INFO[1] _, _, _, _, categories, _ = accumulate_pages_info( - *(TASKS[0].pages,), revisions, + *(TASKS[0].pages,), + revisions, ) assert categories == revisions[0].categories diff --git a/annotation/tests/test_get_revisions.py b/annotation/tests/test_get_revisions.py index 8834f8b82..aac8663d1 100644 --- a/annotation/tests/test_get_revisions.py +++ b/annotation/tests/test_get_revisions.py @@ -363,9 +363,7 @@ def test_get_latest_revision_by_user_s3_connection_error( @pytest.mark.integration @patch.object(Session, "query") -def test_get_all_revisions_sql_connection_error( - Session, prepare_db_for_get_revisions -): +def test_get_all_revisions_sql_connection_error(Session, prepare_db_for_get_revisions): Session.side_effect = Mock(side_effect=SQLAlchemyError()) response = client.get( f"{ANNOTATION_PATH}/{JOBS_IDS[0]}/{FILES_IDS[0]}", diff --git a/annotation/tests/test_get_revisions_without_annotation.py b/annotation/tests/test_get_revisions_without_annotation.py index 206b0fdce..cc3a6b78c 100644 --- a/annotation/tests/test_get_revisions_without_annotation.py +++ b/annotation/tests/test_get_revisions_without_annotation.py @@ -204,9 +204,7 @@ def test_get_revisions_without_annotation_status_codes( ], indirect=["db_errors"], ) -def test_get_revisions_without_annotation_db_exceptions( - monkeypatch, db_errors -): +def test_get_revisions_without_annotation_db_exceptions(monkeypatch, db_errors): response = client.get( construct_rev_without_annotation_path(JOB_ID, FILE_ID_1), headers={"X-Current-Tenant": TEST_TENANT}, diff --git a/annotation/tests/test_job_categories.py b/annotation/tests/test_job_categories.py index d7522ccda..ee3a7795a 100644 --- a/annotation/tests/test_job_categories.py +++ b/annotation/tests/test_job_categories.py @@ -425,9 +425,7 @@ def test_get_job_categories_pagination( @mark.integration @mark.parametrize("page_size", (20, 1, 10)) -def test_get_job_wrong_pagination( - page_size, prepare_db_job_with_filter_categories -): +def test_get_job_wrong_pagination(page_size, prepare_db_job_with_filter_categories): pagination_params = {"page_size": page_size, "page_num": 1} response = client.get( f"{JOBS_PATH}/{MOCK_ID}/categories", @@ -528,9 +526,7 @@ def test_search_allowed_categories( ) category = response.json()["data"][0] assert response.status_code == 200 - assert prepare_expected_result(category) == prepare_category_response( - expected - ) + assert prepare_expected_result(category) == prepare_category_response(expected) @mark.integration @@ -567,9 +563,7 @@ def test_search_filter_name_like( prepare_db_categories_for_filtration, prepare_db_job_with_filter_categories, ): - data = prepare_filtration_body( - field="name", operator=operator, value=value - ) + data = prepare_filtration_body(field="name", operator=operator, value=value) response = client.post( f"{POST_JOBS_PATH}/{MOCK_ID}/categories/search", json=data, @@ 
-588,9 +582,7 @@ def test_search_filter_ordering( prepare_db_categories_for_filtration, prepare_db_job_with_filter_categories, ): - data = prepare_filtration_body( - operator="lt", value="5", direction=direction - ) + data = prepare_filtration_body(operator="lt", value="5", direction=direction) response = client.post( f"{POST_JOBS_PATH}/{MOCK_ID}/categories/search", json=data, @@ -606,9 +598,7 @@ def test_search_filter_distinct_id( prepare_db_categories_for_filtration, prepare_db_job_with_filter_categories, ): - data = prepare_filtration_body( - page_size=30, field="id", operator="distinct" - ) + data = prepare_filtration_body(page_size=30, field="id", operator="distinct") response = client.post( f"{POST_JOBS_PATH}/{MOCK_ID}/categories/search", json=data, @@ -674,8 +664,7 @@ def test_search_categories_400_error( headers=TEST_HEADERS, ) error_message = ( - "SELECT DISTINCT ON expressions must " - "match initial ORDER BY expressions" + "SELECT DISTINCT ON expressions must " "match initial ORDER BY expressions" ) assert response.status_code == 400 assert error_message in response.text diff --git a/annotation/tests/test_microservices_search.py b/annotation/tests/test_microservices_search.py index c476d6f0f..27d34be9a 100644 --- a/annotation/tests/test_microservices_search.py +++ b/annotation/tests/test_microservices_search.py @@ -235,9 +235,7 @@ @pytest.mark.parametrize( ["elem_amount", "expected_amount_of_pages"], [(50, 1), (100, 1), (101, 2)] ) -def test_calculate_amount_of_pagination_pages( - elem_amount, expected_amount_of_pages -): +def test_calculate_amount_of_pagination_pages(elem_amount, expected_amount_of_pages): actual_result = calculate_amount_of_pagination_pages(elem_amount) assert actual_result == expected_amount_of_pages @@ -298,9 +296,7 @@ def test_expand_response(): ], ) @responses.activate -def test_get_response( - ids, url, is_assets, microservice_response, expected_response -): +def test_get_response(ids, url, is_assets, microservice_response, expected_response): responses.add( responses.POST, url, diff --git a/annotation/tests/test_post.py b/annotation/tests/test_post.py index 9f2b2edf8..b9a50b7ba 100644 --- a/annotation/tests/test_post.py +++ b/annotation/tests/test_post.py @@ -392,9 +392,7 @@ def check_files_distributed_pages(test_session: Session, job_id: int): ) files = test_session.query(File).filter(File.job_id == job_id).all() validation_type = ( - test_session.query(Job.validation_type) - .filter_by(job_id=job_id) - .first() + test_session.query(Job.validation_type).filter_by(job_id=job_id).first() ) test_session.add_all(files) test_session.commit() @@ -409,8 +407,7 @@ def check_files_distributed_pages(test_session: Session, job_id: int): distributed_annotating_pages = sorted(distributed_annotating_pages) if validation_type[0] != ValidationSchema.validation_only: assert ( - task_file.distributed_annotating_pages - == distributed_annotating_pages + task_file.distributed_annotating_pages == distributed_annotating_pages ) validating_tasks = tasks.filter( @@ -421,10 +418,7 @@ def check_files_distributed_pages(test_session: Session, job_id: int): for validating_task in validating_tasks: distributed_validating_pages.update(validating_task.pages) distributed_validating_pages = sorted(distributed_validating_pages) - assert ( - task_file.distributed_validating_pages - == distributed_validating_pages - ) + assert task_file.distributed_validating_pages == distributed_validating_pages @pytest.mark.integration @@ -512,12 +506,8 @@ def test_post_tasks_only_datasets( 
@pytest.mark.integration def test_post_tasks_new_user(monkeypatch, prepare_db_for_post): - assert not prepare_db_for_post.query(User).get( - TASK_INFO_NEW_USER["user_ids"][0] - ) - assert not prepare_db_for_post.query(User).get( - TASK_INFO_NEW_USER["user_ids"][1] - ) + assert not prepare_db_for_post.query(User).get(TASK_INFO_NEW_USER["user_ids"][0]) + assert not prepare_db_for_post.query(User).get(TASK_INFO_NEW_USER["user_ids"][1]) monkeypatch.setattr( "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=FILES_FROM_ASSETS_FOR_TASK_INFO_NEW_USER), @@ -535,15 +525,9 @@ def test_post_tasks_new_user(monkeypatch, prepare_db_for_post): for user in TASK_INFO_NEW_USER["user_ids"]: assert user in response.text assert expected_message in response.text - assert not prepare_db_for_post.query(User).get( - TASK_INFO_NEW_USER["user_ids"][0] - ) - assert not prepare_db_for_post.query(User).get( - TASK_INFO_NEW_USER["user_ids"][1] - ) - check_files_distributed_pages( - prepare_db_for_post, TASK_INFO_NEW_USER["job_id"] - ) + assert not prepare_db_for_post.query(User).get(TASK_INFO_NEW_USER["user_ids"][0]) + assert not prepare_db_for_post.query(User).get(TASK_INFO_NEW_USER["user_ids"][1]) + check_files_distributed_pages(prepare_db_for_post, TASK_INFO_NEW_USER["job_id"]) @pytest.mark.integration @@ -584,9 +568,7 @@ def test_post_tasks_deadline( "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=assets_files), ) - response = client.post( - f"{POST_TASKS_PATH}", json=task_info, headers=TEST_HEADERS - ) + response = client.post(f"{POST_TASKS_PATH}", json=task_info, headers=TEST_HEADERS) assert response.status_code == 201 for task in response.json(): assert task["deadline"] == expected_deadline @@ -605,9 +587,7 @@ def test_post_tasks_validation_only(monkeypatch, prepare_db_for_post): "datasets": [], "job_id": JOBS_ID[3], } - response = client.post( - f"{POST_TASKS_PATH}", json=tasks_info, headers=TEST_HEADERS - ) + response = client.post(f"{POST_TASKS_PATH}", json=tasks_info, headers=TEST_HEADERS) assert response.status_code == 201 for task in response.json(): assert task["is_validation"] diff --git a/annotation/tests/test_post_annotation.py b/annotation/tests/test_post_annotation.py index a9714671d..e09220063 100644 --- a/annotation/tests/test_post_annotation.py +++ b/annotation/tests/test_post_annotation.py @@ -68,12 +68,8 @@ type=CategoryTypeSchema.box, ), ] -POST_ANNOTATION_ANNOTATOR = User( - user_id="6ffab2dd-3605-46d4-98a1-2d20011e132d" -) -POST_ANNOTATION_VALIDATOR = User( - user_id="6ffab2dd-3605-46d4-98a1-2d20011e132e" -) +POST_ANNOTATION_ANNOTATOR = User(user_id="6ffab2dd-3605-46d4-98a1-2d20011e132d") +POST_ANNOTATION_VALIDATOR = User(user_id="6ffab2dd-3605-46d4-98a1-2d20011e132e") FIRST_DATE = "2021-12-01T12:19:54.188831" @@ -340,9 +336,7 @@ DIFF_FIRST_PAGE = copy.deepcopy(PAGES[1]) DIFF_FIRST_PAGE["page_num"] = 1 -HASH_OF_DIFF_FIRST_PAGE = sha1( - json.dumps(DIFF_FIRST_PAGE).encode() -).hexdigest() +HASH_OF_DIFF_FIRST_PAGE = sha1(json.dumps(DIFF_FIRST_PAGE).encode()).hexdigest() DOC_FOR_FIRST_SAVE_BY_USER = { "user": POST_ANNOTATION_ANNOTATOR.user_id, @@ -519,9 +513,7 @@ "revision": sha1( json.dumps(DOC_FOR_FIRST_SAVE_BY_USER["pages"][0]).encode() + json.dumps(DOC_FOR_FIRST_SAVE_BY_USER["validated"]).encode() - + json.dumps( - DOC_FOR_FIRST_SAVE_BY_USER["failed_validation_pages"] - ).encode() + + json.dumps(DOC_FOR_FIRST_SAVE_BY_USER["failed_validation_pages"]).encode() ).hexdigest(), "user": 
POST_ANNOTATION_ANNOTATOR.user_id, "pipeline": None, @@ -542,9 +534,7 @@ ANNOTATED_DOC_PIPELINE_FIRST = { "revision": sha1( json.dumps(DOC_FOR_FIRST_SAVE_BY_PIPELINE["pages"][0]).encode() - + json.dumps( - DOC_FOR_FIRST_SAVE_BY_PIPELINE.get("validated", []) - ).encode() + + json.dumps(DOC_FOR_FIRST_SAVE_BY_PIPELINE.get("validated", [])).encode() + json.dumps( DOC_FOR_FIRST_SAVE_BY_PIPELINE.get("failed_validation_pages", []) ).encode() @@ -564,15 +554,9 @@ } ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE = copy.deepcopy(ANNOTATED_DOC_FIRST) -ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["file_id"] = POST_ANNOTATION_TASK_2[ - "file_id" -] -ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["job_id"] = POST_ANNOTATION_TASK_2[ - "job_id" -] -ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["task_id"] = POST_ANNOTATION_TASK_2[ - "id" -] +ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["file_id"] = POST_ANNOTATION_TASK_2["file_id"] +ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["job_id"] = POST_ANNOTATION_TASK_2["job_id"] +ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["task_id"] = POST_ANNOTATION_TASK_2["id"] PAGES_SHA = {} B_PAGES = b"" @@ -619,9 +603,7 @@ DOC_FOR_SECOND_SAVE_BY_USER["base_revision"].encode() + json.dumps(DOC_FOR_SECOND_SAVE_BY_USER["pages"][0]).encode() + json.dumps(DOC_FOR_SECOND_SAVE_BY_USER["validated"]).encode() - + json.dumps( - DOC_FOR_SECOND_SAVE_BY_USER["failed_validation_pages"] - ).encode() + + json.dumps(DOC_FOR_SECOND_SAVE_BY_USER["failed_validation_pages"]).encode() ).hexdigest(), "user": POST_ANNOTATION_ANNOTATOR.user_id, "pipeline": None, @@ -642,9 +624,7 @@ ANNOTATED_DOC_WITH_BOTH_TOKENS_AND_BBOX = { "revision": sha1( json.dumps(DOC_WITH_BBOX_AND_TOKENS_FIELDS["pages"][0]).encode() - + json.dumps( - DOC_WITH_BBOX_AND_TOKENS_FIELDS.get("validated", []) - ).encode() + + json.dumps(DOC_WITH_BBOX_AND_TOKENS_FIELDS.get("validated", [])).encode() + json.dumps( DOC_WITH_BBOX_AND_TOKENS_FIELDS.get("failed_validation_pages", []) ).encode() @@ -871,9 +851,7 @@ POST_ANNOTATION_PG_DOC.revision.encode() + json.dumps(DOC_FOR_CHECK_MERGE_CONFLICT["pages"][0]).encode() + json.dumps(DOC_FOR_CHECK_MERGE_CONFLICT["validated"]).encode() - + json.dumps( - DOC_FOR_CHECK_MERGE_CONFLICT["failed_validation_pages"] - ).encode() + + json.dumps(DOC_FOR_CHECK_MERGE_CONFLICT["failed_validation_pages"]).encode() ).hexdigest(), "user": POST_ANNOTATION_ANNOTATOR.user_id, "pipeline": None, @@ -1204,8 +1182,7 @@ def test_post_annotation_by_pipeline_status_codes( response = client.post( construct_path( ANNOTATION_PATH, - f"{POST_ANNOTATION_PG_DOC.job_id}/" - f"{POST_ANNOTATION_PG_DOC.file_id}", + f"{POST_ANNOTATION_PG_DOC.job_id}/" f"{POST_ANNOTATION_PG_DOC.file_id}", ), headers={ HEADER_TENANT: POST_ANNOTATION_PG_DOC.tenant, @@ -1456,9 +1433,7 @@ def test_upload_json_to_minio(mock_minio_empty_bucket): def test_upload_pages_to_minio(mock_minio_empty_bucket): s3_resource = mock_minio_empty_bucket - upload_pages_to_minio( - PAGES_SCHEMA, PAGES_SHA, S3_PATH, TEST_TENANT, s3_resource - ) + upload_pages_to_minio(PAGES_SCHEMA, PAGES_SHA, S3_PATH, TEST_TENANT, s3_resource) for page_obj in s3_resource.Bucket(TEST_TENANT).objects.filter( Delimiter="/", Prefix=S3_PATH + "/" @@ -1639,9 +1614,7 @@ def test_check_docs_identity(latest_doc, new_doc, expected_result): # validated: empty # failed_validation_pages: empty ( - ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("docs_with_categories")[ - 0 - ], + ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("docs_with_categories")[0], "path/to/file", "bucket-of-phys-file", { @@ -1688,9 +1661,7 @@ def 
test_create_manifest_json_first_upload( prepare_db_for_manifest_creation_with_one_record, s3_resource, ) - man_obj = s3_resource.Object( - POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}" - ) + man_obj = s3_resource.Object(POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}") actual_manifest = json.loads(man_obj.get()["Body"].read().decode("utf-8")) del actual_manifest["date"] assert actual_manifest == expected_manifest @@ -1824,12 +1795,8 @@ def test_create_manifest_json_first_upload( # validated : from latest revision # failed_validation_pages: empty ( - ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get( - "same_pages_not_validated" - ), - ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get( - "same_pages_not_validated" - )[1], + ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("same_pages_not_validated"), + ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("same_pages_not_validated")[1], "path/to/another/file", "another-bucket", { @@ -1852,9 +1819,7 @@ def test_create_manifest_json_first_upload( # failed_validation_pages: empty ( ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("docs_with_categories"), - ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("docs_with_categories")[ - 1 - ], + ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("docs_with_categories")[1], "path/to/file", "bucket-of-phys-file", { @@ -1903,9 +1868,7 @@ def test_create_manifest_json_with_annotated_docs_and_manifest_in_minio( prepare_db_for_manifest_creation_with_several_records, s3_resource, ) - man_obj = s3_resource.Object( - POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}" - ) + man_obj = s3_resource.Object(POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}") actual_manifest = json.loads(man_obj.get()["Body"].read().decode("utf-8")) delete_date_fields([actual_manifest]) assert actual_manifest == expected_manifest @@ -1934,9 +1897,7 @@ def test_create_manifest_json_date_field( ) ) prepare_db_for_manifest_creation_with_one_record.commit() - man_obj = s3_resource.Object( - POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}" - ) + man_obj = s3_resource.Object(POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}") actual_manifest = json.loads(man_obj.get()["Body"].read().decode("utf-8")) assert annotated_doc["date"] @@ -2059,9 +2020,7 @@ def test_construct_annotated_doc( ) amount_of_docs_after_commit = db.query(AnnotatedDoc).count() - delete_date_fields( - [actual_doc, doc_in_db_after_commit, formatted_actual_doc] - ) + delete_date_fields([actual_doc, doc_in_db_after_commit, formatted_actual_doc]) assert doc_in_session_after_commit == [] assert doc_in_db_after_commit == expected_result @@ -2083,9 +2042,7 @@ def test_construct_annotated_doc_different_jobs_and_files( s3_resource = mock_minio_empty_bucket expected_result_1 = { - k: v - for k, v in ANNOTATED_DOC_FIRST.items() - if k not in ("similar_revisions",) + k: v for k, v in ANNOTATED_DOC_FIRST.items() if k not in ("similar_revisions",) } expected_result_2 = { k: v @@ -2246,8 +2203,7 @@ def test_post_annotation_by_pipeline( actual_result = client.post( construct_path( ANNOTATION_PATH, - f"{POST_ANNOTATION_PG_DOC.job_id}/" - f"{POST_ANNOTATION_PG_DOC.file_id}", + f"{POST_ANNOTATION_PG_DOC.job_id}/" f"{POST_ANNOTATION_PG_DOC.file_id}", ), headers={ HEADER_TENANT: POST_ANNOTATION_PG_DOC.tenant, diff --git a/annotation/tests/test_post_job.py b/annotation/tests/test_post_job.py index 812824d2c..f436f79c5 100644 --- a/annotation/tests/test_post_job.py +++ b/annotation/tests/test_post_job.py @@ -430,10 +430,7 @@ def test_post_job_connection_exception(Session, prepare_db_for_post_job): ( POST_JOB_NEW_JOBS[11], 
422, - ( - "Fields files and datasets should not be empty " - "at the same time." - ), + ("Fields files and datasets should not be empty " "at the same time."), ), # even in ExtractionJob must be either files or datasets ( POST_JOB_NEW_JOBS[12], @@ -725,9 +722,7 @@ def test_get_job_attributes_for_post( [(POST_JOB_NEW_JOBS[10], "ExtractionJob1"), (POST_JOB_NEW_JOBS[13], None)], ) @responses.activate -def test_post_extraction_job_saved( - prepare_db_for_post_job, job_info, expected_name -): +def test_post_extraction_job_saved(prepare_db_for_post_job, job_info, expected_name): """Tests that new ExtractionJob with valid user type fields will be created in db in default 'pending' status and that values for not-provided optional fields 'validation_type', 'deadline', 'name' are @@ -771,9 +766,7 @@ def test_post_import_job_saved(prepare_db_for_post_job): ) assert response.status_code == expected_response_code saved_job = row_to_dict(session.query(Job).get(job_info["job_id"])) - assert ( - not session.query(File).filter(File.job_id == job_info["job_id"]).all() - ) + assert not session.query(File).filter(File.job_id == job_info["job_id"]).all() assert not saved_job.get("categories") assert not saved_job.get("deadline") assert saved_job.get("validation_type") == ValidationSchema.cross diff --git a/annotation/tests/test_post_unassgined_files.py b/annotation/tests/test_post_unassgined_files.py index 01c15e343..84d797e01 100644 --- a/annotation/tests/test_post_unassgined_files.py +++ b/annotation/tests/test_post_unassgined_files.py @@ -328,9 +328,7 @@ def test_post_tasks_for_unassigned_files( db_post_unassigned_files.query(ManualAnnotationTask) .filter( ManualAnnotationTask.job_id == job_id, - not_( - ManualAnnotationTask.status == TaskStatusEnumSchema.in_progress - ), + not_(ManualAnnotationTask.status == TaskStatusEnumSchema.in_progress), ) .all() ) @@ -343,9 +341,7 @@ def test_post_tasks_for_unassigned_files( del task["id"] files_in_db = ( - db_post_unassigned_files.query(File) - .filter(File.job_id == job_id) - .all() + db_post_unassigned_files.query(File).filter(File.job_id == job_id).all() ) files_in_db = [row_to_dict(f) for f in files_in_db] diff --git a/annotation/tests/test_start_job.py b/annotation/tests/test_start_job.py index 3da8691bb..08b87561a 100644 --- a/annotation/tests/test_start_job.py +++ b/annotation/tests/test_start_job.py @@ -180,9 +180,7 @@ (CHANGE_STATUSES_JOBS[0].job_id, JobStatusEnumSchema.failed), ], ) -def test_update_inner_job_status( - job_id, status, prepare_db_for_update_job_status -): +def test_update_inner_job_status(job_id, status, prepare_db_for_update_job_status): update_inner_job_status(prepare_db_for_update_job_status, job_id, status) prepare_db_for_update_job_status.commit() db_job = prepare_db_for_update_job_status.query(Job).get(job_id) @@ -197,9 +195,7 @@ def test_post_start_job_500_response( job_id=CHANGE_STATUSES_TASKS[0].job_id, ): Session.side_effect = Mock(side_effect=SQLAlchemyError()) - response = client.post( - START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS - ) + response = client.post(START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS) assert response.status_code == 500 assert "Error: connection error" in response.text @@ -246,9 +242,7 @@ def test_post_start_job_bad_job_response( status=job_response_status, headers=TEST_HEADERS, ) - response = client.post( - START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS - ) + response = client.post(START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS) assert response.status_code 
== 500 assert expected_response in response.text saved_tasks = ( @@ -295,9 +289,7 @@ def test_post_start_job_tasks_statuses( status=200, headers=TEST_HEADERS, ) - response = client.post( - START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS - ) + response = client.post(START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS) prepare_db_for_change_statuses.commit() assert response.status_code == 200 assert response.json() == expected_response diff --git a/annotation/tests/test_tasks_crud_cr.py b/annotation/tests/test_tasks_crud_cr.py index 9e2353c68..b86227ee0 100644 --- a/annotation/tests/test_tasks_crud_cr.py +++ b/annotation/tests/test_tasks_crud_cr.py @@ -577,13 +577,9 @@ "view": True, "mapRoles": True, "impersonate": True, - "manage": True - }, - "attributes": { - "tenants": [ - "test" - ] + "manage": True, }, + "attributes": {"tenants": ["test"]}, "clientConsents": None, "clientRoles": None, "createdTimestamp": 1638362379072, @@ -604,7 +600,7 @@ "requiredActions": [], "self": None, "serviceAccountClientId": None, - "username": "admin" + "username": "admin", } @@ -658,18 +654,14 @@ def prepare_stats_export_body( @patch.object(Session, "query") def test_post_task_500_response(Session, prepare_db_for_cr_task): Session.side_effect = Mock(side_effect=SQLAlchemyError()) - response = client.post( - CRUD_TASKS_PATH, json=NEW_TASKS[0], headers=TEST_HEADERS - ) + response = client.post(CRUD_TASKS_PATH, json=NEW_TASKS[0], headers=TEST_HEADERS) assert response.status_code == 500 assert "Error: " in response.text @pytest.mark.integration def test_post_task_wrong_job(prepare_db_for_cr_task): - response = client.post( - CRUD_TASKS_PATH, json=TASK_WRONG_JOB, headers=TEST_HEADERS - ) + response = client.post(CRUD_TASKS_PATH, json=TASK_WRONG_JOB, headers=TEST_HEADERS) assert response.status_code == 400 assert "Error: wrong job_id" in response.text @@ -705,12 +697,8 @@ def test_post_task_wrong_job(prepare_db_for_cr_task): ), # ExtractionJob ], ) -def test_post_task_wrong_users_errors( - prepare_db_for_cr_task, task_info, error_message -): - response = client.post( - CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS - ) +def test_post_task_wrong_users_errors(prepare_db_for_cr_task, task_info, error_message): + response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) assert response.status_code == 400 assert error_message in response.text @@ -724,9 +712,7 @@ def test_post_task_wrong_users_errors( ], ) def test_post_task_422_pages_response(prepare_db_for_cr_task, task_info): - response = client.post( - CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS - ) + response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) assert response.status_code == 422 @@ -757,18 +743,10 @@ def test_post_task_422_pages_response(prepare_db_for_cr_task, task_info): ], ) def test_post_task(prepare_db_for_cr_task, task_info, expected_response): - response = client.post( - CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS - ) + response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) assert response.status_code == 201 - assert [ - value - for key, value in response.json().items() - if key == "id" and value - ] - response = { - key: value for key, value in response.json().items() if key != "id" - } + assert [value for key, value in response.json().items() if key == "id" and value] + response = {key: value for key, value in response.json().items() if key != "id"} assert response == expected_response 
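The reformatted assertions above preserve the check these task-creation tests rely on: the response must carry a server-generated "id", while every other field must equal a static expected payload. A minimal sketch of that comparison pattern, with a hypothetical helper name:

    def assert_created(payload: dict, expected: dict) -> None:
        # "id" is generated by the service, so it is only checked for presence;
        # the remaining fields must match the expected payload exactly.
        assert payload.get("id")
        assert {k: v for k, v in payload.items() if k != "id"} == expected
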
check_files_distributed_pages(prepare_db_for_cr_task, task_info["job_id"]) @@ -883,20 +861,15 @@ def test_update_task_already_updated_change_event( assert response.status_code == 201 assert validate_datetime(content, is_updated=True) - assert ( - prepare_task_stats_expected_response( - task_id=task_id, - event_type="closed", - ) - == prepare_task_stats_expected_response(**content) - ) + assert prepare_task_stats_expected_response( + task_id=task_id, + event_type="closed", + ) == prepare_task_stats_expected_response(**content) @pytest.mark.integration def test_create_export_data_not_found(prepare_db_update_stats): - body = prepare_stats_export_body( - user_ids=[f"{uuid4()}" for _ in range(10)] - ) + body = prepare_stats_export_body(user_ids=[f"{uuid4()}" for _ in range(10)]) response = client.post( f"{CRUD_TASKS_PATH}/export", @@ -921,9 +894,7 @@ def test_create_export_data_not_found(prepare_db_update_stats): def test_create_export_invalid_datetime_format( prepare_db_for_cr_task, date_from, date_to ): - body = prepare_stats_export_body( - user_ids=[f"{uuid4()}" for _ in range(10)] - ) + body = prepare_stats_export_body(user_ids=[f"{uuid4()}" for _ in range(10)]) body["date_from"] = date_from body["date_to"] = date_to @@ -939,9 +910,7 @@ def test_create_export_invalid_datetime_format( @pytest.mark.integration def test_create_export_return_csv(prepare_db_update_stats_already_updated): - body = prepare_stats_export_body( - user_ids=[str(ann.user_id) for ann in ANNOTATORS] - ) + body = prepare_stats_export_body(user_ids=[str(ann.user_id) for ann in ANNOTATORS]) response = client.post( f"{CRUD_TASKS_PATH}/export", @@ -951,10 +920,7 @@ def test_create_export_return_csv(prepare_db_update_stats_already_updated): assert response.status_code == 200 assert "text/csv" in response.headers["content-type"] - assert ( - "filename=annotator_stats_export" - in response.headers["content-disposition"] - ) + assert "filename=annotator_stats_export" in response.headers["content-disposition"] assert len(response.content) > 0 @@ -1090,9 +1056,7 @@ def test_get_tasks( status=200, headers=TEST_HEADERS, ) - response = client.get( - CRUD_TASKS_PATH, params=url_params, headers=TEST_HEADERS - ) + response = client.get(CRUD_TASKS_PATH, params=url_params, headers=TEST_HEADERS) assert response.status_code == 200 response = [ {key: value for key, value in x.items() if key != "id"} @@ -1194,9 +1158,7 @@ def test_get_tasks_pagination( status=200, headers=TEST_HEADERS, ) - response = client.get( - CRUD_TASKS_PATH, params=url_params, headers=TEST_HEADERS - ) + response = client.get(CRUD_TASKS_PATH, params=url_params, headers=TEST_HEADERS) assert response.status_code == 200 response = { key: value @@ -1215,12 +1177,8 @@ def test_get_tasks_pagination( (NEW_TASKS[3], CRUD_CR_JOBS[3].deadline), ], ) -def test_post_task_deadline( - prepare_db_for_cr_task, task_info, expected_deadline -): - response = client.post( - CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS - ) +def test_post_task_deadline(prepare_db_for_cr_task, task_info, expected_deadline): + response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) assert response.status_code == 201 assert response.json()["deadline"] == expected_deadline check_files_distributed_pages(prepare_db_for_cr_task, task_info["job_id"]) @@ -1312,13 +1270,10 @@ def test_search_tasks_500_error(prepare_db_for_cr_task): @pytest.mark.integration def test_search_tasks_400_error(prepare_db_for_cr_task): - data = prepare_filtration_body( - ordering_field="status", 
operator="distinct" - ) + data = prepare_filtration_body(ordering_field="status", operator="distinct") response = client.post(SEARCH_TASKS_PATH, json=data, headers=TEST_HEADERS) error_message = ( - "SELECT DISTINCT ON expressions must " - "match initial ORDER BY expressions" + "SELECT DISTINCT ON expressions must " "match initial ORDER BY expressions" ) assert response.status_code == 400 assert error_message in response.text @@ -1505,9 +1460,7 @@ def tests_search_tasks_ordering( ], ) @responses.activate -def test_search_tasks_wrong_parameters( - wrong_parameter, value, prepare_db_for_cr_task -): +def test_search_tasks_wrong_parameters(wrong_parameter, value, prepare_db_for_cr_task): responses.add( responses.POST, ASSETS_FILES_URL, @@ -1544,9 +1497,7 @@ def test_search_tasks_wrong_parameters( def test_post_task_validation_only( prepare_db_for_cr_task, task_info, expected_status_code, expected_response ): - response = client.post( - CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS - ) + response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) assert response.status_code == expected_status_code response = ( {key: value for key, value in response.json().items() if key != "id"} @@ -1560,9 +1511,7 @@ def test_post_task_validation_only( @pytest.mark.integration @pytest.mark.parametrize("task_info", (NEW_TASKS[9], NEW_TASKS[10])) def test_post_task_wrong_file_error(prepare_db_for_cr_task, task_info): - response = client.post( - CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS - ) + response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) error_message = ( f"{task_info['file_id']} is not assigned for job {task_info['job_id']}" ) @@ -1572,11 +1521,7 @@ def test_post_task_wrong_file_error(prepare_db_for_cr_task, task_info): @pytest.mark.integration def test_post_task_wrong_file_pages(prepare_db_for_cr_task): - response = client.post( - CRUD_TASKS_PATH, json=NEW_TASKS[11], headers=TEST_HEADERS - ) - error_message = "({101, 102}) do not belong to file %s" % ( - NEW_TASKS[11]["file_id"] - ) + response = client.post(CRUD_TASKS_PATH, json=NEW_TASKS[11], headers=TEST_HEADERS) + error_message = "({101, 102}) do not belong to file %s" % (NEW_TASKS[11]["file_id"]) assert response.status_code == 400 assert error_message in response.text diff --git a/annotation/tests/test_tasks_crud_ud.py b/annotation/tests/test_tasks_crud_ud.py index e94c40465..201e9a864 100644 --- a/annotation/tests/test_tasks_crud_ud.py +++ b/annotation/tests/test_tasks_crud_ud.py @@ -549,14 +549,12 @@ def test_delete_task( ( CRUD_UD_CONSTRAINTS_TASKS[6], {"is_validation": False}, - f"{CRUD_UD_CONSTRAINTS_TASKS[6].user_id} is not assigned " - f"as annotator", + f"{CRUD_UD_CONSTRAINTS_TASKS[6].user_id} is not assigned " f"as annotator", ), # same job validator but not annotator ( CRUD_UD_CONSTRAINTS_TASKS[6], {"user_id": CRUD_UD_CONSTRAINTS_USERS[0].user_id}, - f"{CRUD_UD_CONSTRAINTS_USERS[0].user_id} is not assigned " - f"as validator", + f"{CRUD_UD_CONSTRAINTS_USERS[0].user_id} is not assigned " f"as validator", ), # same job annotator but not validator ( CRUD_UD_CONSTRAINTS_TASKS[0], @@ -564,8 +562,7 @@ def test_delete_task( "job_id": CRUD_UD_CONSTRAINTS_JOBS[2].job_id, "is_validation": True, }, - f"{CRUD_UD_CONSTRAINTS_USERS[0].user_id} is not assigned " - f"as validator", + f"{CRUD_UD_CONSTRAINTS_USERS[0].user_id} is not assigned " f"as validator", ), # same user not from new job ], ) @@ -736,9 +733,7 @@ def test_update_task_empty_request(prepare_db_for_ud_task_constrains, task): 
@pytest.mark.integration @pytest.mark.parametrize("task", CRUD_UD_CONSTRAINTS_TASKS[0:2]) -def test_update_task_deadline_with_none_value( - prepare_db_for_ud_task_constrains, task -): +def test_update_task_deadline_with_none_value(prepare_db_for_ud_task_constrains, task): """Checks if task deadline can be updated with None value""" response = client.patch( construct_path(CRUD_TASKS_PATH, task.id), diff --git a/annotation/tests/test_update_job.py b/annotation/tests/test_update_job.py index 2761f3284..6dd1961b9 100644 --- a/annotation/tests/test_update_job.py +++ b/annotation/tests/test_update_job.py @@ -352,9 +352,7 @@ def test_update_categories(category_ids, prepare_db_for_update_job, job_id): .order_by(asc("category_id")) .all() ) - expected_categories = [ - (category_id, job_id) for category_id in category_ids - ] + expected_categories = [(category_id, job_id) for category_id in category_ids] assert jobs_categories == expected_categories all_categories_after = session.query(Category).all() assert all_categories_before == all_categories_after @@ -393,9 +391,7 @@ def test_update_wrong_categories(category_ids, prepare_db_for_update_job): ("files", UPDATE_JOB_IDS[7], [UPDATE_JOB_FILES_FROM_ASSETS[2]]), ], ) -def test_update_files( - prepare_db_for_update_job, monkeypatch, field, job_id, new_files -): +def test_update_files(prepare_db_for_update_job, monkeypatch, field, job_id, new_files): """Checks that files for job successfully update with 204 response both from 'files' and 'dataset' fields and that old job's files delete from 'files' table. Also checks that files with same id as deleted/added for @@ -415,10 +411,7 @@ def test_update_files( ) assert response.status_code == 204 job_files_db = ( - session.query(File) - .filter_by(job_id=job_id) - .order_by(asc(File.file_id)) - .all() + session.query(File).filter_by(job_id=job_id).order_by(asc(File.file_id)).all() ) job_files = [ {"file_id": job_file.file_id, "pages_number": job_file.pages_number} @@ -463,9 +456,7 @@ def test_update_job_new_user( assert existing_users_count == 4 association_table = ASSOCIATION_TABLES[user_type] old_association = ( - session.query(association_table) - .filter_by(job_id=UPDATE_JOB_IDS[1]) - .first() + session.query(association_table).filter_by(job_id=UPDATE_JOB_IDS[1]).first() ) assert str(old_association.user_id) == old_user_id response = client.patch( @@ -544,8 +535,7 @@ def test_update_job_new_user( [USER_IDS[1]], UPDATE_JOB_IDS[7], 400, - "There should be no annotators or validators provided " - "for ImportJob", + "There should be no annotators or validators provided " "for ImportJob", ), ], ) @@ -584,9 +574,7 @@ def test_update_files_and_datasets_for_already_started_job( """Tests that update of job which in progress status with files or datasets is restricted""" expected_code = 422 - error_message = ( - "files and datasets can't be updated for already started job" - ) + error_message = "files and datasets can't be updated for already started job" monkeypatch.setattr( "annotation.jobs.services.get_job_names", Mock(return_value={UPDATE_JOB_IDS[5]: "JobName"}), @@ -634,8 +622,7 @@ def test_update_extraction_job_new_user( session = prepare_db_for_update_job job_id = UPDATE_JOB_IDS[6] existing_users_count = sum( - session.query(table).filter_by(job_id=job_id).count() - for table in tables + session.query(table).filter_by(job_id=job_id).count() for table in tables ) assert existing_users_count == 1 monkeypatch.setattr( @@ -649,8 +636,7 @@ def test_update_extraction_job_new_user( ) assert response.status_code == 
expected_code new_users_count = sum( - session.query(table).filter_by(job_id=job_id).count() - for table in tables + session.query(table).filter_by(job_id=job_id).count() for table in tables ) assert new_users_count == expected_users_count @@ -672,9 +658,7 @@ def test_delete_redundant_users(prepare_db_for_update_job): ) prepare_db_for_update_job.commit() redundant_user = ( - prepare_db_for_update_job.query(User) - .filter(User.user_id == USER_IDS[3]) - .all() + prepare_db_for_update_job.query(User).filter(User.user_id == USER_IDS[3]).all() ) assert not redundant_user assert response.status_code == 204 @@ -693,9 +677,7 @@ def test_not_delete_redundant_user_as_owner_of_another_job( ) prepare_db_for_update_job.commit() redundant_user_owner = ( - prepare_db_for_update_job.query(User) - .filter(User.user_id == USER_IDS[2]) - .all() + prepare_db_for_update_job.query(User).filter(User.user_id == USER_IDS[2]).all() ) assert redundant_user_owner assert response.status_code == 204 diff --git a/annotation/tests/test_validation.py b/annotation/tests/test_validation.py index 0340b0df5..615790455 100644 --- a/annotation/tests/test_validation.py +++ b/annotation/tests/test_validation.py @@ -1417,9 +1417,7 @@ def test_create_validation_tasks_exceptions(db_validation_end, user): TASKS[8].id, { "annotation_user_for_failed_pages": AnnotationAndValidationActionsSchema.auto.value, # noqa E501 - "validation_user_for_reannotated_pages": str( - ANNOTATORS[3].user_id - ), + "validation_user_for_reannotated_pages": str(ANNOTATORS[3].user_id), }, 400, "does not belong", @@ -1477,11 +1475,7 @@ def test_create_validation_tasks_exceptions(db_validation_end, user): ), ( TASKS[4].id, - { - "validation_user_for_reannotated_pages": str( - ANNOTATORS[1].user_id - ) - }, + {"validation_user_for_reannotated_pages": str(ANNOTATORS[1].user_id)}, 400, "Missing `annotation_user", ), @@ -1665,9 +1659,7 @@ def test_check_delete_user_from_annotated_doc(db_validation_end): ManualAnnotationTask.id.in_([100, 102, 103, 107, 111]) ).delete(synchronize_session=False) db_validation_end.commit() - db_validation_end.query(User).filter( - User.user_id == ANNOTATORS[0].user_id - ).delete() + db_validation_end.query(User).filter(User.user_id == ANNOTATORS[0].user_id).delete() db_validation_end.commit() deleted_user = db_validation_end.query( diff --git a/assets/alembic/env.py b/assets/alembic/env.py index c8a6cf694..0bf7a9c97 100644 --- a/assets/alembic/env.py +++ b/assets/alembic/env.py @@ -1,10 +1,11 @@ -from logging.config import fileConfig import os +from logging.config import fileConfig + +from assets.config import settings +from assets.db.utils import get_test_db_url from sqlalchemy import engine_from_config, pool from alembic import context -from assets.db.utils import get_test_db_url -from assets.config import settings # this is the Alembic Config object, which provides # access to the values within the .ini file in use. 
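For reference, the get_test_db_url helper imported above (and touched again later in this patch) only swaps the last path segment of the DSN for "test_db". A quick illustrative check, using a placeholder DSN:

    from assets.db.utils import get_test_db_url

    # Host, credentials and database name below are placeholders.
    assert (
        get_test_db_url("postgresql+psycopg2://admin:admin@host:5432/app_db")
        == "postgresql+psycopg2://admin:admin@host:5432/test_db"
    )
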
@@ -72,9 +73,7 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure( - connection=connection, target_metadata=target_metadata - ) + context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() diff --git a/assets/alembic/versions/afa33cc83d57_new_fields.py b/assets/alembic/versions/afa33cc83d57_new_fields.py index 68b08323d..f15531afb 100644 --- a/assets/alembic/versions/afa33cc83d57_new_fields.py +++ b/assets/alembic/versions/afa33cc83d57_new_fields.py @@ -6,9 +6,9 @@ """ import sqlalchemy as sa +from assets.db.models import TSVector from alembic import op -from assets.db.models import TSVector # revision identifiers, used by Alembic. revision = "afa33cc83d57" @@ -55,9 +55,7 @@ def upgrade() -> None: sa.Column( "ts_vector", TSVector(), - sa.Computed( - "to_tsvector('english', original_name)", persisted=True - ), + sa.Computed("to_tsvector('english', original_name)", persisted=True), nullable=True, ), sa.PrimaryKeyConstraint("id"), @@ -70,9 +68,7 @@ def upgrade() -> None: sa.Column("dataset_id", sa.Integer(), nullable=False), sa.Column("file_id", sa.Integer(), nullable=False), sa.Column("created", sa.DateTime(), nullable=False), - sa.ForeignKeyConstraint( - ["dataset_id"], ["datasets.id"], ondelete="CASCADE" - ), + sa.ForeignKeyConstraint(["dataset_id"], ["datasets.id"], ondelete="CASCADE"), sa.ForeignKeyConstraint(["file_id"], ["files.id"], ondelete="CASCADE"), sa.PrimaryKeyConstraint("dataset_id", "file_id"), ) diff --git a/assets/assets/db/models.py b/assets/assets/db/models.py index 30edb04ad..d5b1ab06f 100644 --- a/assets/assets/db/models.py +++ b/assets/assets/db/models.py @@ -2,6 +2,7 @@ from typing import Any, Dict, Optional import sqlalchemy as sa +from assets.config import settings from filter_lib import create_filter_model from sqlalchemy.dialects.postgresql import TSVECTOR from sqlalchemy.engine.default import DefaultExecutionContext @@ -9,8 +10,6 @@ from sqlalchemy.orm import relationship, sessionmaker from sqlalchemy.types import TypeDecorator -from assets.config import settings - Base = declarative_base() engine = sa.create_engine( settings.database_url, @@ -50,9 +49,7 @@ class Association(Base): # type: ignore sa.ForeignKey("files.id", ondelete="CASCADE"), primary_key=True, ) - created = sa.Column( - sa.DateTime, nullable=False, default=datetime.datetime.utcnow() - ) + created = sa.Column(sa.DateTime, nullable=False, default=datetime.datetime.utcnow()) @property def as_dict(self) -> Dict[str, Any]: @@ -75,9 +72,7 @@ class Datasets(Base): # type: ignore ) name = sa.Column(sa.String(150), nullable=False, unique=True) count = sa.Column(sa.Integer, default=0) - created = sa.Column( - sa.DateTime, nullable=False, default=datetime.datetime.utcnow() - ) + created = sa.Column(sa.DateTime, nullable=False, default=datetime.datetime.utcnow()) ts_vector = sa.Column( TSVector(), sa.Computed( @@ -86,9 +81,7 @@ class Datasets(Base): # type: ignore ), ) - __table_args__ = ( - sa.Index("ix_ds_name", ts_vector, postgresql_using="gin"), - ) + __table_args__ = (sa.Index("ix_ds_name", ts_vector, postgresql_using="gin"),) @property def as_dict(self) -> Dict[str, Any]: @@ -129,9 +122,7 @@ class FileObject(Base): # type: ignore persisted=True, ), ) - datasets = relationship( - "Datasets", secondary="association", backref="files" - ) + datasets = relationship("Datasets", secondary="association", backref="files") __table_args__ = (sa.Index("ix_name", ts_vector, 
postgresql_using="gin"),) diff --git a/assets/assets/db/service.py b/assets/assets/db/service.py index a6620896d..0c89aa7ee 100644 --- a/assets/assets/db/service.py +++ b/assets/assets/db/service.py @@ -1,12 +1,11 @@ from typing import Any, Dict, Optional, Tuple -from filter_lib import PaginationParams, form_query, map_request_to_filter -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.orm import Query, Session, load_only, selectinload - from assets.db.models import Association, Datasets, FileObject, SessionLocal from assets.logger import get_logger from assets.schemas import FileProcessingStatusForUpdate +from filter_lib import PaginationParams, form_query, map_request_to_filter +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Query, Session, load_only, selectinload logger = get_logger(__name__) @@ -64,9 +63,7 @@ def update_file( file_status: str, ) -> Optional[FileObject]: file: Optional[FileObject] = ( - session.query(FileObject) - .filter(FileObject.id == file_id) - .with_for_update() + session.query(FileObject).filter(FileObject.id == file_id).with_for_update() ).first() file.original_name = file_to_update file.bucket = (bucket_name,) @@ -92,11 +89,7 @@ def insert_dataset(session: Session, dataset_name: str) -> None: def delete_file_from_db(session: Session, row_id: int) -> Any: - q = ( - session.query(FileObject) - .filter(FileObject.id == row_id) - .with_for_update() - ) + q = session.query(FileObject).filter(FileObject.id == row_id).with_for_update() decrease_count_in_bounded_datasets(session, row_id) res = q.delete() session.commit() @@ -113,9 +106,7 @@ def update_file_status( file_id: int, file_status: FileProcessingStatusForUpdate, session: Session ) -> Optional[FileObject]: file: Optional[FileObject] = ( - session.query(FileObject) - .filter(FileObject.id == file_id) - .with_for_update() + session.query(FileObject).filter(FileObject.id == file_id).with_for_update() ).first() file.status = file_status try: @@ -139,9 +130,7 @@ def get_all_files_query( session: Session, request: Dict[str, Any] ) -> Tuple[Query, PaginationParams]: filter_args = map_request_to_filter(request, "FileObject") - query = session.query(FileObject).options( - selectinload(FileObject.datasets) - ) + query = session.query(FileObject).options(selectinload(FileObject.datasets)) query, pag = form_query(filter_args, query) return query, pag @@ -181,9 +170,7 @@ def get_all_bonds_query( return query, pag -def is_bounded( - session: Session, file_id: int, ds_name: str -) -> Optional[FileObject]: +def is_bounded(session: Session, file_id: int, ds_name: str) -> Optional[FileObject]: bond = ( session.query(FileObject) .join(Association, Datasets) @@ -194,12 +181,8 @@ def is_bounded( return bond -def add_dataset_to_file( - session: Session, file: FileObject, ds: Datasets -) -> None: - ds_query = ( - session.query(Datasets).filter(Datasets.id == ds.id).with_for_update() - ) +def add_dataset_to_file(session: Session, file: FileObject, ds: Datasets) -> None: + ds_query = session.query(Datasets).filter(Datasets.id == ds.id).with_for_update() file_obj = ( session.query(FileObject) .filter(FileObject.id == file.id) @@ -212,12 +195,8 @@ def add_dataset_to_file( session.commit() -def remove_dataset_from_file( - session: Session, file: FileObject, ds: Datasets -) -> None: - ds_query = ( - session.query(Datasets).filter(Datasets.id == ds.id).with_for_update() - ) +def remove_dataset_from_file(session: Session, file: FileObject, ds: Datasets) -> None: + ds_query = 
session.query(Datasets).filter(Datasets.id == ds.id).with_for_update() file_obj = ( session.query(FileObject) .filter(FileObject.id == file.id) @@ -237,9 +216,7 @@ def decrease_count_in_bounded_datasets(session: Session, file_id: int) -> None: .filter(FileObject.id == file_id) ) ds_ids = [row.id for row in query] - session.query(Datasets).filter( - Datasets.id.in_(ds_ids) - ).with_for_update().update( + session.query(Datasets).filter(Datasets.id.in_(ds_ids)).with_for_update().update( {Datasets.count: Datasets.count - 1}, synchronize_session="fetch" ) session.commit() diff --git a/assets/assets/db/utils.py b/assets/assets/db/utils.py index 2251e4f75..ea1853ede 100644 --- a/assets/assets/db/utils.py +++ b/assets/assets/db/utils.py @@ -7,6 +7,6 @@ def get_test_db_url(main_db_url: str) -> str: postgresql+psycopg2://admin:admin@host:5432/test_db """ main_db_url_split = main_db_url.split("/") - main_db_url_split[-1] = 'test_db' + main_db_url_split[-1] = "test_db" result = "/".join(main_db_url_split) return result diff --git a/assets/assets/routers/bonds_router.py b/assets/assets/routers/bonds_router.py index 38bfb6c0c..a45075356 100644 --- a/assets/assets/routers/bonds_router.py +++ b/assets/assets/routers/bonds_router.py @@ -4,7 +4,6 @@ import fastapi import filter_lib import sqlalchemy.orm - from assets import db, schemas, utils router = fastapi.APIRouter(prefix="/datasets/bonds", tags=["bonds"]) @@ -20,9 +19,7 @@ async def search_bonds( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> filter_lib.Page[Dict[str, Any]]: """ Takes every bounded pair dataset-file and returns them. @@ -52,9 +49,7 @@ async def bound_files_to_dataset( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> List[schemas.ActionResponse]: """ Bounds file objects to a given dataset. If dataset does not exist HTTPException @@ -130,9 +125,7 @@ async def unbound_files_from_dataset( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> List[schemas.ActionResponse]: """ Unbound file objects with a given dataset. 
If dataset does not exist HTTPException diff --git a/assets/assets/routers/datasets_router.py b/assets/assets/routers/datasets_router.py index f6ffa3e89..bd4113100 100644 --- a/assets/assets/routers/datasets_router.py +++ b/assets/assets/routers/datasets_router.py @@ -6,7 +6,6 @@ import sqlalchemy.exc import sqlalchemy.orm import sqlalchemy_filters.exceptions - from assets import db, schemas router = fastapi.APIRouter(prefix="/datasets", tags=["datasets"]) @@ -22,9 +21,7 @@ async def search_datasets( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> filter_lib.Page[schemas.DatasetResponse]: """ Allows getting datasets data with filters, sorts and pagination. @@ -55,9 +52,7 @@ async def create_dataset( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> Dict[str, Any]: """ Creates a new dataset object in database. If dataset with given name is already exists @@ -100,9 +95,7 @@ async def delete_dataset( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> Dict[str, Any]: """ Deletes a dataset with a given name from a database. If that dataset does not exist @@ -138,9 +131,7 @@ async def get_files_by_dataset( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> filter_lib.Page[schemas.FileResponse]: """ Takes a dataset name and returns all files metadata with this dataset. @@ -186,9 +177,7 @@ def get_all_files_by_dataset_id( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> Optional[List[schemas.FileResponse]]: if not db.service.get_all_files_by_ds_id(session, dataset_id): raise fastapi.HTTPException( diff --git a/assets/assets/routers/files_router.py b/assets/assets/routers/files_router.py index 67ab315cf..f357976e9 100644 --- a/assets/assets/routers/files_router.py +++ b/assets/assets/routers/files_router.py @@ -6,7 +6,6 @@ import minio import sqlalchemy.orm import sqlalchemy_filters.exceptions - from assets import db, exceptions, schemas, utils router = fastapi.APIRouter(prefix="/files", tags=["files"]) @@ -22,9 +21,7 @@ async def search_files( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> filter_lib.Page[schemas.FileResponse]: """ Allows getting files metadata with filters, sorts and pagination. 
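The one-line tenant-header dependency collapsed throughout these routers has the same shape everywhere; a self-contained sketch of the pattern (endpoint name and return value are illustrative only):

    from typing import Optional

    import fastapi

    router = fastapi.APIRouter()

    @router.get("/whoami")
    async def whoami(
        x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"),
    ) -> Optional[str]:
        # Returns the tenant passed in the "X-Current-Tenant" request header.
        return x_current_tenant
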
@@ -94,10 +91,7 @@ async def upload_files( bucket_name, files, session, storage_ ) - return [ - schemas.ActionResponse.parse_obj(response) - for response in upload_results - ] + return [schemas.ActionResponse.parse_obj(response) for response in upload_results] @router.delete( @@ -112,9 +106,7 @@ async def delete_files( db.service.session_scope_for_dependency ), storage: minio.Minio = fastapi.Depends(utils.minio_utils.get_storage), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> List[schemas.ActionResponse]: """ Deletes objects from minio storage and then their metadata from database. @@ -193,9 +185,7 @@ async def update_file( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> schemas.FileResponse: file_obj = db.service.get_file_by_id(session, request.file) if not file_obj: diff --git a/assets/assets/routers/minio_router.py b/assets/assets/routers/minio_router.py index 9822a0fe5..d4132b2a3 100644 --- a/assets/assets/routers/minio_router.py +++ b/assets/assets/routers/minio_router.py @@ -4,22 +4,17 @@ import minio import sqlalchemy.orm import urllib3.exceptions - from assets import db, schemas, utils from assets.config import settings router = fastapi.APIRouter(tags=["minio"]) -@router.get( - "/download", name="gets file from minio with original content-type" -) +@router.get("/download", name="gets file from minio with original content-type") async def get_from_minio( file_id: int, background_tasks: fastapi.BackgroundTasks, - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), original: bool = False, session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency @@ -49,18 +44,14 @@ async def get_from_minio( utils.minio_utils.check_bucket(f.bucket, storage) response = utils.minio_utils.stream_minio(f.path, f.bucket, storage) if original: - response = utils.minio_utils.stream_minio( - f.origin_path, f.bucket, storage - ) + response = utils.minio_utils.stream_minio(f.origin_path, f.bucket, storage) background_tasks.add_task(utils.minio_utils.close_conn, response) return fastapi.responses.StreamingResponse( response.stream(), media_type=response.headers["Content-Type"] ) -@router.get( - "/download/thumbnail", name="get thumbnail of original file in jpg format" -) +@router.get("/download/thumbnail", name="get thumbnail of original file in jpg format") async def get_preview_from_minio( file_id: int, background_tasks: fastapi.BackgroundTasks, @@ -68,9 +59,7 @@ async def get_preview_from_minio( db.service.session_scope_for_dependency ), storage: minio.Minio = fastapi.Depends(utils.minio_utils.get_storage), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> fastapi.responses.StreamingResponse: f = db.service.get_file_by_id(session, file_id) if not f: @@ -96,9 +85,7 @@ async def get_image_piece( ..., example=(100, 100, 200, 200) ), page_number: int = fastapi.Query(..., ge=1, example=1), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + 
x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), storage: minio.Minio = fastapi.Depends(utils.minio_utils.get_storage), session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency @@ -117,9 +104,7 @@ async def get_image_piece( ) piece_path = f"files/bbox/{f.id}/page{page_number}_bbox{bbox}_ext{settings.bbox_ext}.jpg" # noqa if not utils.minio_utils.check_file_exist(piece_path, f.bucket, storage): - utils.minio_utils.make_pdf_piece( - f, page_number, bbox, piece_path, storage - ) + utils.minio_utils.make_pdf_piece(f, page_number, bbox, piece_path, storage) response = utils.minio_utils.stream_minio(piece_path, f.bucket, storage) background_tasks.add_task(utils.minio_utils.close_conn, response) @@ -136,9 +121,7 @@ async def get_image_piece( async def create_bucket( bucket: schemas.Bucket, storage: minio.Minio = fastapi.Depends(utils.minio_utils.get_storage), - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), ) -> Dict[str, str]: """ Creates bucket into Minio. If bucket exists HTTPException will be diff --git a/assets/assets/routers/s3_router.py b/assets/assets/routers/s3_router.py index b145cff21..ed346fde9 100644 --- a/assets/assets/routers/s3_router.py +++ b/assets/assets/routers/s3_router.py @@ -4,7 +4,6 @@ import minio import sqlalchemy.orm import urllib3.exceptions - from assets import db, exceptions, schemas, utils router = fastapi.APIRouter(prefix="/s3_upload", tags=["s_3"]) @@ -19,9 +18,7 @@ async def download_s3_files( s3_data: schemas.S3Data, storage_url: Optional[str] = None, - x_current_tenant: Optional[str] = fastapi.Header( - None, alias="X-Current-Tenant" - ), + x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), @@ -71,6 +68,5 @@ async def download_s3_files( ) return [ - schemas.ActionResponse.parse_obj(response) - for response in upload_results + schemas.ActionResponse.parse_obj(response) for response in upload_results ] # noqa diff --git a/assets/assets/utils/common_utils.py b/assets/assets/utils/common_utils.py index 64cda6323..8a1192925 100644 --- a/assets/assets/utils/common_utils.py +++ b/assets/assets/utils/common_utils.py @@ -9,7 +9,6 @@ import requests import sqlalchemy.orm import starlette.datastructures - from assets import db, exceptions, logger, schemas from assets.config import settings from assets.utils import minio_utils @@ -194,9 +193,7 @@ def convert_to_pdf(self) -> bytes: # In case of some error, the content of Gotenberg response is plain text. 
# noqa self.conversion_status = "conversion error" logger_.error( - logger_.error( - "%s with %s", self.conversion_status, self.file_name - ) + logger_.error("%s with %s", self.conversion_status, self.file_name) ) raise exceptions.FileConversionError self.converted_ext = ".pdf" @@ -224,12 +221,8 @@ def convert_to_jpg(self) -> bytes: return byte_im def convert_txt(self): - input_text_path = ( - f"files/{self.new_file.id}/" f"{self.new_file.id}.txt" - ) - output_pdf_path = ( - f"files/{self.new_file.id}/" f"{self.new_file.id}.pdf" - ) + input_text_path = f"files/{self.new_file.id}/" f"{self.new_file.id}.txt" + output_pdf_path = f"files/{self.new_file.id}/" f"{self.new_file.id}.pdf" output_tokens_path = f"files/{self.new_file.id}/ocr/1.json" post_to_convert( self.bucket_storage, diff --git a/assets/assets/utils/convert_service_utils.py b/assets/assets/utils/convert_service_utils.py index 69b0af269..f55180a82 100644 --- a/assets/assets/utils/convert_service_utils.py +++ b/assets/assets/utils/convert_service_utils.py @@ -1,14 +1,11 @@ import requests - from assets import logger from assets.config import settings logger_ = logger.get_logger(__name__) -def post_to_convert( - bucket: str, input_text, output_pdf, output_tokens -) -> bool: +def post_to_convert(bucket: str, input_text, output_pdf, output_tokens) -> bool: """ Puts file into convert service """ @@ -22,9 +19,7 @@ def post_to_convert( }, ) if response.status_code != 201: - logger_.info( - f"File {input_text} failed to convert: " f"{response.text}" - ) + logger_.info(f"File {input_text} failed to convert: " f"{response.text}") return False except requests.exceptions.ConnectionError as e: logger_.error(f"Connection error - detail: {e}") diff --git a/assets/assets/utils/minio_utils.py b/assets/assets/utils/minio_utils.py index a75a54291..4fd7b2ecf 100644 --- a/assets/assets/utils/minio_utils.py +++ b/assets/assets/utils/minio_utils.py @@ -6,10 +6,9 @@ import pdf2image.exceptions import PIL.Image import urllib3.exceptions -from minio.credentials import AWSConfigProvider, EnvAWSProvider, IamAwsProvider - from assets import db, logger from assets.config import settings +from minio.credentials import AWSConfigProvider, EnvAWSProvider, IamAwsProvider logger_ = logger.get_logger(__name__) @@ -40,20 +39,14 @@ def create_minio_config(): elif settings.s3_credentials_provider == "aws_config": # environmental variable AWS_PROFILE_NAME should be set minio_config.update( - { - "credentials": AWSConfigProvider( - profile=settings.aws_profile_name - ) - } + {"credentials": AWSConfigProvider(profile=settings.aws_profile_name)} ) else: raise NotConfiguredException( "s3 connection is not properly configured - " "s3_credentials_provider is not set" ) - logger_.debug( - f"S3_Credentials provider - {settings.s3_credentials_provider}" - ) + logger_.debug(f"S3_Credentials provider - {settings.s3_credentials_provider}") return minio_config @@ -75,37 +68,25 @@ def upload_in_minio( """ pdf_bytes = make_thumbnail_pdf(file) if pdf_bytes and isinstance(pdf_bytes, bytes): - upload_thumbnail( - file_obj.bucket, pdf_bytes, client, file_obj.thumb_path - ) + upload_thumbnail(file_obj.bucket, pdf_bytes, client, file_obj.thumb_path) image_bytes = make_thumbnail_images(file) if image_bytes and isinstance(image_bytes, bytes): - upload_thumbnail( - file_obj.bucket, image_bytes, client, file_obj.thumb_path - ) - return put_file_to_minio( - client, file, file_obj, file_obj.content_type, "converted" - ) + upload_thumbnail(file_obj.bucket, image_bytes, client, file_obj.thumb_path) + 
return put_file_to_minio(client, file, file_obj, file_obj.content_type, "converted") -def remake_thumbnail( - file_obj: db.models.FileObject, storage: minio.Minio -) -> bool: +def remake_thumbnail(file_obj: db.models.FileObject, storage: minio.Minio) -> bool: obj: urllib3.response.HTTPResponse = storage.get_object( file_obj.bucket, file_obj.path ) pdf_bytes = make_thumbnail_pdf(obj.data) if pdf_bytes and isinstance(pdf_bytes, bytes): - upload_thumbnail( - file_obj.bucket, pdf_bytes, storage, file_obj.thumb_path - ) + upload_thumbnail(file_obj.bucket, pdf_bytes, storage, file_obj.thumb_path) image_bytes = make_thumbnail_images(obj.data) if image_bytes and isinstance(image_bytes, bytes): - upload_thumbnail( - file_obj.bucket, image_bytes, storage, file_obj.thumb_path - ) + upload_thumbnail(file_obj.bucket, image_bytes, storage, file_obj.thumb_path) obj.close() if not pdf_bytes and not image_bytes: logger_.error("File is not an image") @@ -346,9 +327,7 @@ def get_size_ratio(width: int, height: int) -> float: try: r = width / height if r <= 0: - logger_.error( - "Current size raio <= 0! w = %s , h = %s", width, height - ) + logger_.error("Current size ratio <= 0! w = %s , h = %s", width, height) r = 1.0 return r except ZeroDivisionError: diff --git a/assets/assets/utils/s3_utils.py b/assets/assets/utils/s3_utils.py index df5a514ef..9982dfdae 100644 --- a/assets/assets/utils/s3_utils.py +++ b/assets/assets/utils/s3_utils.py @@ -3,7 +3,6 @@ import boto3 import urllib3.exceptions - from assets import exceptions, logger from assets.config import settings @@ -37,9 +36,7 @@ def __init__( region_name=region_name, ) - def get_files( - self, bucket_s3: str, files_keys: List[str] - ) -> Dict[str, BytesIO]: + def get_files(self, bucket_s3: str, files_keys: List[str]) -> Dict[str, BytesIO]: """ Downloads files from S3 storage """ @@ -55,9 +52,7 @@ def _check_bucket_exist(self, bucket_s3: str) -> Any: """ Checks if required bucket exists in S3 """ - all_s3_buckets = [ - bucket.name for bucket in self.resource.buckets.all() - ] + all_s3_buckets = [bucket.name for bucket in self.resource.buckets.all()] if bucket_s3 not in all_s3_buckets: raise exceptions.BucketError(f"bucket {bucket_s3} does not exist!") @@ -67,15 +62,11 @@ def _check_files_exist(self, bucket_s3: str, files_keys: List[str]) -> Any: """ all_files_in_bucket = [ content["Key"] - for content in self.client.list_objects(Bucket=bucket_s3)[ - "Contents" - ] + for content in self.client.list_objects(Bucket=bucket_s3)["Contents"] ] for file_key in files_keys: if file_key not in all_files_in_bucket: - raise exceptions.FileKeyError( - f"file key {file_key} does not exist!"
- ) + raise exceptions.FileKeyError(f"file key {file_key} does not exist!") def check_s3(self, bucket_s3: str, files_keys: List[str]) -> Any: """ diff --git a/assets/tests/conftest.py b/assets/tests/conftest.py index 8e82fd48d..726955cfb 100644 --- a/assets/tests/conftest.py +++ b/assets/tests/conftest.py @@ -53,9 +53,7 @@ def setup_database(use_temp_env_var): except SQLAlchemyError as e: raise SQLAlchemyError(f"Got an Exception during migrations - {e}") - session_local = sessionmaker( - autocommit=False, autoflush=False, bind=engine - ) + session_local = sessionmaker(autocommit=False, autoflush=False, bind=engine) session = session_local() yield session session.close() @@ -89,21 +87,11 @@ def setup_tenant(): @pytest.fixture -def client_app_main( - setup_database, minio_mock_exists_bucket_true, setup_tenant -): - minio_mock_exists_bucket_true.get_object.return_value = ( - urllib3.HTTPResponse() - ) - minio_mock_exists_bucket_true.get_object.return_value.headers[ - "Content-Type" - ] = "" - app.dependency_overrides[ - get_storage - ] = lambda: minio_mock_exists_bucket_true - app.dependency_overrides[ - session_scope_for_dependency - ] = lambda: setup_database +def client_app_main(setup_database, minio_mock_exists_bucket_true, setup_tenant): + minio_mock_exists_bucket_true.get_object.return_value = urllib3.HTTPResponse() + minio_mock_exists_bucket_true.get_object.return_value.headers["Content-Type"] = "" + app.dependency_overrides[get_storage] = lambda: minio_mock_exists_bucket_true + app.dependency_overrides[session_scope_for_dependency] = lambda: setup_database app.dependency_overrides[tenant] = lambda: setup_tenant with patch.object(minio_utils, "delete_one_from_minio", return_value=True): @@ -115,12 +103,8 @@ def client_app_main( def client_app_main_bucket_false( setup_database, minio_mock_exists_bucket_false, setup_tenant ): - app.dependency_overrides[ - get_storage - ] = lambda: minio_mock_exists_bucket_false - app.dependency_overrides[ - session_scope_for_dependency - ] = lambda: setup_database + app.dependency_overrides[get_storage] = lambda: minio_mock_exists_bucket_false + app.dependency_overrides[session_scope_for_dependency] = lambda: setup_database app.dependency_overrides[tenant] = lambda: setup_tenant client = TestClient(app) yield client diff --git a/assets/tests/test_helpers.py b/assets/tests/test_helpers.py index c736c60c2..641cf55bc 100644 --- a/assets/tests/test_helpers.py +++ b/assets/tests/test_helpers.py @@ -60,9 +60,7 @@ def test_delete_one_from_minio(minio_mock_exists_bucket_true): with patch("tests.test_helpers.delete_one_from_minio") as mock_: mock_.side_effect = [True, False] random_name = uuid.uuid4().hex - minio_mock_exists_bucket_true.fput_object( - random_name, "testfile", Mock() - ) + minio_mock_exists_bucket_true.fput_object(random_name, "testfile", Mock()) x = delete_one_from_minio( random_name, "testfile", minio_mock_exists_bucket_true ) @@ -93,11 +91,7 @@ def test_put_to_db(setup_database): def test_update_file_status(file_): session = file_ - f = ( - session.query(FileObject) - .filter(FileObject.original_name == "testname") - .first() - ) + f = session.query(FileObject).filter(FileObject.original_name == "testname").first() assert f fi = update_file_status(f.id, FileProcessingStatus.UPLOADED, file_) assert fi.status == "uploaded" diff --git a/assets/tests/test_main.py b/assets/tests/test_main.py index 5e69c7dc9..955cd9bb0 100644 --- a/assets/tests/test_main.py +++ b/assets/tests/test_main.py @@ -12,9 +12,7 @@ def 
test_create_bucket(client_app_main_bucket_false): random_name = "tests" + uuid.uuid4().hex bucket = {"name": random_name} - tests_bucket = client_app_main_bucket_false.post( - "/bucket", data=json.dumps(bucket) - ) + tests_bucket = client_app_main_bucket_false.post("/bucket", data=json.dumps(bucket)) assert tests_bucket.status_code == 201 @@ -29,9 +27,7 @@ def test_bucket_name_on_create_bucket_with_prefix( random_name = "tests" + uuid.uuid4().hex bucket = {"name": random_name} - response = client_app_main_bucket_false.post( - "/bucket", data=json.dumps(bucket) - ) + response = client_app_main_bucket_false.post("/bucket", data=json.dumps(bucket)) assert response.status_code == 201 assert ( response.json()["detail"] @@ -50,14 +46,9 @@ def test_bucket_name_on_create_bucket_without_prefix( random_name = "tests" + uuid.uuid4().hex bucket = {"name": random_name} - response = client_app_main_bucket_false.post( - "/bucket", data=json.dumps(bucket) - ) + response = client_app_main_bucket_false.post("/bucket", data=json.dumps(bucket)) assert response.status_code == 201 - assert ( - response.json()["detail"] - == f"Bucket {random_name} successfully created!" - ) + assert response.json()["detail"] == f"Bucket {random_name} successfully created!" def test_upload_and_delete_file_without_conversion(client_app_main): @@ -145,12 +136,8 @@ def test_get_file_by_id(client_app_main): ) file_id = response.json()[0]["id"] - search_body = { - "filters": [{"field": "id", "operator": "eq", "value": file_id}] - } - res_get_one = client_app_main.post( - "/files/search", data=json.dumps(search_body) - ) + search_body = {"filters": [{"field": "id", "operator": "eq", "value": file_id}]} + res_get_one = client_app_main.post("/files/search", data=json.dumps(search_body)) assert res_get_one.status_code == 200 assert res_get_one.json()["data"][0]["id"] == file_id @@ -187,9 +174,7 @@ def test_put_and_delete_dataset(client_app_main): body = {"name": random_name} res = client_app_main.post("/datasets", data=json.dumps(body)) assert res.status_code == 201 - assert res.json() == { - "detail": f"Dataset {random_name} successfully created!" - } + assert res.json() == {"detail": f"Dataset {random_name} successfully created!"} res_delete = client_app_main.delete("/datasets", data=json.dumps(body)) assert res_delete.status_code == 201 @@ -222,21 +207,15 @@ def test_bound_and_unbound(client_app_main): count_body = { "filters": [{"field": "name", "operator": "eq", "value": dataset_name}] } - res_count = client_app_main.post( - "/datasets/search", data=json.dumps(count_body) - ) + res_count = client_app_main.post("/datasets/search", data=json.dumps(count_body)) assert res_count.json()["data"][0]["count"] == 1 - res_unbound = client_app_main.delete( - "/datasets/bonds", data=json.dumps(data) - ) + res_unbound = client_app_main.delete("/datasets/bonds", data=json.dumps(data)) assert res_unbound.status_code == 201 assert file_id == res_unbound.json()[0]["id"] assert res_unbound.json()[0]["status"] - res_delete_dataset = client_app_main.delete( - "/datasets", data=json.dumps(body) - ) + res_delete_dataset = client_app_main.delete("/datasets", data=json.dumps(body)) assert res_delete_dataset.status_code == 201 assert res_delete_dataset.json() == { "detail": f"Dataset {dataset_name} successfully deleted!" 
@@ -267,9 +246,7 @@ def test_get_files_by_dataset(client_app_main): assert res_put.status_code == 201 bound_data = {"name": dataset_name, "objects": [file_id]} - res_bound = client_app_main.post( - "/datasets/bonds", data=json.dumps(bound_data) - ) + res_bound = client_app_main.post("/datasets/bonds", data=json.dumps(bound_data)) assert res_bound.status_code == 201 assert file_id == res_bound.json()[0]["id"] assert res_bound.json()[0]["status"] @@ -280,18 +257,14 @@ def test_get_files_by_dataset(client_app_main): assert res_get_by_dataset.status_code == 200 assert res_get_by_dataset.json()["data"][0]["id"] == file_id - res_delete_dataset = client_app_main.delete( - "/datasets", data=json.dumps(body) - ) + res_delete_dataset = client_app_main.delete("/datasets", data=json.dumps(body)) assert res_delete_dataset.status_code == 201 assert res_delete_dataset.json() == { "detail": f"Dataset {dataset_name} successfully deleted!" } file_body = {"objects": [file_id]} - res_delete_file = client_app_main.delete( - "/files", data=json.dumps(file_body) - ) + res_delete_file = client_app_main.delete("/files", data=json.dumps(file_body)) assert res_delete_file.status_code == 201 assert file_id == res_delete_file.json()[0]["id"] assert res_delete_file.json()[0]["status"] @@ -338,16 +311,12 @@ def test_get_dataset_by_name(client_app_main): body = {"name": random_name} res = client_app_main.post("/datasets", data=json.dumps(body)) assert res.status_code == 201 - assert res.json() == { - "detail": f"Dataset {random_name} successfully created!" - } + assert res.json() == {"detail": f"Dataset {random_name} successfully created!"} search_body = { "filters": [{"field": "name", "operator": "eq", "value": random_name}] } - res_id = client_app_main.post( - "/datasets/search", data=json.dumps(search_body) - ) + res_id = client_app_main.post("/datasets/search", data=json.dumps(search_body)) assert res_id.status_code == 200 assert res_id.json()["data"][0]["id"] == 1 assert res_id.json()["data"][0]["name"] == random_name @@ -390,13 +359,9 @@ def test_get_files_by_filename_positive(client_app_main): file_name = res_upload_1.json()[0]["file_name"] search_body = { - "filters": [ - {"field": "original_name", "operator": "eq", "value": file_name} - ] + "filters": [{"field": "original_name", "operator": "eq", "value": file_name}] } - get_by_name = client_app_main.post( - "/files/search", data=json.dumps(search_body) - ) + get_by_name = client_app_main.post("/files/search", data=json.dumps(search_body)) assert get_by_name.status_code == 200 all_names = [el["original_name"] for el in get_by_name.json()["data"]] @@ -418,12 +383,8 @@ def test_get_files_by_filename_empty_array(client_app_main): assert res_upload.status_code == 201 assert res_upload.json()[0]["status"] - search_body = { - "filters": [{"field": "id", "operator": "eq", "value": id_ + 10111}] - } - get_by_name = client_app_main.post( - "/files/search", data=json.dumps(search_body) - ) + search_body = {"filters": [{"field": "id", "operator": "eq", "value": id_ + 10111}]} + get_by_name = client_app_main.post("/files/search", data=json.dumps(search_body)) assert get_by_name.status_code == 200 assert get_by_name.json()["data"] == [] @@ -456,9 +417,7 @@ def test_download_positive(client_app_main): @patch("assets.utils.common_utils.requests.post") -def test_download_positive_originals( - gotenberg, pdf_file_bytes, client_app_main -): +def test_download_positive_originals(gotenberg, pdf_file_bytes, client_app_main): response = Response() response._content = pdf_file_bytes 
gotenberg.return_value = response @@ -474,9 +433,7 @@ def test_download_positive_originals( assert res_upload.status_code == 201 assert res_upload.json()[0]["status"] - res_download = client_app_main.get( - f"/download?file_id={id_}&original=true" - ) + res_download = client_app_main.get(f"/download?file_id={id_}&original=true") assert res_download.status_code == 200 @@ -504,9 +461,7 @@ def test_count_changing(client_app_main): count_body = { "filters": [{"field": "name", "operator": "eq", "value": dataset_name}] } - res_count = client_app_main.post( - "/datasets/search", data=json.dumps(count_body) - ) + res_count = client_app_main.post("/datasets/search", data=json.dumps(count_body)) assert res_count.json()["data"][0]["count"] == 1 id_ = res_upload.json()[0]["id"] diff --git a/assets/tests/test_utils.py b/assets/tests/test_utils.py index 19cf374c0..d33478a82 100644 --- a/assets/tests/test_utils.py +++ b/assets/tests/test_utils.py @@ -186,9 +186,7 @@ def test_file_processor_is_file_updated_status_not_updated(update_file_status): @patch("assets.utils.common_utils.FileProcessor.is_file_updated") @patch("assets.utils.common_utils.FileProcessor.is_blank_is_created") -@patch( - "assets.utils.common_utils.FileProcessor.is_original_file_uploaded_to_storage" -) +@patch("assets.utils.common_utils.FileProcessor.is_original_file_uploaded_to_storage") @patch("assets.utils.common_utils.FileProcessor.is_uploaded_to_storage") @patch("assets.utils.common_utils.FileProcessor.is_inserted_to_database") @patch("assets.utils.common_utils.FileProcessor.is_converted_file") @@ -430,9 +428,7 @@ def test_s3_manager_get_files(): @patch("assets.utils.s3_utils.S3Manager._check_bucket_exist") @patch("assets.utils.s3_utils.S3Manager._check_files_exist") -def test_s3_manager_check_s3_buckets_and_files_exist( - check_buckets, check_files -): +def test_s3_manager_check_s3_buckets_and_files_exist(check_buckets, check_files): s3 = S3Manager("a", "b", endpoint_url=None) check_buckets.return_value = None check_files.return_value = None @@ -478,9 +474,7 @@ def test_check_uploading_limit_not_exceed(): @patch("assets.utils.common_utils.get_mimetype") @patch("assets.utils.common_utils.requests.post") -def test_file_processor_conversion_error( - gotenberg, get_mimetype, pdf_file_bytes -): +def test_file_processor_conversion_error(gotenberg, get_mimetype, pdf_file_bytes): response = Response() response._content = pdf_file_bytes gotenberg.return_value = response @@ -530,9 +524,7 @@ def test_file_converted_converted_to_pdf_side_effect( def test_file_converted_converted_to_jpg(png_bytes): new_db_file = FileObject() - converter = FileConverter( - png_bytes, "some_file.png", ".png", "test", new_db_file - ) + converter = FileConverter(png_bytes, "some_file.png", ".png", "test", new_db_file) assert converter.convert() is True @@ -629,8 +621,6 @@ def test_get_pixel_bbox_size( current_pixel_size, original_pts_size, bbox, expected_result ): assert ( - minio_utils.get_pixel_bbox_size( - current_pixel_size, original_pts_size, bbox - ) + minio_utils.get_pixel_bbox_size(current_pixel_size, original_pts_size, bbox) == expected_result ) diff --git a/common/minio_service/minio_service/minio_api.py b/common/minio_service/minio_service/minio_api.py index 5f04cb417..fb75b3997 100644 --- a/common/minio_service/minio_service/minio_api.py +++ b/common/minio_service/minio_service/minio_api.py @@ -7,9 +7,7 @@ from . 
import logger # type: ignore -LOGGER = logger.get_logger( - __name__, "MINIO_COMMUNICATION_PATH", "minio_communication" -) +LOGGER = logger.get_logger(__name__, "MINIO_COMMUNICATION_PATH", "minio_communication") class BucketExistsError(Exception): @@ -72,7 +70,9 @@ class MinioCommunicator: client = None - def __init__(self, minio_server: str, minio_root_user: str, minio_root_password: str) -> None: + def __init__( + self, minio_server: str, minio_root_user: str, minio_root_password: str + ) -> None: if not MinioCommunicator.client: self.create_client(minio_server, minio_root_user, minio_root_password) @@ -130,9 +130,7 @@ def download_file(self, bucket: str, path: str, local_path: Path) -> None: ) self.client.fget_object(bucket, path, str(local_path)) - def download_directory( - self, bucket: str, path: str, local_dir: Path - ) -> None: + def download_directory(self, bucket: str, path: str, local_dir: Path) -> None: """ Download directory from minio to indicated local directory. Args: @@ -166,16 +164,10 @@ def upload_file(self, bucket: str, path: str, local_path: Path) -> None: if not local_path.exists() or not local_path.suffix: LOGGER.error("file %s doesn't exist", local_path) raise FileExistsError(f"file {local_path} doesn't exist") - LOGGER.info( - "Uploading from file %s to %s/%s", local_path, bucket, path - ) - self.client.fput_object( - bucket, os.path.join(path, local_path), local_path - ) + LOGGER.info("Uploading from file %s to %s/%s", local_path, bucket, path) + self.client.fput_object(bucket, os.path.join(path, local_path), local_path) - def upload_directory( - self, bucket: str, path: str, local_dir: Path - ) -> None: + def upload_directory(self, bucket: str, path: str, local_dir: Path) -> None: """ Upload directory to minio. Args: diff --git a/common/minio_service/setup.py b/common/minio_service/setup.py index b68846027..b7da4b995 100644 --- a/common/minio_service/setup.py +++ b/common/minio_service/setup.py @@ -1,36 +1,38 @@ # -*- coding: utf-8 -*- from setuptools import setup -packages = \ -['minio_service'] +packages = ["minio_service"] -package_data = \ -{'': ['*']} +package_data = {"": ["*"]} -install_requires = \ -['minio>=7.1.1,<8.0.0', - 'mypy-extensions>=0.4.3,<0.5.0', - 'pydantic>=1.8.2,<2.0.0'] +install_requires = [ + "minio>=7.1.1,<8.0.0", + "mypy-extensions>=0.4.3,<0.5.0", + "pydantic>=1.8.2,<2.0.0", +] -entry_points = \ -{'console_scripts': ['add-logging = commands:add_logger', - 'get-setup = commands:get_setup']} +entry_points = { + "console_scripts": [ + "add-logging = commands:add_logger", + "get-setup = commands:get_setup", + ] +} setup_kwargs = { - 'name': 'minio-service', - 'version': '0.1.0', - 'description': '', - 'long_description': None, - 'author': None, - 'author_email': None, - 'maintainer': None, - 'maintainer_email': None, - 'url': None, - 'packages': packages, - 'package_data': package_data, - 'install_requires': install_requires, - 'entry_points': entry_points, - 'python_requires': '>=3.8,<4.0', + "name": "minio-service", + "version": "0.1.0", + "description": "", + "long_description": None, + "author": None, + "author_email": None, + "maintainer": None, + "maintainer_email": None, + "url": None, + "packages": packages, + "package_data": package_data, + "install_requires": install_requires, + "entry_points": entry_points, + "python_requires": ">=3.8,<4.0", } diff --git a/common/model_api/example/__init__.py b/common/model_api/example/__init__.py index b794fd409..3dc1f76bc 100644 --- a/common/model_api/example/__init__.py +++ 
b/common/model_api/example/__init__.py @@ -1 +1 @@ -__version__ = '0.1.0' +__version__ = "0.1.0" diff --git a/common/model_api/example/__main__.py b/common/model_api/example/__main__.py index 35d722b27..b5f358794 100644 --- a/common/model_api/example/__main__.py +++ b/common/model_api/example/__main__.py @@ -1,20 +1,21 @@ from pathlib import Path import uvicorn +from model_api import config from model_api.common.minio_utils import MinioCommunicator from model_api.creator import create_app -from model_api import config from .config import settings from .inference import get_model, inference -app = create_app(get_model=get_model, - inference=inference, - bucket=settings.data_bucket, - model_files=None, - destination=Path(settings.volume_path) / settings.model_path, - loader=MinioCommunicator() - ) +app = create_app( + get_model=get_model, + inference=inference, + bucket=settings.data_bucket, + model_files=None, + destination=Path(settings.volume_path) / settings.model_path, + loader=MinioCommunicator(), +) # download_model(loader=MinioCommunicator()) config.settings = settings diff --git a/common/model_api/model_api/common/models.py b/common/model_api/model_api/common/models.py index a86658e84..8cb24b170 100644 --- a/common/model_api/model_api/common/models.py +++ b/common/model_api/model_api/common/models.py @@ -47,9 +47,7 @@ class Args(BaseModel): class ClassifierRequest(BaseModel): """Request to classify DOD's bboxes.""" - input_path: Path = Field( - example=Path("ternary_out/molecule_annotation.json") - ) + input_path: Path = Field(example=Path("ternary_out/molecule_annotation.json")) input_field: Dict[str, Dict[str, List[str]]] = Field( alias="input", example={ diff --git a/common/model_api/model_api/preprocessing.py b/common/model_api/model_api/preprocessing.py index 2cbe03701..4f30d4d82 100644 --- a/common/model_api/model_api/preprocessing.py +++ b/common/model_api/model_api/preprocessing.py @@ -32,9 +32,7 @@ def crop_page_images( figure_image: Image = pdf_page.to_image( resolution=calculate_dpi(figure_bbox) ).original.crop(figure_bbox) - image_path = ( - output_path / f"{obj.idx}.{settings.training_image_format}" - ) + image_path = output_path / f"{obj.idx}.{settings.training_image_format}" figure_image.save(str(image_path)) yield image_path @@ -47,12 +45,8 @@ def convert_figure_bbox_in_points( page_width_inch = page_pdf_bbox[3] - page_pdf_bbox[1] page_height_inch = page_pdf_bbox[2] - page_pdf_bbox[0] try: - figure_to_page_w_points = page_width_inch / Decimal( - page_dod_size.width - ) - figure_to_page_h_points = page_height_inch / Decimal( - page_dod_size.height - ) + figure_to_page_w_points = page_width_inch / Decimal(page_dod_size.width) + figure_to_page_h_points = page_height_inch / Decimal(page_dod_size.height) except ZeroDivisionError as err: logger.error("Page size from DOD is wrong! 
%s", page_dod_size) raise err diff --git a/common/model_api/model_api/storage_exchange.py b/common/model_api/model_api/storage_exchange.py index edfb33961..034afd6dc 100644 --- a/common/model_api/model_api/storage_exchange.py +++ b/common/model_api/model_api/storage_exchange.py @@ -15,9 +15,7 @@ def get_document( """Get a document from s3-storage.""" logger.info("Get a document from minio") document_path = work_dir / request.file.name - loader.client.fget_object( - request.bucket, str(request.file), str(document_path) - ) + loader.client.fget_object(request.bucket, str(request.file), str(document_path)) return document_path @@ -45,9 +43,7 @@ def put_annotation( """Put an annotation to s3-storage.""" logger.info("Put an annotation to minio") updated_annotation_path = Path(work_dir) / f"out_{request.input_path.name}" - output_annotation = m.AnnotationFromS3(pages=annotation).json( - by_alias=True - ) + output_annotation = m.AnnotationFromS3(pages=annotation).json(by_alias=True) updated_annotation_path.write_text(output_annotation) loader.client.fput_object( request.output_bucket, diff --git a/common/model_api/model_api/utils.py b/common/model_api/model_api/utils.py index 1daeaa1fd..33b824fdd 100644 --- a/common/model_api/model_api/utils.py +++ b/common/model_api/model_api/utils.py @@ -84,9 +84,7 @@ def update_annotation_categories( required_obj_ids: Optional[Tuple[str, ...]] = None, ) -> None: if page.page_num > len(pdf.pages): - logger.error( - "page %s in annotations doesn't exit in pdf", page.page_num - ) + logger.error("page %s in annotations doesn't exit in pdf", page.page_num) return bboxes_inference_result = { (page.page_num, Path(image).stem): inference_result @@ -114,9 +112,7 @@ def update_annotation_categories( obj.data = {} inference_key = (page.page_num, obj.idx) - if (data_field := "data") in bboxes_inference_result[ - inference_key - ].keys(): + if (data_field := "data") in bboxes_inference_result[inference_key].keys(): obj.data = { **obj.data, **bboxes_inference_result[inference_key][data_field], @@ -124,9 +120,7 @@ def update_annotation_categories( if (category_field := "category") in bboxes_inference_result[ inference_key ].keys(): - obj.category = bboxes_inference_result[inference_key][ - category_field - ] + obj.category = bboxes_inference_result[inference_key][category_field] logger.info( "An annotation of a page %s with %s updated", diff --git a/common/model_api/tests/test_api.py b/common/model_api/tests/test_api.py index 212a7c529..bb04baf83 100644 --- a/common/model_api/tests/test_api.py +++ b/common/model_api/tests/test_api.py @@ -14,9 +14,7 @@ # from model_api.inference import inference -@pytest.mark.skip( - reason="this is a test from a different, but similar service" -) +@pytest.mark.skip(reason="this is a test from a different, but similar service") def test_inference(monkeypatch): model_mock = MagicMock() monkeypatch.setattr( @@ -62,9 +60,7 @@ def test_update_annotation_categories_updating(monkeypatch): ) pdf = MagicMock() setattr(pdf, "pages", [1]) - update_annotation_categories( - inference, None, page_dod, pdf, ["1", "3"], ... - ) + update_annotation_categories(inference, None, page_dod, pdf, ["1", "3"], ...) assert page_dod == m.PageDOD( page_num=1, @@ -106,9 +102,7 @@ def test_update_annotation_categories_without_updating(monkeypatch): inference = MagicMock(return_value=[]) pdf = MagicMock() setattr(pdf, "pages", [1]) - update_annotation_categories( - inference, None, page_dod, pdf, ["1", "3"], ... 
- ) + update_annotation_categories(inference, None, page_dod, pdf, ["1", "3"], ...) assert page_dod == m.PageDOD( page_num=1, diff --git a/common/model_api/tests/test_preprocessing.py b/common/model_api/tests/test_preprocessing.py index 86514aa6d..f3d1174d7 100644 --- a/common/model_api/tests/test_preprocessing.py +++ b/common/model_api/tests/test_preprocessing.py @@ -40,15 +40,9 @@ def test_calculate_dpi(): def test_preprocessing(tmpdir, monkeypatch): - obj1 = GeometryObject( - category="1", bbox=(300, 300, 800, 800), id="object_id" - ) - obj2 = GeometryObject( - category="100500", bbox=(0, 0, 0, 0), id="does not matter" - ) - page_dod = PageDOD( - page_num=1, size=Size(width=595, height=841), objs=[obj1, obj2] - ) + obj1 = GeometryObject(category="1", bbox=(300, 300, 800, 800), id="object_id") + obj2 = GeometryObject(category="100500", bbox=(0, 0, 0, 0), id="does not matter") + page_dod = PageDOD(page_num=1, size=Size(width=595, height=841), objs=[obj1, obj2]) page_mock = MagicMock() image_mock = MagicMock() diff --git a/common/model_api/tests/test_smoke.py b/common/model_api/tests/test_smoke.py index ab3749836..ed5386d3f 100644 --- a/common/model_api/tests/test_smoke.py +++ b/common/model_api/tests/test_smoke.py @@ -184,8 +184,12 @@ def mock_put_annotation(loader, work_dir, annotation, request): # ] # ) response = { - "0": {"1": ["30e4d539-8e90-49c7-b49c-883073e2b8c8", - "aab83828-cd8b-41f7-a3c3-943f13e67c2c"]}, + "0": { + "1": [ + "30e4d539-8e90-49c7-b49c-883073e2b8c8", + "aab83828-cd8b-41f7-a3c3-943f13e67c2c", + ] + }, "3": { "2": [ "44d94e31-7079-470a-b8b5-74ce365353f7", @@ -235,6 +239,6 @@ def test_form_response(monkeypatch): inference=inference_return, request=request, loader=None, - work_dir=None + work_dir=None, ) assert m.ClassifierResponse(__root__=response) == inference_and_save_result diff --git a/common/page_rendering/page_rendering/page_rendering.py b/common/page_rendering/page_rendering/page_rendering.py index 25596aa46..e3f055a8e 100644 --- a/common/page_rendering/page_rendering/page_rendering.py +++ b/common/page_rendering/page_rendering/page_rendering.py @@ -44,9 +44,7 @@ def render( page = pdf.pages[page_number - 1] img = page.to_image(resolution=self.dpi) file_name = full_file_name.name.split(".")[0] - filename = dir_with_images / self.name_image( - file_name, page_number - ) + filename = dir_with_images / self.name_image(file_name, page_number) logger.info("Render page %s", page_number) img.save(filename, format=self.image_format) diff --git a/convert/convert/coco_export/convert.py b/convert/convert/coco_export/convert.py index c356bf12a..d0cf1c384 100644 --- a/convert/convert/coco_export/convert.py +++ b/convert/convert/coco_export/convert.py @@ -9,7 +9,6 @@ import requests from botocore.exceptions import ClientError - from convert.config import minio_client, minio_resource, settings from convert.logger import get_logger from convert.models.coco import Annotation, Category, CocoDataset, Image @@ -53,9 +52,7 @@ def download_image( image_folder = f"{Path(file_path).parent.parent}/" if not os.path.exists(image_folder): os.makedirs(image_folder, exist_ok=True) - image_local_path = ( - f"{image_folder}/{self.job_id}_{Path(file_path).name}" - ) + image_local_path = f"{image_folder}/{self.job_id}_{Path(file_path).name}" minio_resource.meta.client.download_file( self.bucket_name, file_path, image_local_path ) @@ -110,9 +107,7 @@ def download_annotation( f"{work_dir}/{page_name}.json", f"{local_path}/{page_name}.json", ) - add_to_zip_and_local_remove( - 
f"{local_path}/{page_name}.json", zip_file - ) + add_to_zip_and_local_remove(f"{local_path}/{page_name}.json", zip_file) def get_annotation_body( self, @@ -149,9 +144,7 @@ def fetch( """ work_dir = Path(manifest).parent manifest_content = json.loads( - minio_client.get_object(Bucket=self.bucket_name, Key=manifest)[ - "Body" - ] + minio_client.get_object(Bucket=self.bucket_name, Key=manifest)["Body"] .read() .decode("utf-8") ) @@ -283,8 +276,7 @@ def convert(self) -> ZipFile: annotation_num = 1 categories = loader.get_categories(self.token) category_names = { - category.lower(): number - for number, category in enumerate(categories) + category.lower(): number for number, category in enumerate(categories) } for page in file_id: files = minio_client.list_objects( @@ -317,9 +309,7 @@ def convert(self) -> ZipFile: key=lambda x: x["id"], # type: ignore ) export_save_to_json("coco", coco_annotation.dict()) - LOGGER.info( - "Converting of the job %s to coco has been finished", self.job_id - ) + LOGGER.info("Converting of the job %s to coco has been finished", self.job_id) self.zip_file.close() return self.zip_file @@ -414,9 +404,7 @@ def convert(self) -> ZipFile: minio_client.download_file( self.bucket_name, manifest_path, annotation_local_path ) - LOGGER.info( - "manifest.json was downloaded for the job %s", self.job_id - ) + LOGGER.info("manifest.json was downloaded for the job %s", self.job_id) add_to_zip_and_local_remove(annotation_local_path, self.zip_file) loader.fetch( manifest_path, diff --git a/convert/convert/coco_import/convert.py b/convert/convert/coco_import/convert.py index 706eaf541..f46c35fed 100644 --- a/convert/convert/coco_import/convert.py +++ b/convert/convert/coco_import/convert.py @@ -101,17 +101,11 @@ def check_category(self) -> Set[str]: "metadata": {"color": category["color"]}, "is_link": False, } - get_category_details_url = ( - f"{categories_services_url}{category['id']}" - ) - response = SESSION.get( - url=get_category_details_url, headers=headers - ) + get_category_details_url = f"{categories_services_url}{category['id']}" + response = SESSION.get(url=get_category_details_url, headers=headers) category_id = response.json().get("id", None) if not category_id: - SESSION.post( - url=categories_services_url, json=body, headers=headers - ) + SESSION.post(url=categories_services_url, json=body, headers=headers) LOGGER.info("Created category %s", category["name"]) LOGGER.info(response.json()) LOGGER.info("Checking categories has been finished") @@ -134,9 +128,7 @@ def convert(self) -> None: annotation["page_num"] = 1 pages["pages"].append(annotation) import_save_to_json( - os.path.join( - Path(self.s3_data.bucket_s3).stem, str(image_id) - ), + os.path.join(Path(self.s3_data.bucket_s3).stem, str(image_id)), str(obj["id"]), annotation, file_id=image_id, diff --git a/convert/convert/coco_import/import_service.py b/convert/convert/coco_import/import_service.py index 891c46012..dab9783e8 100644 --- a/convert/convert/coco_import/import_service.py +++ b/convert/convert/coco_import/import_service.py @@ -4,8 +4,6 @@ from urllib.parse import urljoin import requests -from fastapi import HTTPException, status - from convert.coco_import.convert import ConvertToBadgerdoc from convert.config import settings from convert.exceptions import UploadLimitExceedError @@ -13,6 +11,7 @@ from convert.models import coco from convert.utils.common_utils import check_uploading_limit from convert.utils.s3_utils import S3Manager, s3_download_files +from fastapi import HTTPException, status LOGGER = 
get_logger(__file__) @@ -24,9 +23,7 @@ def download_coco_from_aws(s3_data: coco.DataS3) -> S3Manager: try: check_uploading_limit(s3_data.files_keys) except UploadLimitExceedError as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail=str(e) - ) + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) s3 = S3Manager(s3_data.aws_access_key_id, s3_data.aws_secret_access_key) s3_download_files(s3, s3_data.bucket_s3, s3_data.files_keys) return s3 @@ -69,9 +66,7 @@ def import_run( json=body, headers={"X-Current-Tenant": current_tenant, "Authorization": token}, ) - converter.upload_annotations( - job_id, s3_data.bucket_s3, annotation_by_image - ) + converter.upload_annotations(job_id, s3_data.bucket_s3, annotation_by_image) return { "msg": f"Dataset was converted to {import_format} " f"format and upload to bucket {current_tenant}" diff --git a/convert/convert/config.py b/convert/convert/config.py index 9b6002f6d..6f9b19f55 100644 --- a/convert/convert/config.py +++ b/convert/convert/config.py @@ -4,15 +4,14 @@ import boto3 from botocore.client import BaseClient +from convert import logger +from dotenv import load_dotenv from mypy_extensions import KwArg, VarArg from pydantic import BaseSettings, Field from requests import Session from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry -from convert import logger -from dotenv import load_dotenv - load_dotenv() @@ -115,9 +114,7 @@ def create_boto3_config(): "s3 connection is not properly configured - " "s3_credentials_provider is not set" ) - logger_.info( - f"S3_Credentials provider - {settings.s3_credentials_provider}" - ) + logger_.info(f"S3_Credentials provider - {settings.s3_credentials_provider}") return boto3_config diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py index dd0d1148c..ea74a7c77 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py @@ -56,9 +56,7 @@ def convert( badgerdoc_tokens, ) for labelstudio_item in annotation.result: - self.process_relations( - badgerdoc_annotations, labelstudio_item - ) + self.process_relations(badgerdoc_annotations, labelstudio_item) badgerdoc_annotations_practic = AnnotationConverterPractic( badgerdoc_annotations, badgerdoc_tokens ).convert() @@ -146,14 +144,9 @@ def get_token_indexes_and_form_bbox( offset_end: int, badgerdoc_tokens: BadgerdocTokensPage, ) -> Tuple[List[int], List[float]]: - badgerdoc_annotation_token_indexes = list( - range(offset_begin, offset_end) - ) + badgerdoc_annotation_token_indexes = list(range(offset_begin, offset_end)) bbox = self.form_common_bbox( - [ - badgerdoc_tokens.objs[t].bbox - for t in badgerdoc_annotation_token_indexes - ] + [badgerdoc_tokens.objs[t].bbox for t in badgerdoc_annotation_token_indexes] ) return badgerdoc_annotation_token_indexes, bbox diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py index 0178a8dd3..e2e4911be 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py @@ -47,9 +47,7 @@ def convert_objs( text=text, 
data=bd_annotation_model_practic.AnnotationTokens( tokens=tokens, - dataAttributes=obj_theoretic.data.get( - "dataAttributes", [] - ), + dataAttributes=obj_theoretic.data.get("dataAttributes", []), ), links=links, ) @@ -78,9 +76,7 @@ def convert_links( ) -> List[bd_annotation_model_practic.AnnotationLink]: links = [] for link_theoretic in theoretic_links: - link = bd_annotation_model_practic.AnnotationLink( - **link_theoretic.dict() - ) + link = bd_annotation_model_practic.AnnotationLink(**link_theoretic.dict()) links.append(link) return links diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py index 17f72c9d4..a575d1bd4 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py @@ -24,18 +24,10 @@ def __init__( self.font_name = font_name self.font_size = font_size - def render_tokens( - self, tokens: List[BadgerdocToken], save_path: Path - ) -> None: + def render_tokens(self, tokens: List[BadgerdocToken], save_path: Path) -> None: with fitz.open() as doc: - width = ( - max(token.bbox[2] for token in tokens) - + self.page_border_offset - ) - height = ( - max(token.bbox[3] for token in tokens) - + self.page_border_offset - ) + width = max(token.bbox[2] for token in tokens) + self.page_border_offset + height = max(token.bbox[3] for token in tokens) + self.page_border_offset page = doc.new_page(height=height, width=width) for token in tokens: diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py index 14c72b9c7..345613b91 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py @@ -13,9 +13,7 @@ def generate_chunks(obj_to_split: List[str], size: int) -> List[List[str]]: - return [ - obj_to_split[i : i + size] for i in range(0, len(obj_to_split), size) - ] + return [obj_to_split[i : i + size] for i in range(0, len(obj_to_split), size)] class TextWrapper: diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py index 0373a0d72..3ec866065 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py @@ -3,13 +3,14 @@ from typing import NamedTuple from botocore.client import BaseClient -from tenant_dependency import TenantData - from convert.label_studio_to_badgerdoc.badgerdoc_format.annotation_converter_practic import ( AnnotationConverterToTheory, ) -from convert.label_studio_to_badgerdoc.labelstudio_format import LabelStudioFormat +from convert.label_studio_to_badgerdoc.labelstudio_format import ( + LabelStudioFormat, +) from convert.logger import get_logger +from tenant_dependency import TenantData from .models import S3Path, bd_annotation_model_practic from .models.bd_annotation_model import BadgerdocAnnotation @@ -78,9 +79,7 @@ def download_badgerdoc_from_s3( input_annotations = self.download_file_from_s3( s3_input_annotations, tmp_dir ) - input_manifest = self.download_file_from_s3( - s3_input_manifest, tmp_dir - ) + input_manifest = self.download_file_from_s3(s3_input_manifest, tmp_dir) 
LOGGER.debug("input_manifest: %s", input_manifest.read_text()) page = Page.parse_file(input_tokens) @@ -90,9 +89,7 @@ def download_badgerdoc_from_s3( ) ).convert() manifest = Manifest.parse_file(input_manifest) - return BadgerdocData( - page=page, annotation=annotation, manifest=manifest - ) + return BadgerdocData(page=page, annotation=annotation, manifest=manifest) def download_file_from_s3(self, s3_path: S3Path, tmp_dir: Path) -> Path: local_file_path = tmp_dir / Path(s3_path.path).name @@ -110,9 +107,7 @@ def upload_labelstudio_to_s3( with tempfile.TemporaryDirectory() as tmp_dirname: tmp_dir = Path(tmp_dirname) - badgerdoc_annotations_path = tmp_dir / Path( - "labelstudio_format.json" - ) + badgerdoc_annotations_path = tmp_dir / Path("labelstudio_format.json") self.labelstudio_format.export_json(badgerdoc_annotations_path) self.s3_client.upload_file( str(badgerdoc_annotations_path), diff --git a/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py b/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py index 11bd8687e..4ee40a507 100644 --- a/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py +++ b/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py @@ -7,10 +7,6 @@ import requests from botocore.client import BaseClient from botocore.exceptions import ClientError -from fastapi import HTTPException, status -from fastapi.encoders import jsonable_encoder -from tenant_dependency import TenantData - from convert.config import DEFAULT_PAGE_BORDER_OFFSET, settings from convert.label_studio_to_badgerdoc.badgerdoc_format.annotation_converter import ( AnnotationConverter, @@ -24,13 +20,19 @@ from convert.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import ( TextToBadgerdocTokensConverter, ) -from convert.label_studio_to_badgerdoc.models import BadgerdocToken, DocumentLink +from convert.label_studio_to_badgerdoc.models import ( + BadgerdocToken, + DocumentLink, +) from convert.label_studio_to_badgerdoc.models.label_studio_models import ( LabelStudioModel, S3Path, ValidationType, ) from convert.logger import get_logger +from fastapi import HTTPException, status +from fastapi.encoders import jsonable_encoder +from tenant_dependency import TenantData LOGGER = get_logger(__file__) LOGGER.setLevel("DEBUG") @@ -98,9 +100,7 @@ def parse_document_links_from_labelstudio_format( self, label_studio_format: LabelStudioModel ) -> List[DocumentLink]: return [ - DocumentLink( - to=relation.to, category=relation.category, type=relation.type - ) + DocumentLink(to=relation.to, category=relation.category, type=relation.type) for relation in label_studio_format.__root__[0].meta.relations ] @@ -126,19 +126,17 @@ def execute(self) -> None: self.badgerdoc_format.convert_from_labelstudio(label_studio_format) LOGGER.debug("Tokens and annotations are converted") file_id_in_assets = self.upload_output_pdf_to_s3() - annotation_job_id_created = ( - self.import_annotations_to_annotation_microservice( - file_id_in_assets=file_id_in_assets, - owner=self.token_data.user_id, - validation_type=self.validation_type, - deadline=self.deadline, - extensive_coverage=self.extensive_coverage, - annotators=self.annotators, - validators=self.validators, - document_labels=document_labels, - categories_to_taxonomy_mapping=categories_to_taxonomy_mapping, - document_links=document_links, - ) + annotation_job_id_created = self.import_annotations_to_annotation_microservice( + file_id_in_assets=file_id_in_assets, + 
owner=self.token_data.user_id, + validation_type=self.validation_type, + deadline=self.deadline, + extensive_coverage=self.extensive_coverage, + annotators=self.annotators, + validators=self.validators, + document_labels=document_labels, + categories_to_taxonomy_mapping=categories_to_taxonomy_mapping, + document_links=document_links, ) self.upload_badgerdoc_annotations_and_tokens_to_s3( annotation_job_id_created, file_id_in_assets @@ -150,9 +148,7 @@ def download_label_studio_from_s3( s3_input_annotation: S3Path, ) -> LabelStudioModel: with tempfile.TemporaryDirectory() as tmp_dirname: - input_file = ( - Path(tmp_dirname) / Path(s3_input_annotation.path).name - ) + input_file = Path(tmp_dirname) / Path(s3_input_annotation.path).name try: self.s3_client.download_file( @@ -174,9 +170,7 @@ def download_label_studio_from_s3( return LabelStudioModel.parse_file(input_file) def get_output_tokens_path(self, file_id_in_assets: int) -> str: - return ( - f"files/{file_id_in_assets}/ocr/{self.converted_tokens_filename}" - ) + return f"files/{file_id_in_assets}/ocr/{self.converted_tokens_filename}" def get_output_pdf_path(self, file_id_in_assets: int) -> str: return f"files/{file_id_in_assets}/{file_id_in_assets}.pdf" @@ -202,9 +196,7 @@ def make_upload_file_request_to_assets(self, pdf_path: Path) -> int: ) request_to_post_assets.raise_for_status() except requests.exceptions.RequestException as e: - LOGGER.exception( - "Failed request to 'assets' to post converted pdf-file" - ) + LOGGER.exception("Failed request to 'assets' to post converted pdf-file") raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail="Failed request to 'assets' to post converted pdf-file", @@ -216,9 +208,7 @@ def upload_output_pdf_to_s3(self) -> int: pdf_path = tmp_dirname / Path(self.output_pdf_filename) self.badgerdoc_format.export_pdf(pdf_path) - file_id_in_assets = self.make_upload_file_request_to_assets( - pdf_path - ) + file_id_in_assets = self.make_upload_file_request_to_assets(pdf_path) return file_id_in_assets def upload_badgerdoc_annotations_and_tokens_to_s3( @@ -227,13 +217,9 @@ def upload_badgerdoc_annotations_and_tokens_to_s3( with tempfile.TemporaryDirectory() as tmp_dirname: tmp_dirname = Path(tmp_dirname) - s3_output_tokens_path = self.get_output_tokens_path( - file_id_in_assets - ) + s3_output_tokens_path = self.get_output_tokens_path(file_id_in_assets) - badgerdoc_tokens_path = tmp_dirname / Path( - self.badgerdoc_tokens_filename - ) + badgerdoc_tokens_path = tmp_dirname / Path(self.badgerdoc_tokens_filename) self.badgerdoc_format.export_tokens(badgerdoc_tokens_path) self.s3_client.upload_file( str(badgerdoc_tokens_path), @@ -244,9 +230,7 @@ def upload_badgerdoc_annotations_and_tokens_to_s3( badgerdoc_annotations_path = tmp_dirname / Path( self.badgerdoc_annotations_filename ) - self.badgerdoc_format.export_annotations( - badgerdoc_annotations_path - ) + self.badgerdoc_format.export_annotations(badgerdoc_annotations_path) s3_output_annotations_path = self.get_output_annotations_path( importjob_id_created, file_id_in_assets ) @@ -311,13 +295,9 @@ def request_jobs_to_create_annotation_job( "validators": validators, } if deadline: - post_annotation_job_body.update( - {"deadline": jsonable_encoder(deadline)} - ) + post_annotation_job_body.update({"deadline": jsonable_encoder(deadline)}) if extensive_coverage is not None: - post_annotation_job_body.update( - {"extensive_coverage": extensive_coverage} - ) + post_annotation_job_body.update({"extensive_coverage": extensive_coverage}) LOGGER.debug( 
"Making a request to create an Annotation Job in 'jobs' to url: %s with request body: %s", post_annotation_job_url, @@ -344,9 +324,7 @@ def request_jobs_to_create_annotation_job( ) return request_to_post_annotation_job.json()["id"] - def get_categories_of_links( - self, pages_objs: List[BadgerdocToken] - ) -> List[str]: + def get_categories_of_links(self, pages_objs: List[BadgerdocToken]) -> List[str]: result = [] for pages_obj in pages_objs: for link in pages_obj.links: @@ -356,9 +334,7 @@ def get_categories_of_links( def get_box_and_link_categories(self) -> List[str]: pages_objs = self.badgerdoc_format.badgerdoc_annotation.objs - categories_of_type_box = { - pages_obj.category for pages_obj in pages_objs - } + categories_of_type_box = {pages_obj.category for pages_obj in pages_objs} categories_of_type_link = self.get_categories_of_links(pages_objs) return [*categories_of_type_box, *categories_of_type_link] @@ -380,9 +356,7 @@ def request_annotation_to_post_annotations( "failed_validation_pages": [], "similar_revisions": [], # TODO: 'simial_revisions' will be replaced with 'links' with unknown format "categories": list(document_labels), - "links_json": [ - document_link.dict() for document_link in document_links - ], + "links_json": [document_link.dict() for document_link in document_links], } LOGGER.debug( "Making request to annotation to post annotations to url: %s with request body: %s", diff --git a/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py b/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py index f73621b9a..f6f1f319d 100644 --- a/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py +++ b/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py @@ -2,10 +2,9 @@ from typing import Any, Dict, List, Optional import requests -from fastapi import HTTPException, status - from convert.config import settings from convert.logger import get_logger +from fastapi import HTTPException, status from ..models.bd_annotation_model import AnnotationLink, BadgerdocAnnotation from ..models.bd_manifest_model_practic import Manifest @@ -59,8 +58,8 @@ def from_badgerdoc( ] job_id = badgerdoc_manifest.job_id - categories_linked_with_taxonomies = ( - self.get_categories_linked_with_taxonomies(job_id, request_headers) + categories_linked_with_taxonomies = self.get_categories_linked_with_taxonomies( + job_id, request_headers ) LOGGER.debug( "Got there categories linked to taxonomies: %s", @@ -222,9 +221,7 @@ def get_corresponding_taxonomy_obj( detail="Failed request to 'taxonomy' to get corresponding taxonomy", ) from e response_content = request_to_get_taxonomy.json() - LOGGER.debug( - "Got this response from taxonomy service: %s", response_content - ) + LOGGER.debug("Got this response from taxonomy service: %s", response_content) return [ {"taxonomy_id": element["id"], "version": element["version"]} @@ -280,9 +277,7 @@ def get_taxonomy_to_taxons_mapping( detail="Failed request to 'taxonomy' to get taxons_used", ) from e response_content = request_to_get_taxons_used.json() - LOGGER.debug( - "Got this response from taxonomy service: %s", response_content - ) + LOGGER.debug("Got this response from taxonomy service: %s", response_content) result = {taxonomy_id: [] for taxonomy_id in all_taxonomies_ids_used} for taxon_obj in response_content["data"]: diff --git a/convert/convert/label_studio_to_badgerdoc/models/bd_tokens_model.py 
b/convert/convert/label_studio_to_badgerdoc/models/bd_tokens_model.py index 8f3f19d1a..f0b8818bc 100644 --- a/convert/convert/label_studio_to_badgerdoc/models/bd_tokens_model.py +++ b/convert/convert/label_studio_to_badgerdoc/models/bd_tokens_model.py @@ -23,6 +23,7 @@ class PageSize(BaseModel): class Page(BaseModel): """A model for the field with bboxes.""" + page_num: int = Field(..., example=1) size: PageSize objs: List[BadgerdocToken] diff --git a/convert/convert/models/coco.py b/convert/convert/models/coco.py index 729cc13eb..a220cfbfa 100644 --- a/convert/convert/models/coco.py +++ b/convert/convert/models/coco.py @@ -35,9 +35,7 @@ def __init__(self, iterable: Iterable[Any]) -> None: "Bounding box should contains only numeric values" ) from err if len(tmp) != 4: - raise ValueError( - "Bounding box must contains x, y, width and height" - ) + raise ValueError("Bounding box must contains x, y, width and height") super().__init__() diff --git a/convert/convert/routers/coco.py b/convert/convert/routers/coco.py index a0a46528a..37bbdb9b8 100644 --- a/convert/convert/routers/coco.py +++ b/convert/convert/routers/coco.py @@ -2,11 +2,6 @@ from urllib.parse import urlparse import requests -from fastapi import APIRouter, BackgroundTasks, Depends, Header, status -from fastapi.responses import Response, StreamingResponse -from requests import HTTPError -from tenant_dependency import TenantData, get_tenant_info - from convert.coco_export.convert import ConvertToCoco, ExportBadgerdoc from convert.coco_export.export_service import ( export_run, @@ -18,12 +13,14 @@ from convert.logger import get_logger from convert.models import coco from convert.utils.s3_utils import get_bucket_path +from fastapi import APIRouter, BackgroundTasks, Depends, Header, status +from fastapi.responses import Response, StreamingResponse +from requests import HTTPError +from tenant_dependency import TenantData, get_tenant_info router = APIRouter(prefix="/coco", tags=["coco"]) LOGGER = get_logger(__file__) -tenant = get_tenant_info( - url=settings.keycloak_url, algorithm="RS256", debug=True -) +tenant = get_tenant_info(url=settings.keycloak_url, algorithm="RS256", debug=True) @router.post( @@ -139,9 +136,7 @@ def download_dataset( parsed = urlparse(url) minio_path = parsed.path[1:].split("/") bucket, key = minio_path[0], str.join("/", minio_path[1:-1]) - zip_file = minio_client.get_object( - Bucket=bucket, Key=str.join("/", minio_path[1:]) - ) + zip_file = minio_client.get_object(Bucket=bucket, Key=str.join("/", minio_path[1:])) background.add_task( minio_client.delete_object, Bucket=bucket, diff --git a/convert/convert/routers/label_studio.py b/convert/convert/routers/label_studio.py index 2dfcec552..c484acfae 100644 --- a/convert/convert/routers/label_studio.py +++ b/convert/convert/routers/label_studio.py @@ -1,8 +1,5 @@ from typing import Optional -from fastapi import APIRouter, Depends, Header, status -from tenant_dependency import TenantData, get_tenant_info - from convert.config import minio_client, settings from convert.label_studio_to_badgerdoc.badgerdoc_to_label_studio_use_case import ( BDToLabelStudioConvertUseCase, @@ -14,11 +11,11 @@ from convert.label_studio_to_badgerdoc.models.label_studio_models import ( BadgerdocToLabelStudioRequest, ) +from fastapi import APIRouter, Depends, Header, status +from tenant_dependency import TenantData, get_tenant_info router = APIRouter(prefix="/label_studio", tags=["label_studio"]) -tenant = get_tenant_info( - url=settings.keycloak_url, algorithm="RS256", debug=True -) +tenant 
= get_tenant_info(url=settings.keycloak_url, algorithm="RS256", debug=True) @router.post( diff --git a/convert/convert/routers/text.py b/convert/convert/routers/text.py index 272bec5b2..fd0b413e4 100644 --- a/convert/convert/routers/text.py +++ b/convert/convert/routers/text.py @@ -1,8 +1,9 @@ -from fastapi import APIRouter, status - from convert.config import minio_client from convert.label_studio_to_badgerdoc.models.text_model import TextRequest -from convert.label_studio_to_badgerdoc.text_to_badgerdoc_use_case import TextToBDConvertUseCase +from convert.label_studio_to_badgerdoc.text_to_badgerdoc_use_case import ( + TextToBDConvertUseCase, +) +from fastapi import APIRouter, status router = APIRouter(prefix="/text", tags=["text"]) diff --git a/convert/convert/utils/json_utils.py b/convert/convert/utils/json_utils.py index b03c98f90..0c3a49f35 100644 --- a/convert/convert/utils/json_utils.py +++ b/convert/convert/utils/json_utils.py @@ -24,9 +24,7 @@ def load_from_json(file_name: str) -> Any: return json.load(f_o) except FileNotFoundError: LOGGER.error(f"[Errno 2] No such file or directory: {file_name}") - raise FileNotFoundError( - f"[Errno 2] No such file or directory: {file_name}" - ) + raise FileNotFoundError(f"[Errno 2] No such file or directory: {file_name}") def annotation_category_change( @@ -72,9 +70,7 @@ def merge_jobs_annotation( last_annotation_id = file_annotation["annotations"][-1]["id"] last_image_id = file_annotation["images"][-1]["id"] last_category_id = file_annotation["categories"][-1]["id"] - file_categories = [ - category["name"] for category in file_annotation["categories"] - ] + file_categories = [category["name"] for category in file_annotation["categories"]] for category_merge in merge_annotation["categories"]: if category_merge["name"] in file_categories: continue @@ -116,9 +112,7 @@ def export_save_to_json( with open(file_name) as f_obr: annotations_in_file = json.load(f_obr) with open(file_name, "w") as f_obw: - annotation = merge_jobs_annotation( - annotations_in_file, annotations - ) + annotation = merge_jobs_annotation(annotations_in_file, annotations) json.dump(annotation, f_obw, default=str) diff --git a/convert/convert/utils/render_pdf_page.py b/convert/convert/utils/render_pdf_page.py index d17d8101f..e43802abf 100644 --- a/convert/convert/utils/render_pdf_page.py +++ b/convert/convert/utils/render_pdf_page.py @@ -3,7 +3,6 @@ from zipfile import ZipFile import pdfplumber - from convert.config import settings from convert.logger import get_logger from convert.utils.common_utils import add_to_zip_and_local_remove @@ -29,15 +28,9 @@ def pdf_page_to_jpg( if validated_pages and num not in validated_pages: continue image = page.to_image(resolution=settings.dpi).original - image_path = ( - output_path / f"{job_id}_{num}.{settings.coco_image_format}" - ) + image_path = output_path / f"{job_id}_{num}.{settings.coco_image_format}" image.save(image_path) - LOGGER.info( - "Page %s was rendered and saved to %s", num, image_path - ) - LOGGER.info( - "Page %s was written to archive %s", num, zip_file.filename - ) + LOGGER.info("Page %s was rendered and saved to %s", num, image_path) + LOGGER.info("Page %s was written to archive %s", num, zip_file.filename) LOGGER.info("Page %s was removed", num) add_to_zip_and_local_remove(str(image_path), zip_file) diff --git a/convert/convert/utils/s3_utils.py b/convert/convert/utils/s3_utils.py index d1f890151..d17dca8ff 100644 --- a/convert/convert/utils/s3_utils.py +++ b/convert/convert/utils/s3_utils.py @@ -3,13 +3,16 @@ import 
boto3 import urllib3 -from fastapi import HTTPException, status - from convert.config import settings -from convert.exceptions import BucketError, FileKeyError, UploadLimitExceedError +from convert.exceptions import ( + BucketError, + FileKeyError, + UploadLimitExceedError, +) from convert.logger import get_logger from convert.models import coco from convert.utils.common_utils import check_uploading_limit +from fastapi import HTTPException, status logger = get_logger(__name__) @@ -56,9 +59,7 @@ def _check_bucket_exist(self, bucket_s3: str) -> Any: """ Checks if required bucket exists in S3 """ - all_s3_buckets = [ - bucket.name for bucket in self.resource.buckets.all() - ] + all_s3_buckets = [bucket.name for bucket in self.resource.buckets.all()] if bucket_s3 not in all_s3_buckets: raise BucketError(f"bucket {bucket_s3} does not exist!") @@ -68,9 +69,7 @@ def _check_files_exist(self, bucket_s3: str, files_keys: List[str]) -> Any: """ all_files_in_bucket = [ content["Key"] - for content in self.client.list_objects(Bucket=bucket_s3)[ - "Contents" - ] + for content in self.client.list_objects(Bucket=bucket_s3)["Contents"] ] for file_key in files_keys: if file_key not in all_files_in_bucket: @@ -94,9 +93,7 @@ def check_s3(self, bucket_s3: str, files_keys: List[str]) -> Any: raise urllib3.exceptions.MaxRetryError -def s3_download_files( - s3: S3Manager, bucket_s3: str, files_keys: List[str] -) -> None: +def s3_download_files(s3: S3Manager, bucket_s3: str, files_keys: List[str]) -> None: """ Tue function downloads list of the files from s3 storage Args: @@ -108,9 +105,7 @@ def s3_download_files( s3.check_s3(bucket_s3, files_keys) except (FileKeyError, BucketError) as e: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, detail=str(e) - ) + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) except urllib3.exceptions.MaxRetryError as e: raise HTTPException( @@ -126,9 +121,7 @@ def download_file_from_aws(s3_data: coco.DataS3) -> S3Manager: try: check_uploading_limit(s3_data.files_keys) except UploadLimitExceedError as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail=str(e) - ) + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) s3 = S3Manager(s3_data.aws_access_key_id, s3_data.aws_secret_access_key) s3_download_files(s3, s3_data.bucket_s3, s3_data.files_keys) return s3 diff --git a/convert/tests/test_label_studio/test_export.py b/convert/tests/test_label_studio/test_export.py index e3a30b379..cbe3db6f7 100644 --- a/convert/tests/test_label_studio/test_export.py +++ b/convert/tests/test_label_studio/test_export.py @@ -28,13 +28,11 @@ def test_annotation_converter(): TEST_FILES_DIR / "badgerdoc_etalon" / "manifest.json" ) page_annotation_file_name = f"{manifest_test.pages['1']}.json" - annotations_test = ( - annotation_converter_practic.AnnotationConverterToTheory( - bd_annotation_model_practic.BadgerdocAnnotation.parse_file( - TEST_FILES_DIR / "badgerdoc_etalon" / page_annotation_file_name - ) - ).convert() - ) + annotations_test = annotation_converter_practic.AnnotationConverterToTheory( + bd_annotation_model_practic.BadgerdocAnnotation.parse_file( + TEST_FILES_DIR / "badgerdoc_etalon" / page_annotation_file_name + ) + ).convert() labelstudio_format_test = LabelStudioFormat() labelstudio_format_test.from_badgerdoc( @@ -63,6 +61,5 @@ def test_annotation_converter(): str(relation) for relation in labelstudio_model_etalon.__root__[0].meta.relations ) == set( - str(relation) - for relation in 
labelstudio_model_test.__root__[0].meta.relations + str(relation) for relation in labelstudio_model_test.__root__[0].meta.relations ) diff --git a/convert/tests/test_label_studio/test_import.py b/convert/tests/test_label_studio/test_import.py index 048bbe6e0..2300512f1 100644 --- a/convert/tests/test_label_studio/test_import.py +++ b/convert/tests/test_label_studio/test_import.py @@ -73,9 +73,7 @@ def test_annotation_converter(): tokens_test = json.loads(tokens_test_path.read_text()) annotations_test = json.loads(annotations_test_path.read_text()) - tokens_etalon_path = ( - TEST_FILES_DIR / "badgerdoc_etalon" / "tokens_test.json" - ) + tokens_etalon_path = TEST_FILES_DIR / "badgerdoc_etalon" / "tokens_test.json" annotations_etalon_path = ( TEST_FILES_DIR / "badgerdoc_etalon" / "annotations_test.json" ) @@ -105,9 +103,7 @@ def test_import_document_links(): tokens_test = json.loads(tokens_test_path.read_text()) annotations_test = json.loads(annotations_test_path.read_text()) - tokens_etalon_path = ( - TEST_FILES_DIR / "badgerdoc_etalon" / "tokens_test.json" - ) + tokens_etalon_path = TEST_FILES_DIR / "badgerdoc_etalon" / "tokens_test.json" annotations_etalon_path = ( TEST_FILES_DIR / "badgerdoc_etalon" / "annotations_test.json" ) diff --git a/convert/tests/test_label_studio/test_text_wrapper.py b/convert/tests/test_label_studio/test_text_wrapper.py index aef00398f..a43ef0f40 100644 --- a/convert/tests/test_label_studio/test_text_wrapper.py +++ b/convert/tests/test_label_studio/test_text_wrapper.py @@ -117,10 +117,7 @@ def test_wrap_single_paragraph_text(): def test_wrap_text_with_several_paragraphs(): tw = TextWrapper(line_length=20) - text = ( - "Text which contains more then one paragraph\n" - "It is the second paragraph" - ) + text = "Text which contains more then one paragraph\n" "It is the second paragraph" result = tw.wrap(text) assert result == [ diff --git a/dev_runner/dev_runner/runners/annotation_runner.py b/dev_runner/dev_runner/runners/annotation_runner.py index 171f87180..ded3dced4 100644 --- a/dev_runner/dev_runner/runners/annotation_runner.py +++ b/dev_runner/dev_runner/runners/annotation_runner.py @@ -1,6 +1,7 @@ -from .base_runner import BaseRunner from dev_runner.conf import settings +from .base_runner import BaseRunner + class AnnotationRunner(BaseRunner): PACKAGE_NAME = "annotation" @@ -8,4 +9,4 @@ class AnnotationRunner(BaseRunner): PORT = settings.ANNOTATION_PORT DB_CREDENTIALS = { "POSTGRES_DB": "annotation", - } \ No newline at end of file + } diff --git a/dev_runner/dev_runner/runners/assets_runner.py b/dev_runner/dev_runner/runners/assets_runner.py index 7a84369d4..cafcec0f1 100644 --- a/dev_runner/dev_runner/runners/assets_runner.py +++ b/dev_runner/dev_runner/runners/assets_runner.py @@ -1,14 +1,13 @@ -from .base_runner import BaseRunner from dev_runner.conf import settings +from .base_runner import BaseRunner + class AssetsRunner(BaseRunner): PACKAGE_NAME = "assets" PORT = settings.ASSETS_PORT APP_NAME = "assets" - DB_CREDENTIALS = { - "POSTGRES_DB": "file_management" - } + DB_CREDENTIALS = {"POSTGRES_DB": "file_management"} ENVIRONMENT = { "APP_NAME": "assets", "UPLOADING_LIMIT": "100", diff --git a/dev_runner/dev_runner/runners/base_runner.py b/dev_runner/dev_runner/runners/base_runner.py index bec12fa23..449080a57 100644 --- a/dev_runner/dev_runner/runners/base_runner.py +++ b/dev_runner/dev_runner/runners/base_runner.py @@ -1,12 +1,13 @@ import asyncio -from pathlib import Path +import logging import os import sys from importlib import import_module +from 
pathlib import Path + from uvicorn.config import Config from uvicorn.server import Server from uvicorn.supervisors import ChangeReload, Multiprocess -import logging ROOT_PATH = Path(__file__).parent.parent.parent.parent @@ -36,7 +37,9 @@ async def run(mcs, services: tuple[str]): service = runner().run_app_async() service.__name__ = runner.PACKAGE_NAME runners.append(service) - done, pending = await asyncio.wait([service for service in runners], return_when=asyncio.FIRST_COMPLETED) + done, pending = await asyncio.wait( + [service for service in runners], return_when=asyncio.FIRST_COMPLETED + ) for task in pending: task.cancel() @@ -68,7 +71,7 @@ def _default_db_credentials() -> dict[str, str]: "POSTGRES_PASSWORD": "postgres", "POSTGRES_HOST": "localhost", "POSTGRES_PORT": "5432", - "POSTGRES_DB": "postgres" + "POSTGRES_DB": "postgres", } @staticmethod @@ -86,7 +89,9 @@ def setup_env(self): os.environ.update(db_credentials) def create_server(self): - logging.debug(f"[{self.__class__.__name__}]Starting {self.PACKAGE_NAME} on port {self.PORT}") + logging.debug( + f"[{self.__class__.__name__}]Starting {self.PACKAGE_NAME} on port {self.PORT}" + ) self.setup_env() package_path = str(ROOT_PATH / self.PACKAGE_NAME) sys.path.append(package_path) @@ -95,11 +100,15 @@ def create_server(self): app = module.app print(f"[{self.__class__.__name__}]: Module {module} is imported") except ModuleNotFoundError as e: - logging.error(f"[{self.__class__.__name__}]: Module {self.APP_NAME}.{self.MODULE_NAME} not found") + logging.error( + f"[{self.__class__.__name__}]: Module {self.APP_NAME}.{self.MODULE_NAME} not found" + ) raise e sys.path.remove(package_path) - config = Config(app, host=self.HOST, port=self.PORT, reload=True) # TODO: check additional folders for reloading + config = Config( + app, host=self.HOST, port=self.PORT, reload=True + ) # TODO: check additional folders for reloading server = Server(config=config) if config.should_reload: diff --git a/dev_runner/dev_runner/runners/convert_runner.py b/dev_runner/dev_runner/runners/convert_runner.py index 9c5b4979f..70ebe6c20 100644 --- a/dev_runner/dev_runner/runners/convert_runner.py +++ b/dev_runner/dev_runner/runners/convert_runner.py @@ -1,11 +1,10 @@ -from .base_runner import BaseRunner from dev_runner.conf import settings +from .base_runner import BaseRunner + class ConvertRunner(BaseRunner): PACKAGE_NAME = "convert" PORT = settings.CONVERT_PORT APP_NAME = "convert" - ENVIRONMENT = { - "IMPORT_COCO_URL": "http://0.0.0.0:8080/converter/import/" - } + ENVIRONMENT = {"IMPORT_COCO_URL": "http://0.0.0.0:8080/converter/import/"} diff --git a/dev_runner/dev_runner/runners/scheduler_runner.py b/dev_runner/dev_runner/runners/scheduler_runner.py index 8e4c62360..224b7d8a4 100644 --- a/dev_runner/dev_runner/runners/scheduler_runner.py +++ b/dev_runner/dev_runner/runners/scheduler_runner.py @@ -1,15 +1,14 @@ -from .base_runner import BaseRunner from dev_runner.conf import settings +from .base_runner import BaseRunner + class SchedulerRunner(BaseRunner): PACKAGE_NAME = "scheduler" PORT = settings.SCHEDULER_PORT APP_NAME = "scheduler" MODULE_NAME = "app" - DB_CREDENTIALS = { - "POSTGRES_DB": "scheduler" - } + DB_CREDENTIALS = {"POSTGRES_DB": "scheduler"} ENVIRONMENT = { "DB_NAME": "scheduler", "DB_URL": "postgresql+psycopg2://postgres:postgres@localhost:5432/scheduler", diff --git a/dev_runner/dev_runner/runners/users_runner.py b/dev_runner/dev_runner/runners/users_runner.py index 4eae4f4f0..9e8ad1275 100644 --- a/dev_runner/dev_runner/runners/users_runner.py +++ 
b/dev_runner/dev_runner/runners/users_runner.py @@ -1,6 +1,7 @@ -from .base_runner import BaseRunner from dev_runner.conf import settings +from .base_runner import BaseRunner + class UsersRunner(BaseRunner): PACKAGE_NAME = "users" @@ -22,4 +23,4 @@ class UsersRunner(BaseRunner): "KEYCLOAK_REALM": "master", "KEYCLOAK_ROLE_ADMIN": "admin", "KEYCLOAK_USERS_PUBLIC_KEY": "", - } \ No newline at end of file + } diff --git a/dev_runner/start.py b/dev_runner/start.py index 657ab3d48..068f9fc6e 100644 --- a/dev_runner/start.py +++ b/dev_runner/start.py @@ -2,11 +2,9 @@ from pathlib import Path import click -from dotenv import load_dotenv - -from dev_runner.runners.base_runner import RunnerRegistry from dev_runner.runners.annotation_runner import AnnotationRunner from dev_runner.runners.assets_runner import AssetsRunner +from dev_runner.runners.base_runner import RunnerRegistry from dev_runner.runners.convert_runner import ConvertRunner from dev_runner.runners.jobs_runner import JobsRunner from dev_runner.runners.models_runner import ModelsRunner @@ -16,7 +14,7 @@ from dev_runner.runners.search_runner import SearchRunner from dev_runner.runners.taxonomy_runner import TaxonomyRunner from dev_runner.runners.users_runner import UsersRunner - +from dotenv import load_dotenv ROOT_DIR = Path(__file__).parent SHARED_DOT_ENV = ROOT_DIR / "conf" / "shared.env" @@ -27,9 +25,13 @@ def _info(message): @click.command() -@click.argument("services", nargs=-1, type=click.Choice(RunnerRegistry.get_runners().keys())) +@click.argument( + "services", nargs=-1, type=click.Choice(RunnerRegistry.get_runners().keys()) +) def cli(services): - _info(f"Starting {services or 'all'} service{'s' if not services or len(services) > 1 else ''}...") + _info( + f"Starting {services or 'all'} service{'s' if not services or len(services) > 1 else ''}..." + ) load_dotenv(SHARED_DOT_ENV) asyncio.run(RunnerRegistry.run(services)) diff --git a/jobs/alembic/env.py b/jobs/alembic/env.py index af431b4b8..846f0eb98 100644 --- a/jobs/alembic/env.py +++ b/jobs/alembic/env.py @@ -28,15 +28,11 @@ # my_important_option = config.get_main_option("my_important_option") # ... etc. -main_database_url = os.environ.get( - "POSTGRESQL_JOBMANAGER_DATABASE_URI" -) +main_database_url = os.environ.get("POSTGRESQL_JOBMANAGER_DATABASE_URI") if not os.getenv("USE_TEST_DB"): config.set_main_option("sqlalchemy.url", main_database_url) else: - config.set_main_option( - "sqlalchemy.url", get_test_db_url(main_database_url) - ) + config.set_main_option("sqlalchemy.url", get_test_db_url(main_database_url)) def run_migrations_offline(): @@ -78,9 +74,7 @@ def run_migrations_online(): ) with connectable.connect() as connection: - context.configure( - connection=connection, target_metadata=target_metadata - ) + context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() diff --git a/jobs/alembic/versions/3f5b2d199d38_.py b/jobs/alembic/versions/3f5b2d199d38_.py index 8b1872cd8..bc4027b3e 100644 --- a/jobs/alembic/versions/3f5b2d199d38_.py +++ b/jobs/alembic/versions/3f5b2d199d38_.py @@ -19,9 +19,7 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - op.add_column( - "job", sa.Column("mode", sa.String(length=30), nullable=True) - ) + op.add_column("job", sa.Column("mode", sa.String(length=30), nullable=True)) # ### end Alembic commands ### diff --git a/jobs/alembic/versions/7511c6790067_.py b/jobs/alembic/versions/7511c6790067_.py index 2857a099a..fc8b8bc05 100644 --- a/jobs/alembic/versions/7511c6790067_.py +++ b/jobs/alembic/versions/7511c6790067_.py @@ -20,9 +20,7 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "job", "users", nullable=True, new_column_name="annotators" - ) + op.alter_column("job", "users", nullable=True, new_column_name="annotators") op.add_column( "job", sa.Column( @@ -33,18 +31,14 @@ def upgrade(): ) op.add_column( "job", - sa.Column( - "owners", postgresql.JSONB(astext_type=sa.Text()), nullable=True - ), + sa.Column("owners", postgresql.JSONB(astext_type=sa.Text()), nullable=True), ) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "job", "annotators", nullable=True, new_column_name="users" - ) + op.alter_column("job", "annotators", nullable=True, new_column_name="users") op.drop_column("job", "owners") op.drop_column("job", "validators") # ### end Alembic commands ### diff --git a/jobs/alembic/versions/9229e70d2791_.py b/jobs/alembic/versions/9229e70d2791_.py index 19b0c9c87..ed11eeaa9 100644 --- a/jobs/alembic/versions/9229e70d2791_.py +++ b/jobs/alembic/versions/9229e70d2791_.py @@ -25,17 +25,11 @@ def upgrade(): sa.Column("id", sa.Integer(), nullable=False), sa.Column("name", sa.String(length=250), nullable=True), sa.Column("status", sa.String(length=250), nullable=True), - sa.Column( - "files", postgresql.JSONB(astext_type=sa.Text()), nullable=True - ), - sa.Column( - "datasets", postgresql.JSONB(astext_type=sa.Text()), nullable=True - ), + sa.Column("files", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("datasets", postgresql.JSONB(astext_type=sa.Text()), nullable=True), sa.Column("creation_datetime", sa.DateTime(), nullable=True), sa.Column("type", sa.String(length=20), nullable=True), - sa.Column( - "users", postgresql.JSONB(astext_type=sa.Text()), nullable=True - ), + sa.Column("users", postgresql.JSONB(astext_type=sa.Text()), nullable=True), sa.Column( "categories", postgresql.JSONB(astext_type=sa.Text()), diff --git a/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py b/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py index 25a1ed7f3..eeef4ce61 100644 --- a/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py +++ b/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py @@ -5,24 +5,26 @@ Create Date: 2022-03-17 20:22:30.242625 """ -from alembic import op import sqlalchemy as sa +from alembic import op # revision identifiers, used by Alembic. -revision = 'b4afb5ae8923' -down_revision = '86f432539475' +revision = "b4afb5ae8923" +down_revision = "86f432539475" branch_labels = None depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.add_column('job', sa.Column('start_manual_job_automatically', sa.Boolean(), nullable=True)) + op.add_column( + "job", sa.Column("start_manual_job_automatically", sa.Boolean(), nullable=True) + ) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - op.drop_column('job', 'start_manual_job_automatically') + op.drop_column("job", "start_manual_job_automatically") # ### end Alembic commands ### diff --git a/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py b/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py index 8c321be4b..46bb7e25a 100644 --- a/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py +++ b/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py @@ -5,24 +5,24 @@ Create Date: 2022-12-09 13:10:42.668902 """ -from alembic import op import sqlalchemy as sa +from alembic import op # revision identifiers, used by Alembic. -revision = 'f60dd492b17f' -down_revision = 'b4afb5ae8923' +revision = "f60dd492b17f" +down_revision = "b4afb5ae8923" branch_labels = None depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.add_column('job', sa.Column('extensive_coverage', sa.Integer(), nullable=True)) + op.add_column("job", sa.Column("extensive_coverage", sa.Integer(), nullable=True)) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('job', 'extensive_coverage') + op.drop_column("job", "extensive_coverage") # ### end Alembic commands ### diff --git a/jobs/jobs/create_job_funcs.py b/jobs/jobs/create_job_funcs.py index da528d783..e1f8a1536 100644 --- a/jobs/jobs/create_job_funcs.py +++ b/jobs/jobs/create_job_funcs.py @@ -57,9 +57,7 @@ async def create_extraction_job( ) pipeline_id = pipeline_instance.get("id") - pipeline_categories = pipeline_instance.get("meta", {}).get( - "categories", [] - ) + pipeline_categories = pipeline_instance.get("meta", {}).get("categories", []) ( files_data, @@ -151,9 +149,7 @@ async def create_extraction_annotation_job( detail="No valid data (files, datasets) provided", ) - pipeline_categories = pipeline_instance.get("meta", {}).get( - "categories", [] - ) + pipeline_categories = pipeline_instance.get("meta", {}).get("categories", []) manual_categories = extraction_annotation_job_input.categories categories = list( set( diff --git a/jobs/jobs/db_service.py b/jobs/jobs/db_service.py index 7d9b2f3e8..ff43ce008 100644 --- a/jobs/jobs/db_service.py +++ b/jobs/jobs/db_service.py @@ -136,9 +136,7 @@ def get_all_jobs(db: Session) -> List[Dict[str, Any]]: return [job.as_dict for job in db.query(dbm.CombinedJob)] -def get_job_in_db_by_id( - db: Session, job_id: int -) -> Union[dbm.CombinedJob, Any]: +def get_job_in_db_by_id(db: Session, job_id: int) -> Union[dbm.CombinedJob, Any]: """Getting hold on a job in the database by its id""" job_needed = db.query(dbm.CombinedJob).get(job_id) return job_needed diff --git a/jobs/jobs/main.py b/jobs/jobs/main.py index 57a8939f7..4faaab45b 100644 --- a/jobs/jobs/main.py +++ b/jobs/jobs/main.py @@ -218,8 +218,7 @@ async def change_job( if (owners := job_to_change.owners) and user_id not in owners: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, - detail="Access denied. This user is not " - "allowed to change the job", + detail="Access denied. 
This user is not " "allowed to change the job", ) if ( @@ -249,9 +248,7 @@ async def change_job( schemas.JobType.AnnotationJob, schemas.JobType.ExtractionWithAnnotationJob, ]: - new_job_params_for_annotation = utils.pick_params_for_annotation( - new_job_params - ) + new_job_params_for_annotation = utils.pick_params_for_annotation(new_job_params) if new_job_params_for_annotation.dict(exclude_defaults=True): await utils.update_job_in_annotation( job_id=job_id, diff --git a/jobs/jobs/schemas.py b/jobs/jobs/schemas.py index 90cbb386c..893a7cc5a 100644 --- a/jobs/jobs/schemas.py +++ b/jobs/jobs/schemas.py @@ -62,9 +62,7 @@ class AnnotationJobParams(BaseModel): extensive_coverage: int = 1 -class ExtractionWithAnnotationJobParams( - ExtractionJobParams, AnnotationJobParams -): +class ExtractionWithAnnotationJobParams(ExtractionJobParams, AnnotationJobParams): start_manual_job_automatically: Optional[bool] = True @@ -117,9 +115,7 @@ def check_files_and_datasets_are_not_empty( # pylint: disable=no-self-argument ) -> List[int]: if not values.get("type") == JobType.ImportJob: if not v and not values.get("files"): - raise ValueError( - "files and datasets cannot be empty at the same time" - ) + raise ValueError("files and datasets cannot be empty at the same time") return v # ---- AnnotationJob and ExtractionWithAnnotationJob attributes ---- # @@ -128,9 +124,7 @@ def check_is_auto_distribution( # pylint: disable=no-self-argument cls, v: bool, values: Dict[str, Any] ) -> bool: if values.get("type") == JobType.ExtractionJob and v: - raise ValueError( - "is_auto_distribution cannot be assigned to ExtractionJob" - ) + raise ValueError("is_auto_distribution cannot be assigned to ExtractionJob") return v @validator( @@ -148,9 +142,7 @@ def check_annotationjob_attributes( job_type = values.get("type") if v: if job_type == JobType.ExtractionJob: - raise ValueError( - f"{field.name} cannot be assigned to ExtractionJob" - ) + raise ValueError(f"{field.name} cannot be assigned to ExtractionJob") elif job_type == JobType.AnnotationJob: raise ValueError(f"{field.name} cannot be empty for {job_type}") @@ -163,23 +155,17 @@ def check_annotators( # pylint: disable=no-self-argument job_type = values.get("type") validation_type = values.get("validation_type") if job_type == JobType.ExtractionJob: - raise ValueError( - f"{field.name} cannot be assigned to ExtractionJob" - ) + raise ValueError(f"{field.name} cannot be assigned to ExtractionJob") require_annotators = { ValidationType.hierarchical, ValidationType.cross, } if v and validation_type == ValidationType.validation_only: - raise ValueError( - f"{field.name} should be empty with {validation_type=}" - ) + raise ValueError(f"{field.name} should be empty with {validation_type=}") elif not v and validation_type in require_annotators: - raise ValueError( - f"{field.name} cannot be empty with {validation_type=}" - ) + raise ValueError(f"{field.name} cannot be empty with {validation_type=}") elif len(v) < 2 and validation_type == ValidationType.cross: raise ValueError( @@ -197,23 +183,17 @@ def check_validators( # pylint: disable=no-self-argument validation_type = values.get("validation_type") if job_type == JobType.ExtractionJob: - raise ValueError( - f"{field.name} cannot be assigned to ExtractionJob" - ) + raise ValueError(f"{field.name} cannot be assigned to ExtractionJob") if ( validation_type in [ValidationType.hierarchical, ValidationType.validation_only] and not v ): - raise ValueError( - f"{field.name} cannot be empty with {validation_type=}" - ) + raise 
ValueError(f"{field.name} cannot be empty with {validation_type=}") if validation_type == ValidationType.cross and v: - raise ValueError( - f"{field.name} should be empty with {validation_type=}" - ) + raise ValueError(f"{field.name} should be empty with {validation_type=}") return v @@ -225,9 +205,7 @@ def check_import_job_attributes( # pylint: disable=no-self-argument if job_type != JobType.ImportJob and v: raise ValueError(f"{field.name} cannot be assigned to {job_type}") if job_type == JobType.ImportJob and not v: - raise ValueError( - f"{field.name} cannot be empty in {JobType.ImportJob}" - ) + raise ValueError(f"{field.name} cannot be empty in {JobType.ImportJob}") return v @validator("extensive_coverage") @@ -236,18 +214,12 @@ def check_extensive_coverage( ): validation_type = values.get("validation_type") if validation_type != ValidationType.extensive_coverage and v: - raise ValueError( - f"{field.name} cannot be assigned to {validation_type}." - ) + raise ValueError(f"{field.name} cannot be assigned to {validation_type}.") if validation_type != ValidationType.extensive_coverage and not v: - raise ValueError( - f"{field.name} cannot be empty with {validation_type=}." - ) + raise ValueError(f"{field.name} cannot be empty with {validation_type=}.") annotators = values.get("annotators") if v > len(annotators): - raise ValueError( - f"{field.name} cannot be less then number of annotators." - ) + raise ValueError(f"{field.name} cannot be less then number of annotators.") return v # ---- ExtractionJob and ExtractionWithAnnotationJob attributes ---- # @@ -256,16 +228,12 @@ def check_pipeline_name( # pylint: disable=no-self-argument cls, v: str, values: Dict[str, Any] ) -> str: if values.get("type") == JobType.AnnotationJob and v: - raise ValueError( - "pipeline_name cannot be assigned to AnnotationJob" - ) + raise ValueError("pipeline_name cannot be assigned to AnnotationJob") if ( values.get("type") == JobType.ExtractionJob or values.get("type") == JobType.ExtractionWithAnnotationJob ) and not v: - raise ValueError( - f'pipeline cannot be empty for {values.get("type")}' - ) + raise ValueError(f'pipeline cannot be empty for {values.get("type")}') return v diff --git a/jobs/jobs/utils.py b/jobs/jobs/utils.py index c92d977b9..a9e78e006 100644 --- a/jobs/jobs/utils.py +++ b/jobs/jobs/utils.py @@ -49,9 +49,7 @@ async def get_files_data_from_datasets( raise_for_status=True, ) if status == 404: - logger.error( - f"Failed request to the Dataset Manager: {response}" - ) + logger.error(f"Failed request to the Dataset Manager: {response}") continue except aiohttp.client_exceptions.ClientError as err: logger.error(f"Failed request to the Dataset Manager: {err}") @@ -87,8 +85,7 @@ async def get_files_data_from_separate_files( "filters": [{"field": "id", "operator": "in", "value": batch}], } logger.info( - "Sending request to the dataset manager " - "to get info about files" + "Sending request to the dataset manager " "to get info about files" ) _, response = await fetch( method="POST", @@ -109,9 +106,7 @@ async def get_files_data_from_separate_files( all_files_data.extend(response["data"]) - valid_separate_files_uuids = [ - file_data["id"] for file_data in all_files_data - ] + valid_separate_files_uuids = [file_data["id"] for file_data in all_files_data] return all_files_data, valid_separate_files_uuids @@ -178,9 +173,7 @@ def convert_files_data_for_inference( ) ) else: - for batch_id, pages_list_chunk in enumerate( - divided_pages_list, start=1 - ): + for batch_id, pages_list_chunk in 
enumerate(divided_pages_list, start=1): converted_data.append( generate_file_data( file_data, @@ -346,9 +339,7 @@ async def execute_in_annotation_microservice( return None -def delete_duplicates( - files_data: List[Dict[str, Any]] -) -> List[Dict[str, Any]]: +def delete_duplicates(files_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Delete duplicates""" used_file_ids = set() @@ -364,9 +355,7 @@ def delete_duplicates( def pick_params_for_annotation( new_job_params: JobParamsToChange, ) -> AnnotationJobUpdateParamsInAnnotation: - picked_params = AnnotationJobUpdateParamsInAnnotation.parse_obj( - new_job_params - ) + picked_params = AnnotationJobUpdateParamsInAnnotation.parse_obj(new_job_params) return picked_params @@ -393,9 +382,7 @@ async def start_job_in_annotation( raise_for_status=True, ) except aiohttp.client_exceptions.ClientError as err: - logger.error( - "Failed request to the Annotation Manager: {}".format(err) - ) + logger.error("Failed request to the Annotation Manager: {}".format(err)) raise fastapi.HTTPException( status_code=fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY, detail="Failed request to the Annotation Manager: {}".format(err), @@ -554,9 +541,7 @@ def get_categories_ids( categories: List[Union[str, CategoryLinkInput]] ) -> Tuple[List[str], List[CategoryLinkInput]]: categories_ids = [ - category_id - for category_id in categories - if isinstance(category_id, str) + category_id for category_id in categories if isinstance(category_id, str) ] categories_links = [ category_link diff --git a/jobs/tests/conftest.py b/jobs/tests/conftest.py index 667f04a4d..532d454e6 100644 --- a/jobs/tests/conftest.py +++ b/jobs/tests/conftest.py @@ -101,9 +101,7 @@ def setup_tenant(): def testing_app(testing_engine, testing_session, setup_tenant): with patch("jobs.db_service.LocalSession", testing_session): main.app.dependency_overrides[main.tenant] = lambda: setup_tenant - main.app.dependency_overrides[ - service.get_session - ] = lambda: testing_session + main.app.dependency_overrides[service.get_session] = lambda: testing_session client = TestClient(main.app) yield client @@ -223,9 +221,7 @@ def mock_data_dataset22(): def request_body_for_invalid_file(): request_body = { "pagination": {"page_num": 1, "page_size": 15}, - "filters": [ - {"field": "id", "operator": "eq", "value": "some invalid file id"} - ], + "filters": [{"field": "id", "operator": "eq", "value": "some invalid file id"}], "sorting": [{"field": "id", "direction": "asc"}], } return request_body @@ -262,8 +258,7 @@ def pipeline_info_from_pipeline_manager(): { "id": "7571f17b-d9f1-4d31-af42-7f29fbfd0fb9", "model": "ternary", - "model_url": "http://ternary.dev1/v1/models/" - "ternary:predict", + "model_url": "http://ternary.dev1/v1/models/" "ternary:predict", "categories": ["mrt"], "steps": [], } diff --git a/jobs/tests/test_API_functions/test_ExtractionWithAnnotationJob_workflows.py b/jobs/tests/test_API_functions/test_ExtractionWithAnnotationJob_workflows.py index f783fdb3b..887954ef7 100644 --- a/jobs/tests/test_API_functions/test_ExtractionWithAnnotationJob_workflows.py +++ b/jobs/tests/test_API_functions/test_ExtractionWithAnnotationJob_workflows.py @@ -41,9 +41,7 @@ def test_change_extraction_job_to_extraction_with_annotation_job_and_run_it( job_id = int(response1.json()["id"]) # --------- Changing Job Status to Finished - imitates callback from Pipeline Manager -------- # - response2 = testing_app.put( - f"/jobs/{job_id}", json={"status": "Finished"} - ) + response2 = testing_app.put(f"/jobs/{job_id}", 
json={"status": "Finished"}) assert response2.status_code == 200 assert response2.json()["status"] == schemas.Status.finished @@ -63,13 +61,8 @@ def test_change_extraction_job_to_extraction_with_annotation_job_and_run_it( }, ) assert response3.status_code == 200 - assert ( - response3.json()["type"] - == schemas.JobType.ExtractionWithAnnotationJob - ) - assert ( - response3.json()["status"] == schemas.Status.ready_for_annotation - ) + assert response3.json()["type"] == schemas.JobType.ExtractionWithAnnotationJob + assert response3.json()["status"] == schemas.Status.ready_for_annotation assert response3.json()["mode"] == schemas.JobMode.Manual # ---------- Running ExtractionWithAnnotationJob - only manual part ------ # @@ -130,13 +123,9 @@ def test_create_extraction_with_annotation_job_and_run_it( test_job_id = int(response.json()["id"]) # --------- Changing Job Status to Finished - imitates callback from Pipeline Manager -------- # - response2 = testing_app.put( - f"/jobs/{test_job_id}", json={"status": "Finished"} - ) + response2 = testing_app.put(f"/jobs/{test_job_id}", json={"status": "Finished"}) assert response2.status_code == 200 - assert ( - response2.json()["status"] == schemas.Status.ready_for_annotation - ) + assert response2.json()["status"] == schemas.Status.ready_for_annotation assert response2.json()["mode"] == schemas.JobMode.Manual # --------- Changing Job Status to In Progress - imitates callback from Annotation Manager -------- # @@ -150,9 +139,7 @@ def test_create_extraction_with_annotation_job_and_run_it( # Then Manual Part executes from Annotation Microservice # --------- Changing Job Status to Finished - imitates callback from Annotation Manager -------- # - response5 = testing_app.put( - f"/jobs/{test_job_id}", json={"status": "Finished"} - ) + response5 = testing_app.put(f"/jobs/{test_job_id}", json={"status": "Finished"}) assert response5.status_code == 200 assert response5.json()["status"] == schemas.Status.finished @@ -205,11 +192,7 @@ def test_create_extraction_with_annotation_job_and_autostart_false( test_job_id = int(response.json()["id"]) # --------- Changing Job Status to Finished - imitates callback from Pipeline Manager -------- # - response2 = testing_app.put( - f"/jobs/{test_job_id}", json={"status": "Finished"} - ) + response2 = testing_app.put(f"/jobs/{test_job_id}", json={"status": "Finished"}) assert response2.status_code == 200 - assert ( - response2.json()["status"] == schemas.Status.ready_for_annotation - ) + assert response2.json()["status"] == schemas.Status.ready_for_annotation assert response2.json()["mode"] == schemas.JobMode.Manual diff --git a/jobs/tests/test_API_functions/test_args_validation.py b/jobs/tests/test_API_functions/test_args_validation.py index 06af3b196..1d1ed8ea2 100644 --- a/jobs/tests/test_API_functions/test_args_validation.py +++ b/jobs/tests/test_API_functions/test_args_validation.py @@ -12,9 +12,7 @@ def test_create_annotation_job_lack_of_data(testing_app): "datasets": [1, 2], "files": [], "is_auto_distribution": False, - "deadline": str( - datetime.datetime.utcnow() + datetime.timedelta(days=1) - ), + "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), "is_draft": False, }, ) @@ -54,9 +52,7 @@ def test_create_annotation_job_excessive_data(testing_app): "validators": ["validator1", "validator2"], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str( - datetime.datetime.utcnow() + datetime.timedelta(days=1) - ), + "deadline": str(datetime.datetime.utcnow() + 
datetime.timedelta(days=1)), "pipeline_name": "pipeline", }, ) @@ -128,9 +124,7 @@ def test_create_extraction_with_annotation_job_lack_of_data(testing_app): "users": [1, 2], "files": [1, 2], "is_auto_distribution": False, - "deadline": str( - datetime.datetime.utcnow() + datetime.timedelta(days=1) - ), + "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), "is_draft": False, }, ) @@ -163,9 +157,7 @@ def test_create_annotation_job_cross_validation_with_validators(testing_app): "validators": ["validator1"], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str( - datetime.datetime.utcnow() + datetime.timedelta(days=1) - ), + "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), }, ) assert response.status_code == 422 @@ -197,9 +189,7 @@ def test_create_annotation_job_cross_validation_without_annotators( "validators": [], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str( - datetime.datetime.utcnow() + datetime.timedelta(days=1) - ), + "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), }, ) assert response.status_code == 422 @@ -231,9 +221,7 @@ def test_create_annotation_job_cross_validation_annotators_not_enough( "validators": [], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str( - datetime.datetime.utcnow() + datetime.timedelta(days=1) - ), + "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), }, ) assert response.status_code == 422 @@ -265,9 +253,7 @@ def test_create_annotation_job_hierarchichal_validation_without_validators( "validators": [], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str( - datetime.datetime.utcnow() + datetime.timedelta(days=1) - ), + "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), }, ) assert response.status_code == 422 @@ -324,9 +310,7 @@ def test_create_annotationjob_validation_only_validation_type_with_annotators( "validators": ["validator1"], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str( - datetime.datetime.utcnow() + datetime.timedelta(days=1) - ), + "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), }, ) assert response.status_code == 422 @@ -359,9 +343,7 @@ def test_create_annotationjob_validation_only_validation_type_without_validators "validators": [], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str( - datetime.datetime.utcnow() + datetime.timedelta(days=1) - ), + "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), }, ) assert response.status_code == 422 diff --git a/jobs/tests/test_API_functions/test_change_job-proxy_to_annotation.py b/jobs/tests/test_API_functions/test_change_job-proxy_to_annotation.py index 2283747d1..719505dc4 100644 --- a/jobs/tests/test_API_functions/test_change_job-proxy_to_annotation.py +++ b/jobs/tests/test_API_functions/test_change_job-proxy_to_annotation.py @@ -9,9 +9,7 @@ def test_change_annotation_job_with_request_to_annotation( ): with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: mock.side_effect = [(200, {})] - create_mock_annotation_job_in_db( - testing_session, mock_AnnotationJobParams2 - ) + create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams2) response = testing_app.put( "/jobs/1", json={ @@ -33,9 +31,7 @@ def test_change_annotation_job_without_request_to_annotation( testing_app, 
testing_session, mock_AnnotationJobParams2 ): with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: - create_mock_annotation_job_in_db( - testing_session, mock_AnnotationJobParams2 - ) + create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams2) response = testing_app.put("/jobs/1", json={"status": "Finished"}) assert response.status_code == 200 assert response.json()["status"] == "Finished" @@ -48,9 +44,7 @@ def test_change_annotation_job_with_partial_request_to_annotation( ): with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: mock.side_effect = [(200, {})] - create_mock_annotation_job_in_db( - testing_session, mock_AnnotationJobParams2 - ) + create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams2) response = testing_app.put( "/jobs/1", json={ diff --git a/jobs/tests/test_API_functions/test_change_job.py b/jobs/tests/test_API_functions/test_change_job.py index 1a2c66aaf..2f7793d1c 100644 --- a/jobs/tests/test_API_functions/test_change_job.py +++ b/jobs/tests/test_API_functions/test_change_job.py @@ -48,9 +48,7 @@ def test_change_job_status_with_validation_incorrect_job_owner( ): create_mock_extraction_job_in_db(testing_session) - create_mock_annotation_job_in_db( - testing_session, mock_AnnotationJobParams2 - ) + create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams2) response2 = testing_app.put( "/jobs/2", json={"status": "Finished"}, @@ -61,9 +59,7 @@ def test_change_job_status_with_validation_incorrect_job_owner( } -def test_change_job_pipeline_id( - testing_app, testing_session, mock_AnnotationJobParams -): +def test_change_job_pipeline_id(testing_app, testing_session, mock_AnnotationJobParams): create_mock_extraction_job_in_db(testing_session) response = testing_app.put("/jobs/1", json={"pipeline_id": 555}) assert response.status_code == 200 @@ -76,10 +72,17 @@ def test_change_job_linked_taxonomy( create_mock_extraction_job_in_db(testing_session) with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: mock.side_effect = [(204, {}), (200, {})] - response = testing_app.put("/jobs/1", json={"categories": [{ - "category_id": "category2", - "taxonomy_id": "my_taxonomy_id", - "taxonomy_version": 1 - }]}) + response = testing_app.put( + "/jobs/1", + json={ + "categories": [ + { + "category_id": "category2", + "taxonomy_id": "my_taxonomy_id", + "taxonomy_version": 1, + } + ] + }, + ) assert response.status_code == 200 assert response.json()["categories"] == ["category2"] diff --git a/jobs/tests/test_API_functions/test_create_job.py b/jobs/tests/test_API_functions/test_create_job.py index 31a91435e..33f6f3364 100644 --- a/jobs/tests/test_API_functions/test_create_job.py +++ b/jobs/tests/test_API_functions/test_create_job.py @@ -24,9 +24,7 @@ def test_create_annotation_job_draft(testing_app, jw_token): "categories": ["category1", "category2"], "validation_type": schemas.ValidationType.hierarchical, "is_auto_distribution": False, - "deadline": str( - datetime.datetime.utcnow() + datetime.timedelta(days=1) - ), + "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), "is_draft": True, }, ) @@ -194,9 +192,7 @@ def test_schedule_manual_job_valid_datasets( assert response.json()["name"] == "MockAnnotationJob" -def test_schedule_manual_job_one_invalid_dataset( - testing_app, mock_data_dataset11 -): +def test_schedule_manual_job_one_invalid_dataset(testing_app, mock_data_dataset11): with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: mock.side_effect = [ 
(200, mock_data_dataset11), diff --git a/jobs/tests/test_API_functions/test_other_API_functions.py b/jobs/tests/test_API_functions/test_other_API_functions.py index 6dc7c93f5..144e0adb1 100644 --- a/jobs/tests/test_API_functions/test_other_API_functions.py +++ b/jobs/tests/test_API_functions/test_other_API_functions.py @@ -7,9 +7,7 @@ ) -def test_get_all_jobs_endpoint( - testing_app, testing_session, mock_AnnotationJobParams -): +def test_get_all_jobs_endpoint(testing_app, testing_session, mock_AnnotationJobParams): create_mock_extraction_job_in_db(testing_session) create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams) @@ -20,9 +18,7 @@ def test_get_all_jobs_endpoint( assert response.json()[1]["name"] == "MockAnnotationJob" -def test_get_job_by_id_positive( - testing_app, testing_session, mock_AnnotationJobParams -): +def test_get_job_by_id_positive(testing_app, testing_session, mock_AnnotationJobParams): create_mock_extraction_job_in_db(testing_session) create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams) response = testing_app.get("/jobs/2") @@ -30,9 +26,7 @@ def test_get_job_by_id_positive( assert response.json()["name"] == "MockAnnotationJob" -def test_get_job_by_id_negative( - testing_app, testing_session, mock_AnnotationJobParams -): +def test_get_job_by_id_negative(testing_app, testing_session, mock_AnnotationJobParams): create_mock_extraction_job_in_db( testing_session, ) @@ -42,9 +36,7 @@ def test_get_job_by_id_negative( assert response.json()["detail"] == "Job with this id does not exist." -def test_delete_job_positive( - testing_app, testing_session, mock_AnnotationJobParams -): +def test_delete_job_positive(testing_app, testing_session, mock_AnnotationJobParams): with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: mock.side_effect = [(200, {})] create_mock_extraction_job_in_db(testing_session) diff --git a/jobs/tests/test_API_functions/test_search_jobs.py b/jobs/tests/test_API_functions/test_search_jobs.py index 317ce99a3..1ea01cdf9 100644 --- a/jobs/tests/test_API_functions/test_search_jobs.py +++ b/jobs/tests/test_API_functions/test_search_jobs.py @@ -10,9 +10,7 @@ def test_search_job_positive(testing_app, testing_session): "/jobs/search", json={ "pagination": {"page_num": 1, "page_size": 15}, - "filters": [ - {"field": "id", "operator": "is_not_null", "value": "string"} - ], + "filters": [{"field": "id", "operator": "is_not_null", "value": "string"}], "sorting": [{"field": "id", "direction": "asc"}], }, ) @@ -37,7 +35,7 @@ def test_search_job_invalid_field(testing_app, testing_session): }, ) assert response.status_code == 422 - response_message = response.json()['detail'][0]['msg'] + response_message = response.json()["detail"][0]["msg"] assert response_message.startswith("value is not a valid enumeration member") @@ -73,17 +71,13 @@ def test_search_job_has_pagination( ): for _ in range(25): create_mock_extraction_job_in_db(testing_session) - create_mock_annotation_job_in_db( - testing_session, mock_AnnotationJobParams - ) + create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams) response1 = testing_app.post( "/jobs/search", json={ "pagination": {"page_num": 1, "page_size": 15}, - "filters": [ - {"field": "id", "operator": "is_not_null", "value": "string"} - ], + "filters": [{"field": "id", "operator": "is_not_null", "value": "string"}], "sorting": [{"field": "id", "direction": "asc"}], }, ) @@ -100,9 +94,7 @@ def test_search_job_has_pagination( "/jobs/search", json={ "pagination": 
{"page_num": 2, "page_size": 15}, - "filters": [ - {"field": "id", "operator": "is_not_null", "value": "string"} - ], + "filters": [{"field": "id", "operator": "is_not_null", "value": "string"}], "sorting": [{"field": "id", "direction": "asc"}], }, ) diff --git a/jobs/tests/test_db.py b/jobs/tests/test_db.py index 63b580536..95e149580 100644 --- a/jobs/tests/test_db.py +++ b/jobs/tests/test_db.py @@ -92,9 +92,7 @@ def test_create_extraction_job_in_db(testing_session): assert second_quantity_of_jobs - first_quantity_of_jobs == 1 -def test_create_annotation_job_in_db( - testing_session, mock_AnnotationJobParams -): +def test_create_annotation_job_in_db(testing_session, mock_AnnotationJobParams): first_quantity_of_jobs = len(db_service.get_all_jobs(testing_session)) assert db_service.create_annotation_job( db=testing_session, @@ -133,9 +131,7 @@ def create_mock_extraction_job_in_db_draft(testing_session): return result -def create_mock_annotation_job_in_db( - testing_session, mock_AnnotationJobParams -): +def create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams): result = db_service.create_annotation_job( db=testing_session, annotation_job_input=mock_AnnotationJobParams, @@ -192,9 +188,7 @@ def test_create_ImportJob(testing_session): import_bucket="jpg", ) - new_import_job = db_service.create_import_job( - testing_session, mockImportJobParams - ) + new_import_job = db_service.create_import_job(testing_session, mockImportJobParams) assert new_import_job assert new_import_job.name == "MockImportJob" assert new_import_job.type == schemas.JobType.ImportJob diff --git a/jobs/tests/test_utils.py b/jobs/tests/test_utils.py index ec68c45a2..a15ad3449 100644 --- a/jobs/tests/test_utils.py +++ b/jobs/tests/test_utils.py @@ -75,9 +75,7 @@ async def test_positive_get_files_data_from_datasets( ) assert ( - await utils.get_files_data_from_datasets( - [1, 2], "test_tenant", jw_token - ) + await utils.get_files_data_from_datasets([1, 2], "test_tenant", jw_token) == expected_result ) @@ -125,9 +123,7 @@ async def test_get_files_data_from_datasets_with_one_invalid_tag( [1], ) assert ( - await utils.get_files_data_from_datasets( - [1, 444], "test_tenant", jw_token - ) + await utils.get_files_data_from_datasets([1, 444], "test_tenant", jw_token) == expected_result ) @@ -145,13 +141,9 @@ async def test_get_files_data_from_datasets_with_all_invalid_tags(jw_token): @pytest.mark.asyncio async def test_get_files_data_from_datasets_501_error(jw_token): - with patch( - "jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError() - ): + with patch("jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError()): with pytest.raises(HTTPException) as e_info: - await utils.get_files_data_from_datasets( - [121], "test_tenant", jw_token - ) + await utils.get_files_data_from_datasets([121], "test_tenant", jw_token) assert e_info.value.status_code == 422 @@ -267,9 +259,7 @@ async def test_get_files_data_from_separate_files_100_elements(jw_token): ], } - with patch( - "jobs.utils.fetch", return_value=(200, large_mock_files_data) - ) as mock: + with patch("jobs.utils.fetch", return_value=(200, large_mock_files_data)) as mock: assert await utils.get_files_data_from_separate_files( list(range(1, 101)), "test_tenant", jw_token ) == ( @@ -331,9 +321,7 @@ async def test_get_files_data_from_separate_files_101_elements(jw_token): for i in range(101, 102) ], } - with patch( - "jobs.utils.fetch", side_effect=[(200, json_1), (200, json_2)] - ) as mock: + with patch("jobs.utils.fetch", 
side_effect=[(200, json_1), (200, json_2)]) as mock: expected_files_data = [ { "id": i, @@ -427,9 +415,7 @@ async def test_get_files_data_from_separate_files_111_elements(jw_token): for i in range(101, 111) ], } - with patch( - "jobs.utils.fetch", side_effect=[(200, json_1), (200, json_2)] - ) as mock: + with patch("jobs.utils.fetch", side_effect=[(200, json_1), (200, json_2)]) as mock: assert await utils.get_files_data_from_separate_files( list(range(1, 111)), "test_tenant", jw_token ) == ( @@ -443,9 +429,7 @@ async def test_get_files_data_from_separate_files_111_elements(jw_token): async def test_get_files_data_from_separate_files_501_code( request_body_for_invalid_file, jw_token ): - with patch( - "jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError() - ): + with patch("jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError()): with pytest.raises(HTTPException) as e_info: await utils.get_files_data_from_separate_files( [1234], "test_tenant", jw_token @@ -508,9 +492,7 @@ async def test_get_pipeline_id_by_its_name_positive(jw_token): @pytest.mark.asyncio async def test_get_pipeline_id_by_its_name_negative(jw_token): - with patch( - "jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError() - ): + with patch("jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError()): with pytest.raises(HTTPException) as e_info: await utils.get_pipeline_instance_by_its_name( "invalid_pipeline_name", "test_tenant", jw_token @@ -523,9 +505,7 @@ async def test_get_pipeline_id_by_its_name_negative(jw_token): @pytest.mark.asyncio async def test_execute_pipeline_negative(jw_token): - with patch( - "jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError() - ): + with patch("jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError()): with pytest.raises(HTTPException) as e_info: await utils.execute_pipeline( pipeline_id=2, diff --git a/lib/filter_lib/src/dict_parser.py b/lib/filter_lib/src/dict_parser.py index 112b3277e..2c4418df9 100644 --- a/lib/filter_lib/src/dict_parser.py +++ b/lib/filter_lib/src/dict_parser.py @@ -1,9 +1,7 @@ from typing import Any, Dict -def map_request_to_filter( - fields: Dict[str, Any], model: str -) -> Dict[str, Any]: +def map_request_to_filter(fields: Dict[str, Any], model: str) -> Dict[str, Any]: result: Dict[str, Any] = { "pagination": {}, "filters": [], diff --git a/lib/filter_lib/src/enum_generator.py b/lib/filter_lib/src/enum_generator.py index 4f78341c3..4a2517b05 100644 --- a/lib/filter_lib/src/enum_generator.py +++ b/lib/filter_lib/src/enum_generator.py @@ -15,9 +15,7 @@ class TempEnum(str, enum.Enum): def _get_model_fields(model: Type[DeclarativeMeta]) -> List[str]: mapper: Mapper = inspect(model) relations = [ - attr - for attr in inspect(model).attrs - if isinstance(attr, RelationshipProperty) + attr for attr in inspect(model).attrs if isinstance(attr, RelationshipProperty) ] relation_fields = [ rel.key + "." 
+ col.key diff --git a/lib/filter_lib/src/query_modificator.py b/lib/filter_lib/src/query_modificator.py index b04621879..488506add 100644 --- a/lib/filter_lib/src/query_modificator.py +++ b/lib/filter_lib/src/query_modificator.py @@ -38,9 +38,7 @@ def get_distinct_columns( return result -def form_query( - args: Dict[str, Any], query: Query -) -> Tuple[Query, PaginationParams]: +def form_query(args: Dict[str, Any], query: Query) -> Tuple[Query, PaginationParams]: filters = args.get("filters") sorting = args.get("sorting") pagination = args.get("pagination") @@ -50,14 +48,10 @@ def form_query( filters with 'distinct' operator and others. It's being done because DISTINCT statements should only be applied to query all at once, rather than one by one""" - distinct_filters, non_distinct_filters = splint_to_distinct_and_not( - filters - ) + distinct_filters, non_distinct_filters = splint_to_distinct_and_not(filters) if distinct_filters: distinct_columns = get_distinct_columns(query, distinct_filters) - query = query.with_entities(*distinct_columns).distinct( - *distinct_columns - ) + query = query.with_entities(*distinct_columns).distinct(*distinct_columns) for fil in non_distinct_filters: query = _create_filter(query, fil) @@ -129,9 +123,7 @@ def validate_filter_args( f"Field value should not be null for operator {operator.value}." ) if _has_relation(model, field) and _op_is_match(fil): - raise BadFilterFormat( - "Operator 'match' shouldn't be used with relations" - ) + raise BadFilterFormat("Operator 'match' shouldn't be used with relations") def _create_filter(query: Query, fil: Dict[str, Any]) -> Query: @@ -225,17 +217,13 @@ def _make_ltree_query( :param value: Id of record :return: Query instance """ - subquery = ( - query.with_entities(model.tree).filter(model.id == value).subquery() - ) + subquery = query.with_entities(model.tree).filter(model.id == value).subquery() if op == "parent": return ( query.filter( ( - func.subpath( - model.tree, 0, func.nlevel(subquery.c.tree) - 1 - ) + func.subpath(model.tree, 0, func.nlevel(subquery.c.tree) - 1) == model.tree ), func.index(subquery.c.tree, model.tree) != -1, @@ -262,9 +250,7 @@ def _make_ltree_query( return query -def _create_or_condition( - fil: Dict[str, str] -) -> Dict[str, List[Dict[str, str]]]: +def _create_or_condition(fil: Dict[str, str]) -> Dict[str, List[Dict[str, str]]]: fil_include_null = fil.copy() fil_include_null["op"] = "is_null" filter_args = {"or": [{**fil}, {**fil_include_null}]} diff --git a/lib/filter_lib/src/schema_generator.py b/lib/filter_lib/src/schema_generator.py index f6e0d5813..b494b8bb0 100644 --- a/lib/filter_lib/src/schema_generator.py +++ b/lib/filter_lib/src/schema_generator.py @@ -68,9 +68,7 @@ class BaseSearch(BaseModel): pagination: Optional[Pagination] @root_validator - def root_validate( # pylint: disable=no-self-argument - cls, values: Any - ) -> Any: + def root_validate(cls, values: Any) -> Any: # pylint: disable=no-self-argument if not values.get("pagination"): values["pagination"] = Pagination(page_num=1, page_size=15) return values @@ -92,9 +90,7 @@ class Page(GenericModel, Generic[TypeC], BaseModel): data: Sequence[TypeC] @validator("data") - def custom_validator( # pylint: disable=no-self-argument - cls, v: Any - ) -> Any: + def custom_validator(cls, v: Any) -> Any: # pylint: disable=no-self-argument """Custom validator applied to data in case of using 'distinct' statement and getting result as 'sqlalchemy.util._collections.result' but not as model class object diff --git 
a/lib/filter_lib/tests/test_dict_parser.py b/lib/filter_lib/tests/test_dict_parser.py index 5228d14b8..a2130ef8a 100644 --- a/lib/filter_lib/tests/test_dict_parser.py +++ b/lib/filter_lib/tests/test_dict_parser.py @@ -3,9 +3,7 @@ example_1 = { "pagination": {"page_num": 1, "page_size": 50}, - "filters": [ - {"field": "ts_vector", "operator": "match", "value": "kubeflow"} - ], + "filters": [{"field": "ts_vector", "operator": "match", "value": "kubeflow"}], "sorting": [{"field": "id", "direction": "desc"}], } @@ -48,9 +46,7 @@ def test_positive_standard_structure(): "value": "kubeflow", } ], - "sorting": [ - {"model": "test_model", "field": "id", "direction": "desc"} - ], + "sorting": [{"model": "test_model", "field": "id", "direction": "desc"}], } @@ -77,9 +73,7 @@ def test_positive_many_nested_structures(): "value": 100, }, ], - "sorting": [ - {"model": "test_model", "field": "created", "direction": "desc"} - ], + "sorting": [{"model": "test_model", "field": "created", "direction": "desc"}], } diff --git a/lib/filter_lib/tests/test_enum_generator.py b/lib/filter_lib/tests/test_enum_generator.py index 3624d69fc..b821ddc25 100644 --- a/lib/filter_lib/tests/test_enum_generator.py +++ b/lib/filter_lib/tests/test_enum_generator.py @@ -32,9 +32,11 @@ def test_exclude_fields(): assert _exclude_fields( user_fields, ["id", "addresses.id", "addresses.location"] ) == ["name", "email", "addresses.owner"] - assert _exclude_fields( - address_fields, ["id", "user.name", "user.email"] - ) == ["location", "owner", "user.id"] + assert _exclude_fields(address_fields, ["id", "user.name", "user.email"]) == [ + "location", + "owner", + "user.id", + ] def test_get_table_name(): @@ -54,7 +56,5 @@ def test_create_enum_model(): assert user_enum.EMAIL.value == "email" address_enum = _create_enum_model(address_table_name, address_fields) - assert address_enum.ID.value == "id", ( - address_enum.LOCATION.value == "location" - ) + assert address_enum.ID.value == "id", address_enum.LOCATION.value == "location" assert address_enum.OWNER.value == "owner" diff --git a/lib/filter_lib/tests/test_pagination.py b/lib/filter_lib/tests/test_pagination.py index 638faf66b..327778c74 100644 --- a/lib/filter_lib/tests/test_pagination.py +++ b/lib/filter_lib/tests/test_pagination.py @@ -18,9 +18,7 @@ ], ) def test_pag_params(page_num, page_size, min_pages_left, total, has_more): - res = PaginationParams( - page_num, page_size, min_pages_left, total, has_more - ) + res = PaginationParams(page_num, page_size, min_pages_left, total, has_more) assert ( res.page_num, res.page_size, diff --git a/lib/filter_lib/tests/test_query_modifier.py b/lib/filter_lib/tests/test_query_modifier.py index 2a6f36b75..2198210f7 100644 --- a/lib/filter_lib/tests/test_query_modifier.py +++ b/lib/filter_lib/tests/test_query_modifier.py @@ -184,9 +184,7 @@ def test_create_filter_ltree_not_supported_operation(get_session): # Act query = _create_filter(query, spec) - expected_sql_str = ( - "SELECT categories.id, categories.tree \nFROM categories" - ) + expected_sql_str = "SELECT categories.id, categories.tree \nFROM categories" compiled_statement = query.statement.compile() @@ -344,9 +342,7 @@ def test_form_query_with_distincts_and_filters_and_sorting(get_session): "value": "%or%", }, ], - "sorting": [ - {"model": "User", "field": user_enum.NAME, "direction": "desc"} - ], + "sorting": [{"model": "User", "field": user_enum.NAME, "direction": "desc"}], } query, pag = form_query(specs, query) assert query.all() == [("Grigoriy",), ("Fedor",)] diff --git 
a/lib/filter_lib/tests/test_schema_generator.py b/lib/filter_lib/tests/test_schema_generator.py index 571582c4c..0b5caa6c9 100644 --- a/lib/filter_lib/tests/test_schema_generator.py +++ b/lib/filter_lib/tests/test_schema_generator.py @@ -18,9 +18,13 @@ def test_search_class_creating(): ] AddressFilter = create_filter_model(Address, exclude=["location"]) - assert AddressFilter.schema()["definitions"]["addresses_Address"][ - "enum" - ] == ["id", "owner", "user.id", "user.name", "user.email"] + assert AddressFilter.schema()["definitions"]["addresses_Address"]["enum"] == [ + "id", + "owner", + "user.id", + "user.name", + "user.email", + ] def test_page_schema(): diff --git a/lib/filter_lib/usage_example/app.py b/lib/filter_lib/usage_example/app.py index f8de34a8b..d2e1f7210 100644 --- a/lib/filter_lib/usage_example/app.py +++ b/lib/filter_lib/usage_example/app.py @@ -2,9 +2,6 @@ from db_example import Address, User, get_db from fastapi import Depends, FastAPI -from pydantic import BaseModel -from sqlalchemy.orm import Session - from filter_lib import ( # type: ignore Page, create_filter_model, @@ -12,6 +9,8 @@ map_request_to_filter, paginate, ) +from pydantic import BaseModel +from sqlalchemy.orm import Session app = FastAPI() @@ -71,17 +70,13 @@ def search_users( def create_new_address( request: AddressCreate, session: Session = Depends(get_db) ) -> Set[str]: - new_address = Address( - email_address=request.email_address, user_id=request.user_id - ) + new_address = Address(email_address=request.email_address, user_id=request.user_id) session.add(new_address) session.commit() return {"New address created"} -@app.post( - "/addresses/search", tags=["addresses"], response_model=Page[AddressOut] -) +@app.post("/addresses/search", tags=["addresses"], response_model=Page[AddressOut]) def search_address( request: AddressFilterModel, session: Session = Depends(get_db) # type: ignore # noqa ) -> Page[UserOut]: diff --git a/lib/tenants/src/dependency.py b/lib/tenants/src/dependency.py index d12a98a46..bf09e684a 100644 --- a/lib/tenants/src/dependency.py +++ b/lib/tenants/src/dependency.py @@ -27,9 +27,7 @@ def __init__( """ self.key = key self.algorithm = self._check_algorithm(algorithm) - self.jwk_client: jwt.PyJWKClient = jwt.PyJWKClient( - self._create_url(url) - ) + self.jwk_client: jwt.PyJWKClient = jwt.PyJWKClient(self._create_url(url)) async def __call__(self, request: Request) -> TenantData: authorization: str = request.headers.get("Authorization") @@ -59,9 +57,7 @@ async def __call__(self, request: Request) -> TenantData: tenants = decoded.get("tenants") if decoded.get("clientId") == "pipelines": - return TenantData( - token=token, user_id=sub, roles=roles, tenants=tenants - ) + return TenantData(token=token, user_id=sub, roles=roles, tenants=tenants) if not (sub and roles and tenants): raise HTTPException( @@ -75,9 +71,7 @@ async def __call__(self, request: Request) -> TenantData: detail="X-Current-Tenant not in jwt tenants!", ) - return TenantData( - token=token, user_id=sub, roles=roles, tenants=tenants - ) + return TenantData(token=token, user_id=sub, roles=roles, tenants=tenants) def decode_hs256(self, token: str) -> Dict[str, Any]: try: @@ -97,9 +91,7 @@ def decode_hs256(self, token: str) -> Dict[str, Any]: def decode_rs256(self, token: str) -> Dict[str, Any]: try: signing_key = self.jwk_client.get_signing_key_from_jwt(token) - decoded = jwt.decode( - token, signing_key.key, algorithms=[self.algorithm] - ) + decoded = jwt.decode(token, signing_key.key, algorithms=[self.algorithm]) except 
jwt.ExpiredSignatureError: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, @@ -115,9 +107,7 @@ def decode_rs256(self, token: str) -> Dict[str, Any]: @staticmethod def _check_algorithm(alg: str) -> str: if alg not in SupportedAlgorithms.members(): - raise ValueError( - f"Available algorithms {SupportedAlgorithms.members()}" - ) + raise ValueError(f"Available algorithms {SupportedAlgorithms.members()}") return alg @staticmethod @@ -167,7 +157,5 @@ def get_tenant_info( debug: If True button 'Authorize' will be rendered on Swagger. """ if debug: - return TenantDependencyDocs( - key, algorithm, url, scheme_name, description - ) + return TenantDependencyDocs(key, algorithm, url, scheme_name, description) return TenantDependencyBase(key, algorithm, url) diff --git a/lib/tenants/tests/conftest.py b/lib/tenants/tests/conftest.py index 4132b7027..877b9dbf1 100644 --- a/lib/tenants/tests/conftest.py +++ b/lib/tenants/tests/conftest.py @@ -24,9 +24,7 @@ def get_key(filename: str) -> str: @pytest.fixture def mock_jwk_client(): - with patch( - "src.dependency.jwt.PyJWKClient.__init__", return_value=None - ) as mock: + with patch("src.dependency.jwt.PyJWKClient.__init__", return_value=None) as mock: yield mock @@ -141,9 +139,7 @@ def token_mock_hs256(): "realm_access": {"roles": ["role-annotator"]}, "tenants": ["tenant1", "epam"], } - token = create_access_token( - data=payload, secret=SECRET_KEY, expires_delta=15 - ) + token = create_access_token(data=payload, secret=SECRET_KEY, expires_delta=15) yield token @@ -154,9 +150,7 @@ def expired_token_mock_hs256(): "realm_access": {"roles": ["role-annotator"]}, "tenants": ["tenant1", "epam"], } - token = create_access_token( - data=payload, secret=SECRET_KEY, expires_delta=-15 - ) + token = create_access_token(data=payload, secret=SECRET_KEY, expires_delta=-15) yield token @@ -167,9 +161,7 @@ def wrong_data_token_mock_hs256(): "realm_access": {"roles": ["role-annotator"]}, "qtenants": ["tenant1"], } - token = create_access_token( - data=payload, secret=SECRET_KEY, expires_delta=15 - ) + token = create_access_token(data=payload, secret=SECRET_KEY, expires_delta=15) yield token diff --git a/lib/tenants/tests/test_dependency_rs256.py b/lib/tenants/tests/test_dependency_rs256.py index b7f8280dc..259c0383e 100644 --- a/lib/tenants/tests/test_dependency_rs256.py +++ b/lib/tenants/tests/test_dependency_rs256.py @@ -97,9 +97,7 @@ def test_client_token_positive(client_token_mock_rs256, test_app_rs256): assert res.json() == response_body -def test_wrong_client_token_data( - wrong_client_token_mock_rs256, test_app_rs256 -): +def test_wrong_client_token_data(wrong_client_token_mock_rs256, test_app_rs256): headers = { "Authorization": f"Bearer {wrong_client_token_mock_rs256}", "X-Current-Tenant": CURRENT_TENANT, diff --git a/lib/tenants/tests/test_schema.py b/lib/tenants/tests/test_schema.py index c79257cec..7a77912cc 100644 --- a/lib/tenants/tests/test_schema.py +++ b/lib/tenants/tests/test_schema.py @@ -35,9 +35,7 @@ ) def test_tenant_data_positive(token, user_id, roles, tenants, expected_result): assert ( - TenantData( - token=token, user_id=user_id, roles=roles, tenants=tenants - ).dict() + TenantData(token=token, user_id=user_id, roles=roles, tenants=tenants).dict() == expected_result ) diff --git a/models/alembic/env.py b/models/alembic/env.py index 1889ab486..ac145e51f 100644 --- a/models/alembic/env.py +++ b/models/alembic/env.py @@ -1,12 +1,12 @@ import os from logging.config import fileConfig -from sqlalchemy import engine_from_config, pool - 
-from alembic import context from models.constants import DATABASE_URL from models.db import Base from models.utils import get_test_db_url +from sqlalchemy import engine_from_config, pool + +from alembic import context # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -71,9 +71,7 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure( - connection=connection, target_metadata=target_metadata - ) + context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() diff --git a/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py b/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py index a3e04f982..7a87b4f50 100644 --- a/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py +++ b/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py @@ -19,9 +19,7 @@ def upgrade(): op.add_column( "model", - sa.Column( - "description", sa.VARCHAR(), server_default="", nullable=False - ), + sa.Column("description", sa.VARCHAR(), server_default="", nullable=False), ) diff --git a/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py b/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py index 79a891592..ca4bb3875 100644 --- a/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py +++ b/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py @@ -18,12 +18,8 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.add_column( - "basement", sa.Column("key_script", sa.String(), nullable=True) - ) - op.add_column( - "basement", sa.Column("key_archive", sa.String(), nullable=True) - ) + op.add_column("basement", sa.Column("key_script", sa.String(), nullable=True)) + op.add_column("basement", sa.Column("key_archive", sa.String(), nullable=True)) # ### end Alembic commands ### diff --git a/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py b/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py index f4f40b469..fa36dda40 100644 --- a/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py +++ b/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py @@ -18,9 +18,7 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - op.add_column( - "training", sa.Column("key_archive", sa.String(), nullable=True) - ) + op.add_column("training", sa.Column("key_archive", sa.String(), nullable=True)) # ### end Alembic commands ### diff --git a/models/alembic/versions/683f401ed33e_create_tables.py b/models/alembic/versions/683f401ed33e_create_tables.py index a1829eb95..93c90142c 100644 --- a/models/alembic/versions/683f401ed33e_create_tables.py +++ b/models/alembic/versions/683f401ed33e_create_tables.py @@ -46,9 +46,7 @@ def upgrade() -> None: sa.Column("created_by", sa.String()), sa.Column("created_at", sa.DateTime()), sa.Column("tenant", sa.String(length=100)), - sa.ForeignKeyConstraint( - ["basement"], ["basement.id"], ondelete="CASCADE" - ), + sa.ForeignKeyConstraint(["basement"], ["basement.id"], ondelete="CASCADE"), sa.PrimaryKeyConstraint("id"), ) op.create_table( @@ -77,9 +75,7 @@ def upgrade() -> None: sa.Column("created_by", sa.String()), sa.Column("created_at", sa.DateTime()), sa.Column("tenant", sa.String(length=100)), - sa.ForeignKeyConstraint( - ["basement"], ["basement.id"], ondelete="CASCADE" - ), + sa.ForeignKeyConstraint(["basement"], ["basement.id"], ondelete="CASCADE"), sa.ForeignKeyConstraint( ["training_id"], ["training.id"], diff --git a/models/alembic/versions/826680104247_pod_limits_column.py b/models/alembic/versions/826680104247_pod_limits_column.py index 770f778d9..e1ef44ea0 100644 --- a/models/alembic/versions/826680104247_pod_limits_column.py +++ b/models/alembic/versions/826680104247_pod_limits_column.py @@ -23,9 +23,7 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.add_column( "basement", - sa.Column( - "limits", postgresql.JSON(astext_type=sa.Text()), nullable=True - ), + sa.Column("limits", postgresql.JSON(astext_type=sa.Text()), nullable=True), ) default_limits = { @@ -48,8 +46,7 @@ def upgrade(): op.execute(f"UPDATE basement SET limits = '{dumps(default_limits)}'") op.execute( - f"UPDATE basement SET limits = '{dumps(dod_limits)}' " - f"WHERE id LIKE '%dod%'" + f"UPDATE basement SET limits = '{dumps(dod_limits)}' " f"WHERE id LIKE '%dod%'" ) op.execute( f"UPDATE basement SET limits = '{dumps(table_extractor_limits)}' " diff --git a/models/alembic/versions/8fd15e9edd28_pod_cpu_limits_change.py b/models/alembic/versions/8fd15e9edd28_pod_cpu_limits_change.py index 5314a583c..eece05c2f 100644 --- a/models/alembic/versions/8fd15e9edd28_pod_cpu_limits_change.py +++ b/models/alembic/versions/8fd15e9edd28_pod_cpu_limits_change.py @@ -38,8 +38,7 @@ def upgrade(): op.execute(f"UPDATE basement SET limits = '{dumps(default_limits)}'") op.execute( - f"UPDATE basement SET limits = " - f"'{dumps(dod_limits)}' WHERE id LIKE '%dod%'" + f"UPDATE basement SET limits = " f"'{dumps(dod_limits)}' WHERE id LIKE '%dod%'" ) op.execute( f"UPDATE basement SET limits = " @@ -76,8 +75,7 @@ def downgrade(): op.execute(f"UPDATE basement SET limits = '{dumps(default_limits)}'") op.execute( - f"UPDATE basement SET limits =" - f" '{dumps(dod_limits)}' WHERE id LIKE '%dod%'" + f"UPDATE basement SET limits =" f" '{dumps(dod_limits)}' WHERE id LIKE '%dod%'" ) op.execute( f"UPDATE basement SET limits =" diff --git a/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py b/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py index 3f83181a2..4aa952e83 100644 --- a/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py +++ b/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py @@ -19,9 +19,7 @@ def upgrade() -> None: # ### 
commands auto generated by Alembic - please adjust! ### - op.add_column( - "training", sa.Column("jobs", sa.ARRAY(sa.Integer()), nullable=True) - ) + op.add_column("training", sa.Column("jobs", sa.ARRAY(sa.Integer()), nullable=True)) op.drop_column("training", "datasets_ids") op.drop_column("training", "files_ids") op.drop_column("basement", "supported_args") diff --git a/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py b/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py index f6d970c46..585c6ab66 100644 --- a/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py +++ b/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py @@ -19,9 +19,7 @@ def upgrade(): op.add_column( "model", - sa.Column( - "latest", sa.Boolean(), nullable=False, server_default="True" - ), + sa.Column("latest", sa.Boolean(), nullable=False, server_default="True"), ) op.alter_column("model", "latest", server_default=None) diff --git a/models/alembic/versions/b5d7e85a73c2_set_basement_concurrency_limits.py b/models/alembic/versions/b5d7e85a73c2_set_basement_concurrency_limits.py index 844bef4f4..6ed22f8da 100644 --- a/models/alembic/versions/b5d7e85a73c2_set_basement_concurrency_limits.py +++ b/models/alembic/versions/b5d7e85a73c2_set_basement_concurrency_limits.py @@ -41,8 +41,7 @@ def upgrade(): op.execute(f"UPDATE basement SET limits = '{dumps(default_limits)}'") op.execute( - f"UPDATE basement SET limits = '{dumps(dod_limits)}' " - "WHERE id LIKE '%dod%'" + f"UPDATE basement SET limits = '{dumps(dod_limits)}' " "WHERE id LIKE '%dod%'" ) op.execute( f"UPDATE basement SET limits = '{dumps(table_extractor_limits)}' " @@ -75,8 +74,7 @@ def downgrade(): op.execute(f"UPDATE basement SET limits = '{dumps(default_limits)}'") op.execute( - f"UPDATE basement SET limits = '{dumps(dod_limits)}' " - "WHERE id LIKE '%dod%'" + f"UPDATE basement SET limits = '{dumps(dod_limits)}' " "WHERE id LIKE '%dod%'" ) op.execute( f"UPDATE basement SET limits = '{dumps(table_extractor_limits)}' " diff --git a/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py b/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py index a826bbd03..98bfc632c 100644 --- a/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py +++ b/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py @@ -18,9 +18,7 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### - op.add_column( - "model", sa.Column("type", sa.String(length=100), nullable=True) - ) + op.add_column("model", sa.Column("type", sa.String(length=100), nullable=True)) # ### end Alembic commands ### diff --git a/models/models/colab_ssh_utils.py b/models/models/colab_ssh_utils.py index 55c40d843..e5bc3166d 100644 --- a/models/models/colab_ssh_utils.py +++ b/models/models/colab_ssh_utils.py @@ -5,12 +5,11 @@ from typing import BinaryIO, Iterator, Union from botocore.response import StreamingBody -from paramiko import AutoAddPolicy, SSHClient -from paramiko.ssh_exception import SSHException - from models.constants import MINIO_ACCESS_KEY, MINIO_HOST, MINIO_SECRET_KEY from models.errors import ColabFileUploadError from models.schemas import TrainingCredentials +from paramiko import AutoAddPolicy, SSHClient +from paramiko.ssh_exception import SSHException LOGGER = logging.getLogger(name="models") COLAB_TRAINING_DIRECTORY = "/content/training/" @@ -98,9 +97,7 @@ def local_mount_colab_drive( ) -def sync_colab_with_minio( - temp_directory: str, tenant: str, training_id: int -) -> None: +def sync_colab_with_minio(temp_directory: str, tenant: str, training_id: int) -> None: syn_command = ( f"aws --endpoint-url http://{MINIO_HOST} s3 sync {temp_directory} " f"s3://{tenant}/trainings/{training_id}/results/ --delete" diff --git a/models/models/crud.py b/models/models/crud.py index 83f8e75e8..0c349f3ee 100644 --- a/models/models/crud.py +++ b/models/models/crud.py @@ -1,11 +1,15 @@ from typing import Dict, Optional, Tuple, Union +from models.db import Basement, Model, Training +from models.schemas import ( + BasementBase, + ModelBase, + TrainingBase, + TrainingUpdate, +) from sqlalchemy import desc from sqlalchemy.orm import Session -from models.db import Basement, Model, Training -from models.schemas import BasementBase, ModelBase, TrainingBase, TrainingUpdate - def is_id_existing( session: Session, @@ -95,9 +99,7 @@ def get_latest_model(session: Session, model_id: str) -> Optional[Model]: ) -def get_second_latest_model( - session: Session, model_id: str -) -> Optional[Model]: +def get_second_latest_model(session: Session, model_id: str) -> Optional[Model]: """ Find second model by desc version """ diff --git a/models/models/errors.py b/models/models/errors.py index 11d7bde59..4a41d2a41 100644 --- a/models/models/errors.py +++ b/models/models/errors.py @@ -17,18 +17,14 @@ def __init__(self, message: str): self.message = message -def botocore_error_handler( - request: Request, exc: BotoCoreError -) -> JSONResponse: +def botocore_error_handler(request: Request, exc: BotoCoreError) -> JSONResponse: return JSONResponse( status_code=500, content={"detail": f"Error: connection error ({exc})"}, ) -def minio_client_error_handler( - request: Request, exc: ClientError -) -> JSONResponse: +def minio_client_error_handler(request: Request, exc: ClientError) -> JSONResponse: return JSONResponse( status_code=500, content={"detail": f"Error: client error ({exc})"}, @@ -44,9 +40,7 @@ def minio_no_such_bucket_error_handler( ) -def ssh_connection_error_handler( - request: Request, exc: SSHException -) -> JSONResponse: +def ssh_connection_error_handler(request: Request, exc: SSHException) -> JSONResponse: return JSONResponse( status_code=500, content={"detail": f"Error: ssh connection error ({exc})"}, @@ -62,9 +56,7 @@ def colab_execution_error_handler( ) -def sqlalchemy_db_error_handler( - request: Request, exc: SQLAlchemyError -) -> JSONResponse: +def sqlalchemy_db_error_handler(request: Request, exc: 
SQLAlchemyError) -> JSONResponse: return JSONResponse( status_code=500, content={"detail": f"Error: connection error ({exc})"}, diff --git a/models/models/routers/basements_routers.py b/models/models/routers/basements_routers.py index 5a166793f..1ba636746 100644 --- a/models/models/routers/basements_routers.py +++ b/models/models/routers/basements_routers.py @@ -9,9 +9,6 @@ map_request_to_filter, paginate, ) -from sqlalchemy.orm import Session -from tenant_dependency import TenantData - from models import crud, schemas from models.db import Basement, get_db from models.routers import tenant @@ -21,6 +18,8 @@ get_minio_resource, upload_to_object_storage, ) +from sqlalchemy.orm import Session +from tenant_dependency import TenantData LOGGER = logging.getLogger(name="models") @@ -199,9 +198,7 @@ def upload_files_to_object_storage( bucket_name = convert_bucket_name_if_s3prefix(x_current_tenant) basement = crud.get_instance(session, Basement, basement_id) if not basement: - LOGGER.info( - "upload_script_to_minio got not existing id %s", basement_id - ) + LOGGER.info("upload_script_to_minio got not existing id %s", basement_id) raise HTTPException(status_code=404, detail="Not existing basement") try: s3_resource = get_minio_resource(tenant=bucket_name) diff --git a/models/models/routers/deployed_models_routers.py b/models/models/routers/deployed_models_routers.py index 8ca563f03..8a7f41267 100644 --- a/models/models/routers/deployed_models_routers.py +++ b/models/models/routers/deployed_models_routers.py @@ -5,7 +5,6 @@ from fastapi import APIRouter, HTTPException from kubernetes import client, config from kubernetes.client.exceptions import ApiException - from models import schemas, utils from models.constants import MODELS_NAMESPACE @@ -93,9 +92,7 @@ def get_deployed_model_by_name( return schemas.DeployedModelDetails( apiVersion=model["apiVersion"], datetime_creation=str( - datetime.strptime( - metadata["creationTimestamp"], "%Y-%m-%dT%H:%M:%SZ" - ) + datetime.strptime(metadata["creationTimestamp"], "%Y-%m-%dT%H:%M:%SZ") ), model_id=metadata["generation"], model_name=metadata["name"], diff --git a/models/models/routers/models_routers.py b/models/models/routers/models_routers.py index 8bf314bb8..b931bcc69 100644 --- a/models/models/routers/models_routers.py +++ b/models/models/routers/models_routers.py @@ -9,13 +9,12 @@ map_request_to_filter, paginate, ) -from sqlalchemy.orm import Session -from tenant_dependency import TenantData - from models import crud, schemas, utils from models.crud import get_latest_model, get_second_latest_model from models.db import Basement, Model, Training, get_db from models.routers import tenant +from sqlalchemy.orm import Session +from tenant_dependency import TenantData LOGGER = logging.getLogger(name="models") @@ -108,9 +107,7 @@ def search_models( session: Session = Depends(get_db), ) -> Union[Page[schemas.Model], Page[Any]]: query = session.query(Model) - filter_args = map_request_to_filter( - request.dict(), "Model" # type: ignore - ) + filter_args = map_request_to_filter(request.dict(), "Model") # type: ignore query, pagination = form_query(filter_args, query) return paginate([x for x in query], pagination) @@ -129,9 +126,7 @@ def search_models( }, }, ) -def get_model_by_id( - model_id: str, session: Session = Depends(get_db) -) -> Model: +def get_model_by_id(model_id: str, session: Session = Depends(get_db)) -> Model: query = crud.get_latest_model(session, model_id) if not query: LOGGER.error("Get_model_by_id get not existing id %s", model_id) @@ -159,8 
+154,7 @@ def get_model_by_id_and_version( model = crud.get_instance(session, Model, (model_id, version)) if not model: LOGGER.error( - "Get_model_by_id get not existing model with " - "id: %s, version: %d", + "Get_model_by_id get not existing model with " "id: %s, version: %d", model_id, version, ) @@ -204,9 +198,7 @@ def update_model( if request.training_id and not crud.is_id_existing( session, Training, request.training_id ): - LOGGER.info( - "Update_model get not existing training id %s", request.training_id - ) + LOGGER.info("Update_model get not existing training id %s", request.training_id) raise HTTPException(status_code=404, detail="Not existing training") modified_model = crud.modify_instance(session, model, request) @@ -254,9 +246,7 @@ def update_model_by_id_and_version( if request.training_id and not crud.is_id_existing( session, Training, request.training_id ): - LOGGER.info( - "Update_model get not existing training id %s", request.training_id - ) + LOGGER.info("Update_model get not existing training id %s", request.training_id) raise HTTPException(status_code=404, detail="Not existing training") modified_model = crud.modify_instance(session, model, request) @@ -360,9 +350,7 @@ def deploy_model( schemas.StatusEnum.READY.value, schemas.StatusEnum.DEPLOYED.value, ) - LOGGER.info( - "Deploy_model get id of already deployed model %s", model.id - ) + LOGGER.info("Deploy_model get id of already deployed model %s", model.id) raise HTTPException( status_code=409, detail=f"Model {model.id} has already been deployed", @@ -411,17 +399,13 @@ def deploy_model_by_id_and_version( schemas.StatusEnum.READY.value, schemas.StatusEnum.DEPLOYED.value, ) - LOGGER.info( - "Deploy_model get id of already deployed model %s", model.id - ) + LOGGER.info("Deploy_model get id of already deployed model %s", model.id) raise HTTPException( status_code=409, detail=f"Model {model.id} has already been deployed", ) - LOGGER.info( - "Deploying model with " "id: %s, version: %d", model_id, version - ) + LOGGER.info("Deploying model with " "id: %s, version: %d", model_id, version) utils.deploy(session, model) return {"msg": f"Model {model_id} with version {version} is deploying"} @@ -462,9 +446,7 @@ def undeploy_model( return {"msg": f"Model {model.id} is undeployed"} if utils.undeploy(session, model): return {"msg": f"Model {model.id} is undeployed"} - raise HTTPException( - status_code=409, detail=f"Failed to undeploy model {model.id}" - ) + raise HTTPException(status_code=409, detail=f"Failed to undeploy model {model.id}") @router.post( @@ -493,8 +475,7 @@ def undeploy_model_by_id_and_version( model = crud.get_instance(session, Model, (model_id, version)) if not model: LOGGER.info( - "Undeploy_model get not existing model with " - "id: %s, version: %d", + "Undeploy_model get not existing model with " "id: %s, version: %d", model_id, version, ) @@ -512,6 +493,5 @@ def undeploy_model_by_id_and_version( return {"msg": f"Model {model.id} is undeployed"} raise HTTPException( status_code=409, - detail=f"Failed to undeploy model {model_id} " - f"with version {version}", + detail=f"Failed to undeploy model {model_id} " f"with version {version}", ) diff --git a/models/models/routers/training_routers.py b/models/models/routers/training_routers.py index e06a55584..8bc186c50 100644 --- a/models/models/routers/training_routers.py +++ b/models/models/routers/training_routers.py @@ -21,9 +21,6 @@ map_request_to_filter, paginate, ) -from sqlalchemy.orm import Session -from tenant_dependency import TenantData - from models 
import crud, schemas, utils from models.colab_ssh_utils import ( COLAB_TRAINING_DIRECTORY, @@ -42,6 +39,8 @@ get_minio_object, get_minio_resource, ) +from sqlalchemy.orm import Session +from tenant_dependency import TenantData LOGGER = logging.getLogger(name="models") TRAINING_SCRIPT_NAME = "training_script.py" @@ -145,9 +144,7 @@ def search_training( session: Session = Depends(get_db), ) -> Union[Page[schemas.Training], Page[Any]]: query = session.query(Training) - filter_args = map_request_to_filter( - request.dict(), "Training" # type: ignore - ) + filter_args = map_request_to_filter(request.dict(), "Training") # type: ignore query, pagination = form_query(filter_args, query) return paginate([x for x in query], pagination) @@ -241,9 +238,7 @@ def delete_training_by_id( bucket_name, ) raise HTTPException(status_code=500, detail=str(err)) - s3_resource.meta.client.delete_object( - Bucket=bucket_name, Key=training.key_archive - ) + s3_resource.meta.client.delete_object(Bucket=bucket_name, Key=training.key_archive) crud.delete_instance(session, training) LOGGER.info("Training %d was deleted", request.id) return {"msg": "Training was deleted"} @@ -277,9 +272,7 @@ def prepare_annotation_dataset( if not training: LOGGER.info("Prepare dataset get not existing id %s", training_id) raise HTTPException(status_code=404, detail="Not existing training") - minio_path = prepare_dataset_info( - convert_request, x_current_tenant, token.token - ) + minio_path = prepare_dataset_info(convert_request, x_current_tenant, token.token) training.key_annotation_dataset = minio_path session.commit() LOGGER.info("Dataset creation for training %s is started", training_id) @@ -346,9 +339,7 @@ def start_training( with connect_colab(credentials) as ssh_client: bucket = convert_bucket_name_if_s3prefix(x_current_tenant) file_script, size_script = get_minio_object(bucket, key_script) - upload_file_to_colab( - ssh_client, file_script, size_script, TRAINING_SCRIPT_NAME - ) + upload_file_to_colab(ssh_client, file_script, size_script, TRAINING_SCRIPT_NAME) file_dataset, size_dataset = get_minio_object(bucket, key_dataset) upload_file_to_colab( ssh_client, file_dataset, size_dataset, ANNOTATION_DATASET_NAME @@ -397,9 +388,7 @@ def download_training_results( bucket_name = convert_bucket_name_if_s3prefix(x_current_tenant) training_exists = crud.is_id_existing(session, Training, training_id) if not training_exists: - LOGGER.info( - "Download_training_results get not existing id %s", training_id - ) + LOGGER.info("Download_training_results get not existing id %s", training_id) raise HTTPException(status_code=404, detail="Not existing training") home_directory = pathlib.Path.home() check_aws_credentials_file(home_directory) diff --git a/models/models/schemas.py b/models/models/schemas.py index 71f5e13f6..a3f020525 100644 --- a/models/models/schemas.py +++ b/models/models/schemas.py @@ -3,9 +3,8 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union -from pydantic import BaseModel, ConstrainedStr, Field, PositiveInt, validator - from models.db import StatusEnum +from pydantic import BaseModel, ConstrainedStr, Field, PositiveInt, validator class AtLeastOneChar(ConstrainedStr): @@ -107,9 +106,7 @@ class Model(ModelWithId): "it has been deployed already", example="ready", ) - created_by: str = Field( - description="Author who has created model", example="901" - ) + created_by: str = Field(description="Author who has created model", example="901") created_at: datetime = Field(example="2021-11-09T17:09:43.101004") 
tenant: str = Field(description="Author's tenant", example="tenant1") latest: bool = Field( @@ -133,9 +130,7 @@ class BasementBase(BaseModel): description="Unique name of docker image to build and run", example="custom:v1.1", ) - name: str = Field( - title="Human readable name", example="some describing name" - ) + name: str = Field(title="Human readable name", example="some describing name") supported_args: Optional[List[Dict[str, Any]]] = Field( example=[ { @@ -234,9 +229,7 @@ class HeaderResponse(BaseModel): class DeployedModelMainData(BaseModel): datetime_creation: str = Field(example="2021-11-09T17:09:43.101004") - status: str = Field( - description="Model status, it's running or not", example=True - ) + status: str = Field(description="Model status, it's running or not", example=True) name: str = Field(description="Name of the model", example="my_model") url: str = Field(description="Model url with details information") @@ -336,12 +329,8 @@ class ConvertRequestSchema(BaseModel): class TrainingCredentials(BaseModel): user: str = Field(..., description="Colab username", example="root") - password: str = Field( - ..., description="Colab user password", example="SECRET" - ) + password: str = Field(..., description="Colab user password", example="SECRET") host: str = Field( ..., description="Ngrok host to connect colab", example="tcp.ngrok.io" ) - port: int = Field( - ..., description="Ngrok port to connect colab", example="12345" - ) + port: int = Field(..., description="Ngrok port to connect colab", example="12345") diff --git a/models/models/utils.py b/models/models/utils.py index 3f580002e..7163464c5 100644 --- a/models/models/utils.py +++ b/models/models/utils.py @@ -1,16 +1,13 @@ from typing import Dict, List, Optional, Tuple import boto3 +import models.logger as logger from botocore.client import Config from botocore.exceptions import BotoCoreError, ClientError from botocore.response import StreamingBody from kubernetes import client, config from kubernetes.client.rest import ApiException from kubernetes.config import ConfigException -from sqlalchemy.orm import Session -from starlette.datastructures import UploadFile - -import models.logger as logger from models.constants import ( CONTAINER_NAME, DOCKER_REGISTRY_URL, @@ -28,6 +25,8 @@ from models.db import Basement, Model from models.errors import NoSuchTenant from models.schemas import DeployedModelPod, MinioHTTPMethod +from sqlalchemy.orm import Session +from starlette.datastructures import UploadFile logger_ = logger.get_logger(__name__) @@ -141,9 +140,7 @@ def create_ksvc( "value": config_path["file"], }, ], - "ports": [ - {"protocol": "TCP", "containerPort": 8000} - ], + "ports": [{"protocol": "TCP", "containerPort": 8000}], "resources": { "limits": { "cpu": pod_cpu_limit, @@ -376,9 +373,7 @@ def upload_to_object_storage( s3.upload_fileobj(Fileobj=obj, Key=file_path) except ClientError as err: if "404" in err.args[0]: - raise NoSuchTenant( - f"Bucket for tenant {bucket_name} does not exist" - ) + raise NoSuchTenant(f"Bucket for tenant {bucket_name} does not exist") raise diff --git a/models/tests/conftest.py b/models/tests/conftest.py index 35f041828..c357ff55f 100644 --- a/models/tests/conftest.py +++ b/models/tests/conftest.py @@ -61,9 +61,7 @@ def overrided_token_client(client, db_session) -> TestClient: def moto_minio() -> boto3.resource: """Creates and returns moto resource for s3 (minio) with test Bucket.""" with mock_s3(): - minio_resource = boto3.resource( - "s3", config=Config(signature_version="s3v4") - ) + 
minio_resource = boto3.resource("s3", config=Config(signature_version="s3v4")) minio_resource.create_bucket(Bucket=TEST_TENANT) yield minio_resource @@ -156,9 +154,7 @@ def setup_test_db(use_temp_env_var): @pytest.fixture(scope="module") def db_session(setup_test_db) -> Session: """yields SQLAlchemy session""" - session_local = sessionmaker( - autocommit=False, autoflush=False, bind=engine - ) + session_local = sessionmaker(autocommit=False, autoflush=False, bind=engine) session = session_local() yield session diff --git a/models/tests/test_basement_routers.py b/models/tests/test_basement_routers.py index 6756b340f..af8ccc680 100644 --- a/models/tests/test_basement_routers.py +++ b/models/tests/test_basement_routers.py @@ -27,13 +27,11 @@ def test_create_basement(exist, create): create.return_value = {"msg": "expected"} token = Mock() token.user_id.return_value = "token" - assert basements_routers.create_new_basement( - data, "session", token, "tenant" - ) == {"msg": "expected"} + assert basements_routers.create_new_basement(data, "session", token, "tenant") == { + "msg": "expected" + } exist.assert_called_once_with("session", Basement, "id") - create.assert_called_once_with( - "session", Basement, data, token.user_id, "tenant" - ) + create.assert_called_once_with("session", Basement, data, token.user_id, "tenant") @patch.object(basements_routers.crud, "create_instance") @@ -84,9 +82,7 @@ def test_get_basement_by_id_withot_basement(get): @patch.object(basements_routers.crud, "delete_instance") @patch.object(basements_routers.crud, "get_instance") def test_delete_basement_by_id(delete, get, client, monkeypatch): - monkeypatch.setattr( - "models.routers.basements_routers.get_minio_resource", Mock() - ) + monkeypatch.setattr("models.routers.basements_routers.get_minio_resource", Mock()) data = {"id": "id"} get.return_value = "expected" response = client.delete("/basements/delete", data=json.dumps(data)) @@ -97,9 +93,7 @@ def test_delete_basement_by_id(delete, get, client, monkeypatch): @patch.object(basements_routers.crud, "delete_instance") @patch.object(basements_routers.crud, "get_instance") def test_delete_basement_by_id_calls_crud(delete, get, monkeypatch): - monkeypatch.setattr( - "models.routers.basements_routers.get_minio_resource", Mock() - ) + monkeypatch.setattr("models.routers.basements_routers.get_minio_resource", Mock()) data = basements_routers.schemas.BasementDelete(id="id") get.return_value = "expected" basements_routers.delete_basement_by_id(data, "session") diff --git a/models/tests/test_colab_start_training.py b/models/tests/test_colab_start_training.py index 823a4741d..5f468a1d5 100644 --- a/models/tests/test_colab_start_training.py +++ b/models/tests/test_colab_start_training.py @@ -79,7 +79,9 @@ def test_start_training_no_key_script_error( @pytest.mark.integration -@pytest.mark.skip("Test should be fixed - got 'Annotation dataset for training 1 not ready' in response") +@pytest.mark.skip( + "Test should be fixed - got 'Annotation dataset for training 1 not ready' in response" +) @pytest.mark.parametrize( "prepare_db_start_training", [TRAINING_ARCHIVE_KEY], indirect=True ) @@ -114,7 +116,9 @@ def __exit__(self, exc_type, exc_val, exc_tb): @pytest.mark.integration -@pytest.mark.skip("Test should be fixed - got 'Annotation dataset for training 1 not ready' in response") +@pytest.mark.skip( + "Test should be fixed - got 'Annotation dataset for training 1 not ready' in response" +) @pytest.mark.parametrize( "prepare_db_start_training", [TRAINING_ARCHIVE_KEY], indirect=True ) 
@@ -132,9 +136,7 @@ def test_start_training_no_such_bucket_error( "models.utils.boto3.resource", Mock(return_value=moto_minio), ) - monkeypatch.setattr( - "models.routers.training_routers.connect_colab", MockSSHContext - ) + monkeypatch.setattr("models.routers.training_routers.connect_colab", MockSSHContext) response = overrided_token_client.post( START_TRAINING_PATH.format(EXIST_TRAINING_ID), json=TEST_CREDENTIALS, @@ -145,7 +147,9 @@ def test_start_training_no_such_bucket_error( @pytest.mark.integration -@pytest.mark.skip("Test should be fixed - got 'Annotation dataset for training 1 not ready' in response") +@pytest.mark.skip( + "Test should be fixed - got 'Annotation dataset for training 1 not ready' in response" +) @pytest.mark.parametrize( "prepare_db_start_training", [TRAINING_ARCHIVE_KEY], @@ -161,9 +165,7 @@ def test_start_training_boto3_error( "models.routers.training_routers.get_minio_object", Mock(side_effect=BotoCoreError()), ) - monkeypatch.setattr( - "models.routers.training_routers.connect_colab", MockSSHContext - ) + monkeypatch.setattr("models.routers.training_routers.connect_colab", MockSSHContext) response = overrided_token_client.post( START_TRAINING_PATH.format(EXIST_TRAINING_ID), json=TEST_CREDENTIALS, @@ -174,7 +176,9 @@ def test_start_training_boto3_error( @pytest.mark.integration -@pytest.mark.skip("Test should be fixed - got 'Annotation dataset for training 1 not ready' in response") +@pytest.mark.skip( + "Test should be fixed - got 'Annotation dataset for training 1 not ready' in response" +) @pytest.mark.parametrize( "prepare_db_start_training", [TRAINING_ARCHIVE_KEY], @@ -197,9 +201,7 @@ def test_start_training_integration( "models.utils.boto3.resource", Mock(return_value=save_start_training_minio_objects), ) - monkeypatch.setattr( - "models.routers.training_routers.connect_colab", MockSSHContext - ) + monkeypatch.setattr("models.routers.training_routers.connect_colab", MockSSHContext) response = overrided_token_client.post( START_TRAINING_PATH.format(EXIST_TRAINING_ID), json=TEST_CREDENTIALS, diff --git a/models/tests/test_crud.py b/models/tests/test_crud.py index 8a097ad01..f53b927fd 100644 --- a/models/tests/test_crud.py +++ b/models/tests/test_crud.py @@ -30,9 +30,7 @@ def test_is_id_existing_queries_db_and_calls_filter(): def test_create_instance_calls_add_and_commit_and_returns_id(): session = Mock() - basement = BasementBase( - id="id", name="name", gpu_support=True, limits=TEST_LIMITS - ) + basement = BasementBase(id="id", name="name", gpu_support=True, limits=TEST_LIMITS) crud.create_instance(session, Basement, basement, "author", "tenant") session.add.assert_called_once() session.commit.assert_called_once() @@ -50,9 +48,7 @@ def test_get_instance_queries_db_calls_get_and_returns_result_of_get(): def test_modify_instance_calls_commit(): session = Mock() - basement = BasementBase( - id="id", name="name", gpu_support=True, limits=TEST_LIMITS - ) + basement = BasementBase(id="id", name="name", gpu_support=True, limits=TEST_LIMITS) crud.get_instance = Mock(return_value="expected") crud.modify_instance(session, Basement, basement) session.commit.assert_called_once() diff --git a/models/tests/test_models_routers.py b/models/tests/test_models_routers.py index 893f273c3..4a57b9f57 100644 --- a/models/tests/test_models_routers.py +++ b/models/tests/test_models_routers.py @@ -269,9 +269,7 @@ def test_deploy_model_with_wrong_type(get, client): @patch.object(models_routers.crud, "modify_status") @patch.object(models_routers.utils, "is_model_deployed") 
@patch.object(models_routers.crud, "get_latest_model") -def test_deploy_already_deployed_model_returns_409( - get, is_deployed, modify, client -): +def test_deploy_already_deployed_model_returns_409(get, is_deployed, modify, client): data = {"id": "id"} models_routers.get_db = Mock() query = models_routers.schemas.ModelId(id="id") @@ -285,9 +283,7 @@ def test_deploy_already_deployed_model_returns_409( @patch.object(models_routers.crud, "modify_status") @patch.object(models_routers.utils, "is_model_deployed") @patch.object(models_routers.crud, "get_latest_model") -def test_deploy_already_deployed_model_modifies_status( - get, is_deployed, modify -): +def test_deploy_already_deployed_model_modifies_status(get, is_deployed, modify): data = models_routers.schemas.ModelId(id="id") models_routers.get_db = Mock() get.return_value = data @@ -316,9 +312,7 @@ def test_deploy_model_in_positive_case(get, is_deployed, deploy, client): @patch.object(models_routers.crud, "modify_status") @patch.object(models_routers.utils, "is_model_deployed") @patch.object(models_routers.crud, "get_latest_model") -def test_deploy_model_without_modifying_status( - get, is_deployed, modify, deploy -): +def test_deploy_model_without_modifying_status(get, is_deployed, modify, deploy): data = models_routers.schemas.ModelId(id="id") models_routers.get_db = Mock() get.return_value = data @@ -401,9 +395,7 @@ def test_undeploy_already_undeployed_model_modifies_status( @patch.object(models_routers.crud, "modify_status") @patch.object(models_routers.utils, "is_model_deployed") @patch.object(models_routers.crud, "get_latest_model") -def test_undeploy_model_calls_undeploying_function( - get, is_deployed, modify, undeploy -): +def test_undeploy_model_calls_undeploying_function(get, is_deployed, modify, undeploy): data = models_routers.schemas.ModelId(id="id") models_routers.get_db = Mock() get.return_value = data diff --git a/models/tests/test_schemas.py b/models/tests/test_schemas.py index 5bd2e7cd7..188b9c3d9 100644 --- a/models/tests/test_schemas.py +++ b/models/tests/test_schemas.py @@ -7,9 +7,7 @@ def test_empty_id_in_modelbase_raises_error(): minio_path = {"file": "file", "bucket": "bucket"} - with pytest.raises( - ValidationError, match="this value has at least 1 characters" - ): + with pytest.raises(ValidationError, match="this value has at least 1 characters"): schemas.ModelWithId( id="", name="name", @@ -109,23 +107,17 @@ def test_validation_of_model_id(): def test_empty_id_in_basementbase_raises_error(): - with pytest.raises( - ValidationError, match="this value has at least 1 characters" - ): + with pytest.raises(ValidationError, match="this value has at least 1 characters"): schemas.BasementBase(id="", name="base", gpu_support=True) def test_not_empty_id_in_basementbase_does_not_raise_error(): - schemas.BasementBase( - id="1", name="base", gpu_support=True, limits=TEST_LIMITS - ) + schemas.BasementBase(id="1", name="base", gpu_support=True, limits=TEST_LIMITS) def test_validation_of_bucket_in_minio_path(): underscore_bucket = "bucket_" - with pytest.raises( - ValidationError, match="Bucket cannot contain underscores" - ): + with pytest.raises(ValidationError, match="Bucket cannot contain underscores"): schemas.MinioPath(bucket=underscore_bucket, file="file") correct_bucket = "bucket" schemas.MinioPath(bucket=correct_bucket, file="file") diff --git a/models/tests/test_trainings_routers.py b/models/tests/test_trainings_routers.py index 42e148111..0fad4202d 100644 --- a/models/tests/test_trainings_routers.py +++ 
b/models/tests/test_trainings_routers.py @@ -51,13 +51,11 @@ def test_create_training_in_positive_case(exist, create, _get): create.return_value = {"id": "id"} token = Mock() token.user_id.return_value = "token" - assert training_routers.create_new_training( - data, "session", token, "tenant" - ) == {"id": "id"} + assert training_routers.create_new_training(data, "session", token, "tenant") == { + "id": "id" + } exist.assert_called_once_with("session", Basement, "basement") - create.assert_called_once_with( - "session", Training, data, token.user_id, "tenant" - ) + create.assert_called_once_with("session", Training, data, token.user_id, "tenant") @patch.object(training_routers.crud, "create_instance") diff --git a/models/tests/test_utils.py b/models/tests/test_utils.py index 187efbe12..5496f0a94 100644 --- a/models/tests/test_utils.py +++ b/models/tests/test_utils.py @@ -172,10 +172,7 @@ def test_put_object_via_presigned_url(moto_minio, monkeypatch): minio_response = requests.put(presigned_url, json=test_data) assert minio_response.status_code == 200 minio_object = ( - moto_minio.Object(TEST_TENANT, key) - .get()["Body"] - .read() - .decode("utf-8") + moto_minio.Object(TEST_TENANT, key).get()["Body"].read().decode("utf-8") ) assert json.loads(minio_object) == test_data @@ -459,9 +456,7 @@ def test_get_pods_with_terminating_status(): utils.client = Mock() utils.client.CoreV1Api.return_value = api Pods = namedtuple("Pods", {"items"}) - Metadata = namedtuple( - "Metadata", ("deletion_timestamp", "name", "namespace") - ) + Metadata = namedtuple("Metadata", ("deletion_timestamp", "name", "namespace")) Status = namedtuple("Status", ("start_time", "container_statuses")) Container = namedtuple("Container", ("name")) container = Container("name") @@ -489,12 +484,8 @@ def test_get_pods_with_running_status(): utils.client = Mock() utils.client.CoreV1Api.return_value = api Pods = namedtuple("Pods", {"items"}) - Metadata = namedtuple( - "Metadata", ("deletion_timestamp", "name", "namespace") - ) - Status = namedtuple( - "Status", ("start_time", "container_statuses", "phase") - ) + Metadata = namedtuple("Metadata", ("deletion_timestamp", "name", "namespace")) + Status = namedtuple("Status", ("start_time", "container_statuses", "phase")) Pod = namedtuple("Pod", ("metadata", "status")) Container = namedtuple("Container", ("name")) container = Container("name") @@ -525,9 +516,7 @@ def test_get_minio_object_wrong_tenant(monkeypatch, moto_minio) -> None: Mock(return_value=moto_minio), ) wrong_tenant = "wrong_tenant" - with pytest.raises( - NoSuchTenant, match=f"Bucket {wrong_tenant} does not exist" - ): + with pytest.raises(NoSuchTenant, match=f"Bucket {wrong_tenant} does not exist"): utils.get_minio_object(wrong_tenant, "file/file.txt") diff --git a/models/tests/utils.py b/models/tests/utils.py index f343db122..24e453719 100644 --- a/models/tests/utils.py +++ b/models/tests/utils.py @@ -16,7 +16,7 @@ def create_expected_models( status: StatusEnum = None, tenant: str = None, name: str = None, - description: str = '' + description: str = "", ) -> dict: return { "basement": basement_id, @@ -33,7 +33,7 @@ def create_expected_models( "training_id": training_id, "type": None, "version": version, - "description": description + "description": description, } diff --git a/pipelines/alembic/env.py b/pipelines/alembic/env.py index 3754b3c1c..11756dd6b 100644 --- a/pipelines/alembic/env.py +++ b/pipelines/alembic/env.py @@ -1,12 +1,12 @@ import os from logging.config import fileConfig -from sqlalchemy import 
engine_from_config, pool - import pipelines.config as settings -from alembic import context from pipelines.db.models import Base from pipelines.db.service import get_test_db_url +from sqlalchemy import engine_from_config, pool + +from alembic import context # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -70,9 +70,7 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure( - connection=connection, target_metadata=target_metadata - ) + context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() diff --git a/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py b/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py index 6a1df2714..d6605ec1d 100644 --- a/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py +++ b/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py @@ -6,10 +6,10 @@ """ import sqlalchemy as sa +from pipelines.db import models from sqlalchemy import orm from alembic import op -from pipelines.db import models # revision identifiers, used by Alembic. revision = "0ab5e65cf34b" @@ -20,9 +20,9 @@ def upgrade() -> None: session = orm.Session(bind=op.get_bind()) - session.query(models.Pipeline).filter( - models.Pipeline.type.is_(None) - ).update({models.Pipeline.type: "inference"}, synchronize_session="fetch") + session.query(models.Pipeline).filter(models.Pipeline.type.is_(None)).update( + {models.Pipeline.type: "inference"}, synchronize_session="fetch" + ) session.commit() session.close() diff --git a/pipelines/alembic/versions/29f072fb5c9c_.py b/pipelines/alembic/versions/29f072fb5c9c_.py index 88b3be178..917fcd844 100644 --- a/pipelines/alembic/versions/29f072fb5c9c_.py +++ b/pipelines/alembic/versions/29f072fb5c9c_.py @@ -39,7 +39,5 @@ def downgrade() -> None: op.f("ix_pipeline_execution_task_job_id"), table_name="pipeline_execution_task", ) - op.drop_index( - op.f("ix_execution_step_task_id"), table_name="execution_step" - ) + op.drop_index(op.f("ix_execution_step_task_id"), table_name="execution_step") # ### end Alembic commands ### diff --git a/pipelines/alembic/versions/5fd9d1fdcf5b_init.py b/pipelines/alembic/versions/5fd9d1fdcf5b_init.py index 2b62f9e81..ef357d737 100644 --- a/pipelines/alembic/versions/5fd9d1fdcf5b_init.py +++ b/pipelines/alembic/versions/5fd9d1fdcf5b_init.py @@ -58,9 +58,7 @@ def upgrade() -> None: sa.Column("job_id", sa.Integer(), nullable=True), sa.Column("runner_id", postgresql.UUID(), nullable=True), sa.Column("status", sa.String(length=30), nullable=True), - sa.ForeignKeyConstraint( - ["pipeline_id"], ["pipeline.id"], ondelete="CASCADE" - ), + sa.ForeignKeyConstraint(["pipeline_id"], ["pipeline.id"], ondelete="CASCADE"), sa.PrimaryKeyConstraint("id"), ) op.create_table( diff --git a/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py b/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py index 46a937c4c..d67b33e39 100644 --- a/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py +++ b/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py @@ -6,10 +6,10 @@ """ import sqlalchemy as sa +from pipelines.db import models from sqlalchemy import orm from alembic import op -from pipelines.db import models # revision identifiers, used by Alembic. 
revision = "764961499e2b" @@ -23,9 +23,7 @@ def upgrade() -> None: "pipeline", sa.Column("original_pipeline_id", sa.Integer(), nullable=True), ) - op.add_column( - "pipeline", sa.Column("is_latest", sa.Boolean(), nullable=True) - ) + op.add_column("pipeline", sa.Column("is_latest", sa.Boolean(), nullable=True)) session = orm.Session(bind=op.get_bind()) rows = ( diff --git a/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py b/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py index abefe8942..1dcd7539f 100644 --- a/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py +++ b/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py @@ -18,12 +18,8 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.add_column( - "pipeline", sa.Column("type", sa.String(length=30), nullable=True) - ) - op.add_column( - "pipeline", sa.Column("description", sa.Text(), nullable=True) - ) + op.add_column("pipeline", sa.Column("type", sa.String(length=30), nullable=True)) + op.add_column("pipeline", sa.Column("description", sa.Text(), nullable=True)) op.add_column("pipeline", sa.Column("summary", sa.Text(), nullable=True)) # ### end Alembic commands ### diff --git a/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py b/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py index cf22b1af8..26e60bc04 100644 --- a/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py +++ b/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py @@ -6,10 +6,10 @@ """ import sqlalchemy as sa +from pipelines.db import models from sqlalchemy import orm from alembic import op -from pipelines.db import models # revision identifiers, used by Alembic. revision = "b0cbaebbddd8" @@ -28,11 +28,7 @@ def upgrade() -> None: session.close() session = orm.Session(bind=op.get_bind()) - rows = ( - session.query(models.Pipeline) - .options(orm.load_only("id", "meta")) - .all() - ) + rows = session.query(models.Pipeline).options(orm.load_only("id", "meta")).all() for row in rows: new_meta = dict(row.meta) new_meta["version"] = 1 @@ -83,11 +79,7 @@ def downgrade() -> None: session.close() session = orm.Session(bind=op.get_bind()) - rows = ( - session.query(models.Pipeline) - .options(orm.load_only("id", "meta")) - .all() - ) + rows = session.query(models.Pipeline).options(orm.load_only("id", "meta")).all() for row in rows: new_meta = dict(row.meta) new_meta["version"] = "v1" diff --git a/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py b/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py index 28d4c9eef..ac9bfdcf2 100644 --- a/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py +++ b/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py @@ -5,26 +5,31 @@ Create Date: 2022-04-05 16:13:05.539298 """ -from alembic import op import sqlalchemy as sa from sqlalchemy.dialects import postgresql +from alembic import op + # revision identifiers, used by Alembic. -revision = 'df42f45f4ddf' -down_revision = '29f072fb5c9c' +revision = "df42f45f4ddf" +down_revision = "29f072fb5c9c" branch_labels = None depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - op.add_column('execution_step', sa.Column('parent_step', postgresql.UUID(), nullable=True)) - op.add_column('execution_step', sa.Column('tenant', sa.String(length=50), nullable=True)) + op.add_column( + "execution_step", sa.Column("parent_step", postgresql.UUID(), nullable=True) + ) + op.add_column( + "execution_step", sa.Column("tenant", sa.String(length=50), nullable=True) + ) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('execution_step', 'tenant') - op.drop_column('execution_step', 'parent_step') + op.drop_column("execution_step", "tenant") + op.drop_column("execution_step", "parent_step") # ### end Alembic commands ### diff --git a/pipelines/pipelines/app.py b/pipelines/pipelines/app.py index 5d78b239e..bac7f7ef3 100644 --- a/pipelines/pipelines/app.py +++ b/pipelines/pipelines/app.py @@ -1,20 +1,19 @@ import asyncio from typing import Any, Dict, List, Optional, Union -from fastapi import Depends, FastAPI, Header, HTTPException, status -from filter_lib import Page, form_query, map_request_to_filter, paginate -from pydantic import AnyUrl -from sqlalchemy.orm import Session -from sqlalchemy_filters.exceptions import BadFilterFormat -from tenant_dependency import TenantData, get_tenant_info - import pipelines.config as config import pipelines.db.models as dbm import pipelines.db.service as service import pipelines.execution as execution import pipelines.schemas as schemas +from fastapi import Depends, FastAPI, Header, HTTPException, status +from filter_lib import Page, form_query, map_request_to_filter, paginate from pipelines.kafka_utils import Kafka from pipelines.pipeline_runner import run_pipeline +from pydantic import AnyUrl +from sqlalchemy.orm import Session +from sqlalchemy_filters.exceptions import BadFilterFormat +from tenant_dependency import TenantData, get_tenant_info TOKEN = get_tenant_info(url=config.KEYCLOAK_URI, algorithm="RS256") @@ -204,9 +203,7 @@ async def get_task_by_id( task_id: int, session: Session = Depends(service.get_session) ) -> Any: """Get task by its id.""" - res = service.get_table_instance_by_id( - session, dbm.PipelineExecutionTask, task_id - ) + res = service.get_table_instance_by_id(session, dbm.PipelineExecutionTask, task_id) if res: return res.as_dict() raise HTTPException(status_code=404, detail=NO_TASK) @@ -297,9 +294,7 @@ async def delete_task( task_id: int, session: Session = Depends(service.get_session) ) -> Dict[str, str]: """Delete task from db by its id.""" - res = service.get_table_instance_by_id( - session, dbm.PipelineExecutionTask, task_id - ) + res = service.get_table_instance_by_id(session, dbm.PipelineExecutionTask, task_id) if res is None: raise HTTPException(status_code=404, detail=NO_TASK) service.delete_instances(session, [res]) @@ -317,9 +312,7 @@ async def get_task_steps_by_id( task_id: int, session: Session = Depends(service.get_session) ) -> List[Dict[str, str]]: """Get task steps by task id.""" - res = service.get_table_instance_by_id( - session, dbm.PipelineExecutionTask, task_id - ) + res = service.get_table_instance_by_id(session, dbm.PipelineExecutionTask, task_id) if res is None: raise HTTPException(status_code=404, detail=NO_TASK) return [step.as_dict() for step in res.steps] diff --git a/pipelines/pipelines/config.py b/pipelines/pipelines/config.py index 22d725e16..a78a02449 100644 --- a/pipelines/pipelines/config.py +++ b/pipelines/pipelines/config.py @@ -62,8 +62,7 @@ def get_version() -> str: KEYCLOAK_REALM = 
os.getenv("KEYCLOAK_REALM", "master") CLIENT_SECRET = os.getenv("CLIENT_SECRET", "") KEYCLOAK_TOKEN_URI = ( - f"{KEYCLOAK_URI}/auth/realms/{KEYCLOAK_REALM}" - f"/protocol/openid-connect/token" + f"{KEYCLOAK_URI}/auth/realms/{KEYCLOAK_REALM}" f"/protocol/openid-connect/token" ) # Kafka settings diff --git a/pipelines/pipelines/db/logger.py b/pipelines/pipelines/db/logger.py index ec52b1085..ae628813f 100644 --- a/pipelines/pipelines/db/logger.py +++ b/pipelines/pipelines/db/logger.py @@ -1,13 +1,12 @@ import datetime -from sqlalchemy import event, insert -from sqlalchemy.engine import Connection -from sqlalchemy.orm import Mapper - import pipelines.db.models as models import pipelines.db.service as service import pipelines.pipeline_runner as runner import pipelines.schemas as schemas +from sqlalchemy import event, insert +from sqlalchemy.engine import Connection +from sqlalchemy.orm import Mapper def create_log(event_type: str, entity: models.Table) -> schemas.Log: @@ -32,9 +31,7 @@ def log_after_insert( ) -> None: """Listen for the insert event and log to MainEventLog.""" log_ = create_log(schemas.Event.INS, target).dict() - stmt = insert(models.MainEventLog).values( - runner_id=runner.runner_id, event=log_ - ) + stmt = insert(models.MainEventLog).values(runner_id=runner.runner_id, event=log_) connection.execute(stmt) @@ -46,9 +43,7 @@ def log_after_delete( ) -> None: """Listen for the insert event and log to MainEventLog.""" log_ = create_log(schemas.Event.DEL, target).dict() - stmt = insert(models.MainEventLog).values( - runner_id=runner.runner_id, event=log_ - ) + stmt = insert(models.MainEventLog).values(runner_id=runner.runner_id, event=log_) connection.execute(stmt) @@ -62,10 +57,6 @@ def log_after_update(update_context) -> None: # type: ignore k.key: v.isoformat() if isinstance(v, datetime.datetime) else v for k, v in update_context.values.items() } - log_ = schemas.Log( - entity=entity, event_type=schemas.Event.UPD, data=data - ).dict() - stmt = insert(models.MainEventLog).values( - runner_id=runner.runner_id, event=log_ - ) + log_ = schemas.Log(entity=entity, event_type=schemas.Event.UPD, data=data).dict() + stmt = insert(models.MainEventLog).values(runner_id=runner.runner_id, event=log_) update_context.session.execute(stmt) diff --git a/pipelines/pipelines/db/models.py b/pipelines/pipelines/db/models.py index 0611ebd7c..f0c6e136f 100644 --- a/pipelines/pipelines/db/models.py +++ b/pipelines/pipelines/db/models.py @@ -15,9 +15,7 @@ class Pipeline(Base): # type: ignore __tablename__ = "pipeline" id = sa.Column(sa.Integer, primary_key=True) - name = sa.Column( - sa.String(50), nullable=False, default=lambda: str(uuid.uuid4()) - ) + name = sa.Column(sa.String(50), nullable=False, default=lambda: str(uuid.uuid4())) version = sa.Column(sa.Integer, nullable=True) original_pipeline_id = sa.Column(sa.Integer, nullable=True) is_latest = sa.Column(sa.Boolean, default=True, nullable=True) @@ -61,9 +59,7 @@ class PipelineExecutionTask(Base): # type: ignore __tablename__ = "pipeline_execution_task" id = sa.Column(sa.Integer, primary_key=True) - name = sa.Column( - sa.String(50), nullable=False, default=lambda: str(uuid.uuid4()) - ) + name = sa.Column(sa.String(50), nullable=False, default=lambda: str(uuid.uuid4())) date = sa.Column(sa.DateTime, nullable=False, default=datetime.utcnow) pipeline_id = sa.Column( sa.Integer, @@ -112,9 +108,7 @@ class ExecutionStep(Base): # type: ignore index=True, nullable=False, ) - name = sa.Column( - sa.String(50), nullable=False, default=lambda: 
str(uuid.uuid4()) - ) + name = sa.Column(sa.String(50), nullable=False, default=lambda: str(uuid.uuid4())) step_id = sa.Column(UUID()) parent_step = sa.Column(UUID(), nullable=True) date = sa.Column( @@ -155,9 +149,7 @@ class ExecutorHeartbeat(Base): # type: ignore __tablename__ = "heartbeat" id = sa.Column(UUID(), primary_key=True, default=uuid.uuid4) - last_heartbeat = sa.Column( - sa.DateTime, nullable=False, default=datetime.utcnow - ) + last_heartbeat = sa.Column(sa.DateTime, nullable=False, default=datetime.utcnow) def __repr__(self) -> str: return ( diff --git a/pipelines/pipelines/db/service.py b/pipelines/pipelines/db/service.py index 8a07d0512..25a218d02 100644 --- a/pipelines/pipelines/db/service.py +++ b/pipelines/pipelines/db/service.py @@ -2,14 +2,13 @@ import datetime from typing import Any, Callable, Dict, Generator, List, Optional, Type, Union +import pipelines.db.models as dbm from aiokafka import AIOKafkaProducer +from pipelines import config, execution, log, schemas from pydantic import AnyUrl from sqlalchemy import create_engine from sqlalchemy.orm import Session, sessionmaker -import pipelines.db.models as dbm -from pipelines import config, execution, log, schemas - logger = log.get_logger(__file__) engine = create_engine( @@ -69,9 +68,7 @@ def _add_instance(session: Session, instance: dbm.Table) -> Union[int, str]: add_pipelines = add_steps = add_tasks = _add_instances -def get_all_table_instances( - session: Session, table: dbm.TableType -) -> dbm.TablesList: +def get_all_table_instances(session: Session, table: dbm.TableType) -> dbm.TablesList: """Get list of all table instances from the db. :param session: DB session. @@ -119,9 +116,7 @@ def get_pipelines( return query.all() # type: ignore -def get_task( - session: Session, name: str -) -> Optional[dbm.PipelineExecutionTask]: +def get_task(session: Session, name: str) -> Optional[dbm.PipelineExecutionTask]: """Get task by its name. Latest if multiple tasks found. :param session: DB session. @@ -144,9 +139,7 @@ def get_task_job_id(session: Session, task_id: int) -> Optional[int]: :param task_id: Task id. :return: Task job_id if found. """ - task = get_table_instance_by_id( - session, dbm.PipelineExecutionTask, task_id - ) + task = get_table_instance_by_id(session, dbm.PipelineExecutionTask, task_id) return task.job_id if task else None @@ -157,9 +150,7 @@ def get_webhook(session: Session, task_id: int) -> Optional[str]: :param task_id: Task id. :return: webhook """ - task = get_table_instance_by_id( - session, dbm.PipelineExecutionTask, task_id - ) + task = get_table_instance_by_id(session, dbm.PipelineExecutionTask, task_id) return task.webhook if task else None @@ -215,9 +206,7 @@ def update_table_instance_fields( :param id_: Instance id. :param args: Args to update. """ - session.query(table).filter(table.id == id_).update( - args, synchronize_session=False - ) + session.query(table).filter(table.id == id_).update(args, synchronize_session=False) session.commit() @@ -276,9 +265,7 @@ def get_pending_tasks( ) -def update_task_in_lock( - session: Session, task_id: int, runner_id: str -) -> None: +def update_task_in_lock(session: Session, task_id: int, runner_id: str) -> None: """Update task runner_id with 'for update' statement. :param session: DB session. @@ -314,9 +301,7 @@ def get_not_finished_tasks( ) -def get_heartbeat( - session: Session, id_: str -) -> Optional[dbm.ExecutorHeartbeat]: +def get_heartbeat(session: Session, id_: str) -> Optional[dbm.ExecutorHeartbeat]: """Return heartbeat with the given id. 
:param session: DB session. @@ -424,9 +409,7 @@ async def initialize_execution( return task_id # type: ignore -async def get_step_parent( - step_id: str, ids: Dict[str, List[str]] -) -> Optional[str]: +async def get_step_parent(step_id: str, ids: Dict[str, List[str]]) -> Optional[str]: """ Finds if step has any dependant steps """ diff --git a/pipelines/pipelines/execution.py b/pipelines/pipelines/execution.py index 390a99333..ac3a944a7 100644 --- a/pipelines/pipelines/execution.py +++ b/pipelines/pipelines/execution.py @@ -8,17 +8,24 @@ from typing import Any, DefaultDict, Dict, List, Optional, Union from uuid import uuid4 +import pipelines.db.models as dbm +import pipelines.db.service as service +import pipelines.result_processing as postprocessing import requests from aiokafka import AIOKafkaProducer from fastapi import HTTPException, status +from pipelines import ( + config, + http_utils, + log, + s3, + schemas, + service_token, + webhooks, +) from pydantic import BaseModel, Field from sqlalchemy import orm -import pipelines.db.models as dbm -import pipelines.db.service as service -import pipelines.result_processing as postprocessing -from pipelines import config, http_utils, log, s3, schemas, service_token, webhooks - logger = log.get_logger(__file__) minio_client = s3.get_minio_client() @@ -173,9 +180,7 @@ async def process_next_steps(self, producer: AIOKafkaProducer) -> None: ) ) - def update( - self, status: schemas.Status, result: Optional[Dict[str, Any]] - ) -> None: + def update(self, status: schemas.Status, result: Optional[Dict[str, Any]]) -> None: """Updates step status and result.""" self.status = status self.result = result @@ -205,9 +210,7 @@ def get_pipeline_step(self) -> Optional[PipelineStep]: def get_next_steps(self) -> List[ExecutionStep]: task = PipelineTask.get_by_id(self.task_id) - return [ - step for step in task.steps if step.parent_step == self.step_id - ] + return [step for step in task.steps if step.parent_step == self.step_id] def get_pipeline_type(self) -> schemas.PipelineTypes: task = PipelineTask.get_by_id(self.task_id) @@ -268,9 +271,7 @@ async def start(self, producer: AIOKafkaProducer) -> None: args = schemas.InputArguments.parse_obj(initial_step.init_args) tenant = s3.tenant_from_bucket(args.get_output_bucket()) if pipeline_type == schemas.PipelineTypes.INFERENCE: - preprecessing_passed = await self.check_preprocessing_status( - tenant - ) + preprecessing_passed = await self.check_preprocessing_status(tenant) if not preprecessing_passed: return logger.info(f"Start executing task with id = {self.id}") @@ -280,9 +281,7 @@ async def start(self, producer: AIOKafkaProducer) -> None: pipeline_type=pipeline_type, curr_step_id=str(initial_step.id) ) asyncio.create_task( - initial_step.step_execution_with_logging( - producer=producer, body=init_body - ) + initial_step.step_execution_with_logging(producer=producer, body=init_body) ) async def finish(self, failed: bool) -> None: @@ -292,9 +291,7 @@ async def finish(self, failed: bool) -> None: Args: failed: whether the task have failed steps. 
""" - initial_step = [ - step for step in self.steps if step.parent_step is None - ][0] + initial_step = [step for step in self.steps if step.parent_step is None][0] token = service_token.get_service_token() args = schemas.InputArguments.parse_obj(initial_step.init_args) bucket = args.get_output_bucket() @@ -302,8 +299,7 @@ async def finish(self, failed: bool) -> None: pipeline_type = self.get_pipeline_type() if not failed and pipeline_type == schemas.PipelineTypes.INFERENCE: logger.info( - "preparing to merge results and " - "send it to postprocessing/annotation" + "preparing to merge results and " "send it to postprocessing/annotation" ) path_ = args.get_path() filename = args.get_filename() @@ -325,13 +321,9 @@ async def finish(self, failed: bool) -> None: task_status = schemas.Status.FAIL if failed else schemas.Status.DONE self.change_status(task_status) - logger.info( - f"Task with id = {self.id} finished with status = {task_status}" - ) + logger.info(f"Task with id = {self.id} finished with status = {task_status}") tenant = s3.tenant_from_bucket(bucket) - self.send_status( - pipeline_type=pipeline_type, tenant=tenant, token=token - ) + self.send_status(pipeline_type=pipeline_type, tenant=tenant, token=token) def change_status(self, status: schemas.Status) -> None: """Changes status of the task in the db and in the instance.""" @@ -392,9 +384,7 @@ async def check_preprocessing_status(self, tenant: str) -> bool: max_retries = config.MAX_FILE_STATUS_RETRIES timeout = config.FILE_STATUS_TIMEOUT for retry in range(1, int(max_retries) + 1): - file_status = http_utils.get_file_status( - file_id=file_id, tenant=tenant - ) + file_status = http_utils.get_file_status(file_id=file_id, tenant=tenant) if file_status == schemas.PreprocessingStatus.PREPROCESSED: return True elif file_status is None: @@ -427,9 +417,7 @@ async def check_preprocessing_status(self, tenant: str) -> bool: await self.finish(failed=True) return False - def update_steps( - self, status: schemas.Status, result: Dict[str, Any] - ) -> None: + def update_steps(self, status: schemas.Status, result: Dict[str, Any]) -> None: """Updates all steps in case of they all have one result. 
For instance, it occurs when preprocessing is failed for steps file.""" @@ -526,9 +514,7 @@ class Pipeline(BaseModel): def get_ids(self) -> Dict[str, List[str]]: """Return ids of all steps.""" return { - k: v - for step in self.steps - for k, v in step.steps_identifiers().items() + k: v for step in self.steps for k, v in step.steps_identifiers().items() } def get_steps_dict(self) -> Dict[str, PipelineStep]: @@ -538,9 +524,7 @@ def get_steps_dict(self) -> Dict[str, PipelineStep]: """ steps_dict = { - k: v - for step in self.steps - for k, v in step.get_step_dict().items() + k: v for step in self.steps for k, v in step.get_step_dict().items() } if self.steps: for step in self.steps: @@ -616,8 +600,7 @@ def get_model_urls(model_ids: List[str]) -> Dict[str, str]: if mod.get("name") == id_: if ( config.DIFFERENT_PREPROCESSING_URLS - and model_types[id_] - == schemas.ModelTypes.PREPROCESSING + and model_types[id_] == schemas.ModelTypes.PREPROCESSING ): url_map[id_] = Pipeline._convert_preprocessing_uri( mod.get("url") @@ -665,9 +648,7 @@ def adjust_pipeline(self, model_ids: List[str]) -> None: def check_name(self, session: orm.Session) -> None: """Checks if a pipeline with the same name already exists in the DB.""" - pipelines_with_such_name = service.get_pipelines( - session, name=self.meta.name - ) + pipelines_with_such_name = service.get_pipelines(session, name=self.meta.name) if pipelines_with_such_name: raise HTTPException( status_code=status.HTTP_409_CONFLICT, detail=PIPELINE_EXISTS diff --git a/pipelines/pipelines/http_utils.py b/pipelines/pipelines/http_utils.py index 8ee693a20..6b1771168 100644 --- a/pipelines/pipelines/http_utils.py +++ b/pipelines/pipelines/http_utils.py @@ -2,7 +2,6 @@ from typing import Any, Dict, List, Optional import requests - from pipelines import config, log, schemas, service_token logger = log.get_logger(__file__) @@ -55,9 +54,7 @@ def make_request_with_retry( return None -def get_file_status( - file_id: int, tenant: str -) -> Optional[schemas.PreprocessingStatus]: +def get_file_status(file_id: int, tenant: str) -> Optional[schemas.PreprocessingStatus]: logger.info(f"Sending request to the assets to get file {file_id} status.") body = {"filters": [{"field": "id", "operator": "eq", "value": file_id}]} url = f"{config.ASSETS_URI}/files/search" @@ -85,9 +82,7 @@ def get_model_types(model_ids: List[str]) -> Dict[str, str]: "filters": [{"field": "id", "operator": "in", "value": model_ids}], } model_search = config.MODELS_URI + config.MODELS_SEARCH_ENDPOINT - response = make_request_with_retry( - url=model_search, body=body, method="POST" - ) + response = make_request_with_retry(url=model_search, body=body, method="POST") result = response.json() items = result.get("data") return {item.get("id"): item.get("type") for item in items} diff --git a/pipelines/pipelines/kafka_utils.py b/pipelines/pipelines/kafka_utils.py index 44cc7f2f3..307107592 100644 --- a/pipelines/pipelines/kafka_utils.py +++ b/pipelines/pipelines/kafka_utils.py @@ -3,7 +3,6 @@ import aiokafka from kafka import admin, errors - from pipelines import config, log logger = log.get_logger(__name__) @@ -31,9 +30,7 @@ def consumer(self) -> aiokafka.AIOKafkaConsumer: logger.exception("Failed to initialize consumer.") raise self.consumer.subscribe(topics=[config.KAFKA_CONSUME_TOPIC]) - logger.info( - f"Consumer subscribed to topic {config.KAFKA_CONSUME_TOPIC}" - ) + logger.info(f"Consumer subscribed to topic {config.KAFKA_CONSUME_TOPIC}") return self._consumer @property diff --git 
a/pipelines/pipelines/pipeline_runner.py b/pipelines/pipelines/pipeline_runner.py index 432ae70f0..37fe95196 100644 --- a/pipelines/pipelines/pipeline_runner.py +++ b/pipelines/pipelines/pipeline_runner.py @@ -4,7 +4,6 @@ import aiokafka from aiokafka import AIOKafkaConsumer, AIOKafkaProducer - from pipelines import execution, schemas from pipelines.log import get_logger @@ -47,8 +46,7 @@ async def process_message( if received_step.status == schemas.Status.FAIL: error = received_step.result["error"] # type: ignore logger.error( - f"Received failed step with id = {received_step.id}, " - f"Error: {error}" + f"Received failed step with id = {received_step.id}, " f"Error: {error}" ) failed = True elif task.is_completed(): @@ -60,9 +58,7 @@ async def process_message( asyncio.create_task(task.finish(failed=failed)) -async def run_pipeline( - consumer: AIOKafkaConsumer, producer: AIOKafkaProducer -) -> None: +async def run_pipeline(consumer: AIOKafkaConsumer, producer: AIOKafkaProducer) -> None: """ Launch Kafka consumer and process received pipeline steps """ diff --git a/pipelines/pipelines/result_processing.py b/pipelines/pipelines/result_processing.py index e5cce42bb..71f579f29 100644 --- a/pipelines/pipelines/result_processing.py +++ b/pipelines/pipelines/result_processing.py @@ -8,9 +8,8 @@ import urllib3.exceptions from minio import Minio from minio import error as minioerr -from pydantic import BaseModel, ValidationError - from pipelines import config, http_utils, log +from pydantic import BaseModel, ValidationError logger = log.get_logger(__file__) @@ -144,9 +143,7 @@ def update_id( ] if unique_obj.links is not None: for link in unique_obj.links: - link.update( - {"category": unique_obj.category, "to": unique_obj.id} - ) + link.update({"category": unique_obj.category, "to": unique_obj.id}) @staticmethod def group_objs_by_id( @@ -159,9 +156,7 @@ def group_objs_by_id( return grouped_objs @staticmethod - def merge( - objs: List[GeometryObject], id_: Union[str, int] = 0 - ) -> GeometryObject: + def merge(objs: List[GeometryObject], id_: Union[str, int] = 0) -> GeometryObject: """Merge Geometry Objects into one. :param objs: Geometry Objects to merge. 
@@ -248,9 +243,7 @@ def get_pipeline_leaves_data( """ try: path_objects = list_object_names(client, bucket, path_) - files_data = [ - get_file_data(client, bucket, path_) for path_ in path_objects - ] + files_data = [get_file_data(client, bucket, path_) for path_ in path_objects] except (minioerr.S3Error, urllib3.exceptions.MaxRetryError) as err: logger.error("error %s", str(err)) return None @@ -364,9 +357,7 @@ def manage_result_for_annotator( "input": merged_data.dict(exclude_none=True), } headers = {"X-Current-Tenant": tenant, "Authorization": f"Bearer {token}"} - postprocessed_data = postprocess_result( - data_for_postprocessor, headers=headers - ) + postprocessed_data = postprocess_result(data_for_postprocessor, headers=headers) if postprocessed_data is None: logger.info("result for postprocessing data is None") return False diff --git a/pipelines/pipelines/schemas.py b/pipelines/pipelines/schemas.py index 9609276ed..82566a22d 100644 --- a/pipelines/pipelines/schemas.py +++ b/pipelines/pipelines/schemas.py @@ -5,10 +5,9 @@ from enum import Enum from typing import Any, Dict, List, Optional -from pydantic import BaseModel, Field, PrivateAttr, root_validator, validator - import pipelines.db.models as dbm from pipelines import log +from pydantic import BaseModel, Field, PrivateAttr, root_validator, validator logger = log.get_logger(__file__) @@ -108,9 +107,7 @@ def output_bucket_validator(cls, values: Dict[str, Any]) -> Dict[str, Any]: return values @validator("input_path", "output_path") - def path_validator( # pylint: disable=E0213 - cls, v: Optional[str] - ) -> Optional[str]: + def path_validator(cls, v: Optional[str]) -> Optional[str]: # pylint: disable=E0213 """Path validator.""" if v is None: return v @@ -127,9 +124,7 @@ def file_path_validator(cls, v: str) -> str: # pylint: disable=E0213 """File path validator.""" mod_v = v.strip().rstrip("/") if mod_v.count("/") != 2: - raise ValueError( - "File path should be like 'files/fileId/fileId.fileExt'" - ) + raise ValueError("File path should be like 'files/fileId/fileId.fileExt'") return mod_v def next_step_args( @@ -164,15 +159,11 @@ def prepare_for_init( ) -> InputArguments: """Prepare args as init by creating copy with modified output path.""" if pipeline_type == PipelineTypes.INFERENCE: - output_path = self.append_path( - curr_step_id, self.output_path, ext=".json" - ) + output_path = self.append_path(curr_step_id, self.output_path, ext=".json") elif pipeline_type == PipelineTypes.PREPROCESSING: output_path = self.output_path return InputArguments( - input_path=self.input_path - if self.input_path - else self.output_path, + input_path=self.input_path if self.input_path else self.output_path, input=self.input if self.input else {}, file=self.file, bucket=self.bucket, @@ -181,9 +172,7 @@ def prepare_for_init( output_bucket=self.output_bucket, ) - def append_path( - self, stem: str, path_: Optional[str] = None, ext: str = "" - ) -> str: + def append_path(self, stem: str, path_: Optional[str] = None, ext: str = "") -> str: """Join path_ and stem. 
Takes self._path if not provided""" return urllib.parse.urljoin((path_ or self._path) + "/", stem) + ext @@ -208,9 +197,7 @@ def get_filename(self) -> str: """Get filename without extension.""" return self.file.strip("/").rsplit("/", 1)[-1].split(".", 1)[0] - def create_input_by_label( - self, label: Optional[List[str]] - ) -> InputArguments: + def create_input_by_label(self, label: Optional[List[str]]) -> InputArguments: """Return copy of the instance with changed input.""" if not self.input or self._is_init or not label: return self.copy(deep=True) diff --git a/pipelines/pipelines/service_token.py b/pipelines/pipelines/service_token.py index 8faaebe88..79e5ad34e 100644 --- a/pipelines/pipelines/service_token.py +++ b/pipelines/pipelines/service_token.py @@ -33,14 +33,10 @@ def get_service_token() -> Optional[str]: try: response_json = response.json() except json.JSONDecodeError: - logger.exception( - f"Response {response} from {url} cannot be converted to json." - ) + logger.exception(f"Response {response} from {url} cannot be converted to json.") try: token = response_json[ACCESS_TOKEN] except AttributeError: - logger.exception( - f"Unable to extract token from response {response} from {url}" - ) + logger.exception(f"Unable to extract token from response {response} from {url}") return token diff --git a/pipelines/tests/conftest.py b/pipelines/tests/conftest.py index 663ae2db3..3e7e71087 100644 --- a/pipelines/tests/conftest.py +++ b/pipelines/tests/conftest.py @@ -73,9 +73,7 @@ def testing_app(testing_engine, testing_session, setup_token): session = sessionmaker(bind=testing_engine) app.app.dependency_overrides[app.TOKEN] = lambda: setup_token with patch("pipelines.db.service.LocalSession", session): - app.app.dependency_overrides[ - service.get_session - ] = lambda: testing_session + app.app.dependency_overrides[service.get_session] = lambda: testing_session client = TestClient(app.app) yield client @@ -132,8 +130,6 @@ async def check_preprocessing_status_mock(x, y): @pytest.fixture def adjust_mock(): - with patch.object( - execution.Pipeline, "check_valid_ids", return_value={"a": True} - ): + with patch.object(execution.Pipeline, "check_valid_ids", return_value={"a": True}): with patch.object(execution.Pipeline, "adjust_pipeline") as mock: yield mock diff --git a/pipelines/tests/db/test_logger.py b/pipelines/tests/db/test_logger.py index 0ffead07a..a0bcc8cbd 100644 --- a/pipelines/tests/db/test_logger.py +++ b/pipelines/tests/db/test_logger.py @@ -88,9 +88,9 @@ def test_log_after_update(testing_session): ) testing_session.add(pipeline) testing_session.commit() - testing_session.query(models.Pipeline).filter( - models.Pipeline.id == 1 - ).update({models.Pipeline.version: 2}) + testing_session.query(models.Pipeline).filter(models.Pipeline.id == 1).update( + {models.Pipeline.version: 2} + ) testing_session.commit() log = schemas.Log( entity="Pipeline", event_type=schemas.Event.UPD, data={"version": 2} diff --git a/pipelines/tests/db/test_service.py b/pipelines/tests/db/test_service.py index 8793484d8..62731d9ed 100644 --- a/pipelines/tests/db/test_service.py +++ b/pipelines/tests/db/test_service.py @@ -64,19 +64,14 @@ def test_get_table_instance_by_id(testing_session): """Testing get_table_instance_by_id.""" testing_session.add(dbm.Pipeline(type="inference")) obj = service.get_table_instance_by_id(testing_session, dbm.Pipeline, 1) - none_obj = service.get_table_instance_by_id( - testing_session, dbm.Pipeline, 2 - ) + none_obj = service.get_table_instance_by_id(testing_session, 
dbm.Pipeline, 2) assert obj assert none_obj is None def test_get_table_instance_by_id_not_found(testing_session): """Testing get_table_instance_by_id when instance not found.""" - assert ( - service.get_table_instance_by_id(testing_session, dbm.Pipeline, 1) - is None - ) + assert service.get_table_instance_by_id(testing_session, dbm.Pipeline, 1) is None def test_get_pipelines(testing_session): @@ -112,9 +107,7 @@ def test_get_task_not_found(testing_session): def test_get_task_job_id(testing_session): """Testing get_task_job_id.""" - task = dbm.PipelineExecutionTask( - pipeline=dbm.Pipeline(type="inference"), job_id=42 - ) + task = dbm.PipelineExecutionTask(pipeline=dbm.Pipeline(type="inference"), job_id=42) testing_session.add(task) assert service.get_task_job_id(testing_session, 1) == 42 @@ -165,9 +158,7 @@ def test_update_table_instance_fields(testing_session): 1, {dbm.PipelineExecutionTask.name: "bar"}, ) - assert ( - testing_session.query(dbm.PipelineExecutionTask).get(1).name == "bar" - ) + assert testing_session.query(dbm.PipelineExecutionTask).get(1).name == "bar" def test_update_status(testing_session): @@ -177,9 +168,7 @@ def test_update_status(testing_session): ) testing_session.add(task) service.update_status(testing_session, dbm.PipelineExecutionTask, 1, PEND) - assert ( - testing_session.query(dbm.PipelineExecutionTask).get(1).status == PEND - ) + assert testing_session.query(dbm.PipelineExecutionTask).get(1).status == PEND def test_update_statuses(testing_session): @@ -188,15 +177,9 @@ def test_update_statuses(testing_session): task_1 = dbm.PipelineExecutionTask(pipeline=pipeline, status=PEND) task_2 = dbm.PipelineExecutionTask(pipeline=pipeline, status=RUN) testing_session.add_all([task_1, task_2]) - service.update_statuses( - testing_session, dbm.PipelineExecutionTask, [1, 2], DONE - ) - assert ( - testing_session.query(dbm.PipelineExecutionTask).get(1).status == DONE - ) - assert ( - testing_session.query(dbm.PipelineExecutionTask).get(2).status == DONE - ) + service.update_statuses(testing_session, dbm.PipelineExecutionTask, [1, 2], DONE) + assert testing_session.query(dbm.PipelineExecutionTask).get(1).status == DONE + assert testing_session.query(dbm.PipelineExecutionTask).get(2).status == DONE def test_get_pending_tasks(testing_session): @@ -259,7 +242,9 @@ def test_get_expired_heartbeats(testing_session): """Testing get_expired_heartbeats.""" eff_date = datetime.datetime.utcnow() last_heartbeat = eff_date - datetime.timedelta(minutes=1) - testing_session.add(dbm.ExecutorHeartbeat(id=str(uuid.uuid4()), last_heartbeat=last_heartbeat)) + testing_session.add( + dbm.ExecutorHeartbeat(id=str(uuid.uuid4()), last_heartbeat=last_heartbeat) + ) result = service.get_expired_heartbeats(testing_session, eff_date) assert result[0].last_heartbeat == last_heartbeat @@ -289,9 +274,7 @@ def test_task_runner_id_status_in_lock(testing_session): @pytest.mark.asyncio async def test_initialize(testing_session): """Testing initialize_execution.""" - with patch.object( - execution.Pipeline, "from_orm", return_value=td.pipeline - ): + with patch.object(execution.Pipeline, "from_orm", return_value=td.pipeline): pipeline_db_ = td.pipeline.to_orm() testing_session.add(pipeline_db_) result = await service.initialize_execution( @@ -308,12 +291,8 @@ async def test_initialize(testing_session): task = testing_session.query(dbm.PipelineExecutionTask).get(1) assert task.name == "f" assert task.job_id == 1 - assert testing_session.query(dbm.ExecutionStep).get(1).init_args == { - "a": 1 - } - assert ( - 
testing_session.query(dbm.ExecutionStep).get(2).init_args is None - ) + assert testing_session.query(dbm.ExecutionStep).get(1).init_args == {"a": 1} + assert testing_session.query(dbm.ExecutionStep).get(2).init_args is None @pytest.mark.parametrize( diff --git a/pipelines/tests/test_app.py b/pipelines/tests/test_app.py index 28a8affbb..c8dd35831 100644 --- a/pipelines/tests/test_app.py +++ b/pipelines/tests/test_app.py @@ -63,9 +63,7 @@ def test_add_pipeline_autogen_ids(testing_app, adjust_mock): ({"name": "bar", "version": 2}, td.pipeline_dict_2), ], ) -def test_get_pipeline( - q_params: Dict[str, str], testing_app, adjust_mock, pipeline -): +def test_get_pipeline(q_params: Dict[str, str], testing_app, adjust_mock, pipeline): """Testing get_pipeline.""" testing_app.post("/pipeline", json=pipeline) response = testing_app.get("/pipeline", params=q_params) @@ -99,9 +97,7 @@ def test_get_pipelines(testing_app, adjust_mock): ({"name": "bar", "version": 2}, td.pipeline_dict_2), ], ) -def test_delete_pipelines( - q_params: Dict[str, str], pipeline, testing_app, adjust_mock -): +def test_delete_pipelines(q_params: Dict[str, str], pipeline, testing_app, adjust_mock): """Testing delete_pipelines.""" testing_app.post("/pipeline", json=pipeline) response = testing_app.delete("/pipelines", params=q_params) @@ -263,9 +259,7 @@ def test_get_task_steps_by_id(testing_task, testing_app, testing_session): assert response.json()[0]["status"] == "pending" -def test_get_task_steps_by_id_not_found( - testing_task, testing_app, testing_session -): +def test_get_task_steps_by_id_not_found(testing_task, testing_app, testing_session): """Testing get_task_steps_by_id when there's no such task.""" step = dbm.ExecutionStep(task=testing_task, name="bar", status="pending") service.add_step(testing_session, step) diff --git a/pipelines/tests/test_execution.py b/pipelines/tests/test_execution.py index 3ade7dbad..072bb238c 100644 --- a/pipelines/tests/test_execution.py +++ b/pipelines/tests/test_execution.py @@ -33,9 +33,7 @@ def uuid_mock(): yield uuid_mock -@patch( - "pipelines.execution.ExecutionStep.get_pipeline_step", new_callable=PropertyMock -) +@patch("pipelines.execution.ExecutionStep.get_pipeline_step", new_callable=PropertyMock) @patch("pipelines.execution.ExecutionStep.step_execution") @pytest.mark.asyncio async def test_step_execution_with_logging( @@ -48,12 +46,8 @@ async def test_step_execution_with_logging( step_exec_mock.return_value = None pipeline_step.return_value = property_mock exec_step = td.test_exec_step - body = schemas.InputArguments.parse_obj( - {**td.exec_input_args, "result": "foo"} - ) - await exec_step.step_execution_with_logging( - body=body, producer=AIOKafkaProducer - ) + body = schemas.InputArguments.parse_obj({**td.exec_input_args, "result": "foo"}) + await exec_step.step_execution_with_logging(body=body, producer=AIOKafkaProducer) assert step_exec_mock.call_count == 1 @@ -62,24 +56,16 @@ async def test_step_execution_with_logging( "Test should be fixed - it 'blinks'. " "It passes when run separately, but fails when all tests are run." 
) -@patch( - "pipelines.execution.ExecutionStep.get_pipeline_step", new_callable=PropertyMock -) +@patch("pipelines.execution.ExecutionStep.get_pipeline_step", new_callable=PropertyMock) @patch("pipelines.execution.ExecutionStep.send") @pytest.mark.asyncio -async def test_step_execution( - mock_send, model_url, caplog, run_in_session_mock -): +async def test_step_execution(mock_send, model_url, caplog, run_in_session_mock): """Testing step_execution.""" - property_mock = ExecStepPropertyMock.parse_obj( - {"model_url": "https://foo.com/bar"} - ) + property_mock = ExecStepPropertyMock.parse_obj({"model_url": "https://foo.com/bar"}) model_url.return_value = property_mock mock_send.return_value = None exec_step = td.test_exec_step - await exec_step.step_execution( - producer=AIOKafkaProducer, body=td.input_args_1 - ) + await exec_step.step_execution(producer=AIOKafkaProducer, body=td.input_args_1) assert mock_send.called assert caplog.messages[0] == "Step with id = 58 sent." @@ -230,9 +216,7 @@ def test_adjust_pipeline(): return_value={"bar": "http://bar.dev1.gcov.ru"}, ): td.pipeline.adjust_pipeline(td.pipeline.get_model_ids()) - assert ( - td.pipeline.meta.categories.sort() == ["text", "chart"].sort() - ) + assert td.pipeline.meta.categories.sort() == ["text", "chart"].sort() @pytest.mark.skip( diff --git a/pipelines/tests/test_http_utils.py b/pipelines/tests/test_http_utils.py index 81ca2889a..22e292cfa 100644 --- a/pipelines/tests/test_http_utils.py +++ b/pipelines/tests/test_http_utils.py @@ -25,9 +25,7 @@ def test_make_request(request_mock): ) def test_make_request_with_retry(s_effect, expected, call_count, request_mock): """Testing make_request_with_retry.""" - with patch( - "pipelines.http_utils.make_request", side_effect=s_effect - ) as req_mock: + with patch("pipelines.http_utils.make_request", side_effect=s_effect) as req_mock: assert http_utils.make_request_with_retry("", {}, start=0) == expected assert req_mock.call_count == call_count diff --git a/pipelines/tests/test_result_processing.py b/pipelines/tests/test_result_processing.py index d052c0a6b..20e11fbcb 100644 --- a/pipelines/tests/test_result_processing.py +++ b/pipelines/tests/test_result_processing.py @@ -99,9 +99,7 @@ def test_unite_geometry_objects(): category=2, children=["some_uiid_3"], ) - obj_3 = processing.GeometryObject( - id="some_uiid_3", bbox=(1, 1, 1, 1), category=3 - ) + obj_3 = processing.GeometryObject(id="some_uiid_3", bbox=(1, 1, 1, 1), category=3) res = obj_1.unite_geometry_objects([obj_1, obj_2, obj_3], id_start=1) assert res == [r_obj_1, r_obj_2, r_obj_3] @@ -130,15 +128,9 @@ def test_group_objs_by_id(): """Testing group_objs_by_id of GeometryObject.""" obj_1 = processing.GeometryObject(id=1, bbox=(1, 1, 1, 1), category="some") obj_2 = processing.GeometryObject(id=1, bbox=(1, 1, 1, 1), category="some") - obj_3 = processing.GeometryObject( - id="asd", bbox=(1, 1, 1, 1), category="some" - ) - obj_4 = processing.GeometryObject( - id="asd", bbox=(1, 1, 1, 1), category="some" - ) - res = processing.GeometryObject.group_objs_by_id( - [obj_1, obj_2, obj_3, obj_4] - ) + obj_3 = processing.GeometryObject(id="asd", bbox=(1, 1, 1, 1), category="some") + obj_4 = processing.GeometryObject(id="asd", bbox=(1, 1, 1, 1), category="some") + res = processing.GeometryObject.group_objs_by_id([obj_1, obj_2, obj_3, obj_4]) assert len(res) == 2 assert res[1] == [obj_1, obj_2] assert res["asd"] == [obj_3, obj_4] @@ -297,9 +289,7 @@ def test_merge_pipeline_leaves_data_no_files_data(): with patch( 
"pipelines.result_processing.get_pipeline_leaves_data", return_value=None ): - assert ( - processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None - ) + assert processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None def test_merge_pipeline_leaves_data_cannot_parse_data(): @@ -308,20 +298,14 @@ def test_merge_pipeline_leaves_data_cannot_parse_data(): "pipelines.result_processing.ModelOutput.parse_models", return_value=None ): with patch("pipelines.result_processing.get_pipeline_leaves_data"): - assert ( - processing.merge_pipeline_leaves_data(MagicMock(), "", "") - is None - ) + assert processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None def test_merge_pipeline_leaves_data_cannot_merge_data(): """Testing merge_pipeline_leaves_data when data cannot be merged.""" with patch("pipelines.result_processing.get_pipeline_leaves_data"): with patch("pipelines.result_processing.ModelOutput.parse_models"): - assert ( - processing.merge_pipeline_leaves_data(MagicMock(), "", "") - is None - ) + assert processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None def test_delete_objects(): @@ -352,9 +336,7 @@ def test_postprocess_result(): "pipelines.result_processing.http_utils.make_request_with_retry", return_value=m, ) as req_mock: - with patch( - "pipelines.result_processing.config.POSTPROCESSING_URI", "foo.com" - ): + with patch("pipelines.result_processing.config.POSTPROCESSING_URI", "foo.com"): res = processing.postprocess_result({"foo": 1}) assert res == {"foo": 42} req_mock.assert_called_once_with( diff --git a/pipelines/tests/test_schemas.py b/pipelines/tests/test_schemas.py index df147e83f..a893537b6 100644 --- a/pipelines/tests/test_schemas.py +++ b/pipelines/tests/test_schemas.py @@ -29,9 +29,7 @@ def test_next_step_args_inference(): def test_next_step_args_preprocessing(): """Testing next_step_args of InputArguments.""" args = td.input_args_1 - res = args.next_step_args( - schemas.PipelineTypes.PREPROCESSING, "zxc", {"c": 3} - ) + res = args.next_step_args(schemas.PipelineTypes.PREPROCESSING, "zxc", {"c": 3}) assert res.input == {"c": 3} assert res.input_path == args.output_path assert res.output_path == args.output_path @@ -40,9 +38,7 @@ def test_next_step_args_preprocessing(): def test_prepare_for_init_inference(): """Testing prepare_for_init of InputArguments.""" - res = td.input_args_1.prepare_for_init( - schemas.PipelineTypes.INFERENCE, "baz" - ) + res = td.input_args_1.prepare_for_init(schemas.PipelineTypes.INFERENCE, "baz") d = td.input_args_1.dict() d.update({"input_path": td.input_args_1.output_path}) expected = { @@ -54,9 +50,7 @@ def test_prepare_for_init_inference(): def test_prepare_for_init_preprocessing(): """Testing prepare_for_init of InputArguments.""" - res = td.input_args_1.prepare_for_init( - schemas.PipelineTypes.PREPROCESSING, "baz" - ) + res = td.input_args_1.prepare_for_init(schemas.PipelineTypes.PREPROCESSING, "baz") d = td.input_args_1.dict() d.update({"input_path": td.input_args_1.output_path}) expected = { @@ -176,6 +170,4 @@ def test_invalid_entity(): ], ) def test_filter_dict_by_categories(data, args, result): - assert ( - schemas.InputArguments.filter_dict_by_categories(data, args) == result - ) + assert schemas.InputArguments.filter_dict_by_categories(data, args) == result diff --git a/pipelines/tests/testing_data.py b/pipelines/tests/testing_data.py index 4dc4b273b..ba4e59133 100644 --- a/pipelines/tests/testing_data.py +++ b/pipelines/tests/testing_data.py @@ -83,8 +83,7 @@ heartbeat_db = dbm.ExecutorHeartbeat() 
pipeline_db_repr = ( - "" + "" ) task_db_repr = ( " None: ) with connectable.connect() as connection: - context.configure( - connection=connection, target_metadata=target_metadata - ) + context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() diff --git a/processing/processing/health_check_easy_ocr.py b/processing/processing/health_check_easy_ocr.py index b9f0b78bb..c61021727 100644 --- a/processing/processing/health_check_easy_ocr.py +++ b/processing/processing/health_check_easy_ocr.py @@ -3,7 +3,6 @@ from fastapi import HTTPException from minio.error import MinioException - from processing.utils.aiohttp_utils import send_request from processing.utils.logger import get_logger from processing.utils.minio_utils import ( @@ -48,9 +47,7 @@ async def health_check_preprocessing( await asyncio.gather( *(run_preprocessing(model_url, file, languages) for file in file_ids) ) - result = all( - check_results(file, pages) for file, pages in file_ids.items() - ) + result = all(check_results(file, pages) for file, pages in file_ids.items()) for file, pages in file_ids.items(): clear_data(file, pages) return result @@ -60,9 +57,7 @@ def is_data_prepared() -> bool: try: for file_id in file_ids: minio_client.stat_object(bucket, f"files/{file_id}/{file_id}.pdf") - minio_client.stat_object( - bucket, f"files/{file_id}/expected/1.json" - ) + minio_client.stat_object(bucket, f"files/{file_id}/expected/1.json") except MinioException: return False return True @@ -93,9 +88,7 @@ def check_results(file_id: str, pages: List[int]) -> bool: logger.error("Preprocessing works incorrect") return False except MinioException: - logger.error( - "MinioException had happened while checking easy-ocr health" - ) + logger.error("MinioException had happened while checking easy-ocr health") return False finally: test_page.close() diff --git a/processing/processing/main.py b/processing/processing/main.py index 71679bed4..8a2268b5c 100644 --- a/processing/processing/main.py +++ b/processing/processing/main.py @@ -11,9 +11,6 @@ Response, status, ) -from sqlalchemy.orm import Session -from tenant_dependency import TenantData, get_tenant_info - from processing import db, schema from processing.config import settings from processing.health_check_easy_ocr import health_check_preprocessing @@ -23,6 +20,8 @@ from processing.utils.logger import get_logger from processing.utils.minio_utils import convert_bucket_name_if_s3prefix from processing.utils.utils import map_finish_status_for_assets +from sqlalchemy.orm import Session +from tenant_dependency import TenantData, get_tenant_info logger = get_logger(__name__) app = FastAPI( @@ -62,12 +61,8 @@ def run_text_matching( ) def get_preprocessing_result( file_id: int = Path(..., example=4), - pages: Optional[Set[int]] = Query( - None, min_items=1, ge=1, example={3, 4, 1} - ), - current_tenant: str = Header( - ..., example="tenant", alias="X-Current-Tenant" - ), + pages: Optional[Set[int]] = Query(None, min_items=1, ge=1, example={3, 4, 1}), + current_tenant: str = Header(..., example="tenant", alias="X-Current-Tenant"), ) -> Response: """ Take preprocess data from MinIO for `file_id`, and return it as @@ -136,9 +131,9 @@ async def update_task_status( current_tenant: str = Header(..., alias="X-Current-Tenant"), session: Session = Depends(db.service.session_scope), ) -> Dict[str, str]: - task: Optional[ - db.models.DbPreprocessingTask - ] = db.service.get_task_by_execution_id(task_id, session) + task: 
Optional[db.models.DbPreprocessingTask] = db.service.get_task_by_execution_id( + task_id, session + ) if task is None: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="No such task" @@ -149,8 +144,8 @@ async def update_task_status( task.file_id, task.batch_id, session ) if finished: - assets_status: schema.PreprocessingStatus = ( - map_finish_status_for_assets(file_status) + assets_status: schema.PreprocessingStatus = map_finish_status_for_assets( + file_status ) await PreprocessingTask.update_file_statuses( [task.file_id], assets_status, current_tenant, token_data.token diff --git a/processing/processing/schema.py b/processing/processing/schema.py index 80f1711d3..e1f1fe106 100644 --- a/processing/processing/schema.py +++ b/processing/processing/schema.py @@ -58,9 +58,7 @@ class Page(BaseModel): size: PageSize objs: List[Dict[str, Any]] = Field( ..., - example=[ - {"id": 1, "bbox": [1, 2, 3, 4], "category": "1", "text": "string"} - ], + example=[{"id": 1, "bbox": [1, 2, 3, 4], "category": "1", "text": "string"}], ) diff --git a/processing/processing/send_preprocess_results.py b/processing/processing/send_preprocess_results.py index 955807047..ab3d026de 100644 --- a/processing/processing/send_preprocess_results.py +++ b/processing/processing/send_preprocess_results.py @@ -4,7 +4,6 @@ from fastapi import HTTPException from minio.error import MinioException - from processing.utils.logger import get_logger from processing.utils.minio_utils import MinioCommunicator @@ -41,10 +40,7 @@ def get_pages(bucket: str, path: str, pages: Optional[Set[int]]) -> Set[int]: raise HTTPException(status_code=400, detail=str(err)) return set( - ( - page.object_name.rsplit("/", maxsplit=1)[-1][:-5] - for page in pages_in_minio - ) + (page.object_name.rsplit("/", maxsplit=1)[-1][:-5] for page in pages_in_minio) ) diff --git a/processing/processing/tasks.py b/processing/processing/tasks.py index ac5b17cec..ea566cf9a 100644 --- a/processing/processing/tasks.py +++ b/processing/processing/tasks.py @@ -8,8 +8,6 @@ from cache import AsyncTTL from fastapi import HTTPException, status -from sqlalchemy.orm import Session - from processing.config import settings from processing.schema import PreprocessingStatus, Status from processing.utils.aiohttp_utils import send_request @@ -20,6 +18,7 @@ get_model_url, split_iterable, ) +from sqlalchemy.orm import Session logger = get_logger(__name__) @@ -95,9 +94,7 @@ def __init__( async def _execute(self) -> None: logger.info("Fetch data from assets %s", self) - files_data, _ = await get_files_data( - self.file_ids, self.tenant, self.jw_token - ) + files_data, _ = await get_files_data(self.file_ids, self.tenant, self.jw_token) logger.debug(files_data) logger.info("Execute pipeline %s", self) await execute_pipeline( @@ -125,9 +122,7 @@ async def update_file_statuses( for id_ in ids: body = {"file": id_, "status": task_status} task = asyncio.create_task( - send_request( - "PUT", url=settings.assets_url, json=body, headers=headers - ) + send_request("PUT", url=settings.assets_url, json=body, headers=headers) ) tasks.append(task) @@ -139,9 +134,7 @@ def prepare_data_for_pipelines( ) -> Iterator[FilesData]: for file_data in files_data: - file_data["output_path"] = str( - Path(file_data["path"]).parent / "ocr" - ) + file_data["output_path"] = str(Path(file_data["path"]).parent / "ocr") if file_data["pages"] <= settings.pages_per_batch: file_data["pages"] = list(range(1, file_data["pages"] + 1)) diff --git a/processing/processing/text_merge.py 
b/processing/processing/text_merge.py index 2858d9fb5..a3d06691f 100644 --- a/processing/processing/text_merge.py +++ b/processing/processing/text_merge.py @@ -11,7 +11,6 @@ from fastapi import HTTPException from minio.error import MinioException - from processing import schema from processing.schema import AnnotationData, MatchedPage, Page, ParagraphBbox from processing.third_party_code.box_util import stitch_boxes_into_lines @@ -175,9 +174,7 @@ def merge_words_to_paragraph(request_data: AnnotationData) -> AnnotationData: matched_pages: List[MatchedPage] = [] for page in request_data.input.pages: preprocessed_page = convert_points_to_pixels( - page=json.loads( - (ocr_path / f"{page.page_num}.json").read_text() - ), + page=json.loads((ocr_path / f"{page.page_num}.json").read_text()), new_width=page.size.width, new_height=page.size.height, ) diff --git a/processing/processing/third_party_code/table.py b/processing/processing/third_party_code/table.py index 7bf5e6939..df6932b34 100644 --- a/processing/processing/third_party_code/table.py +++ b/processing/processing/third_party_code/table.py @@ -40,9 +40,7 @@ def merge(self, bb: BorderBox) -> BorderBox: bottom_right_y=max(self.bottom_right_y, bb.bottom_right_y), ) - def box_is_inside_another( - self, bb2: BorderBox, threshold: float = 0.9 - ) -> bool: + def box_is_inside_another(self, bb2: BorderBox, threshold: float = 0.9) -> bool: ( intersection_area, bb1_area, @@ -50,13 +48,9 @@ def box_is_inside_another( ) = self.get_boxes_intersection_area(other_box=bb2) if intersection_area == 0: return False - return any( - (intersection_area / bb) > threshold for bb in (bb1_area, bb2_area) - ) + return any((intersection_area / bb) > threshold for bb in (bb1_area, bb2_area)) - def box_is_inside_box( - self, bb2: BorderBox, threshold: float = 0.95 - ) -> bool: + def box_is_inside_box(self, bb2: BorderBox, threshold: float = 0.95) -> bool: ( intersection_area, bb1_area, diff --git a/processing/processing/utils/aiohttp_utils.py b/processing/processing/utils/aiohttp_utils.py index 9f7abec55..9a3719e3a 100644 --- a/processing/processing/utils/aiohttp_utils.py +++ b/processing/processing/utils/aiohttp_utils.py @@ -4,15 +4,12 @@ import aiohttp from aiohttp import ContentTypeError from fastapi import HTTPException - from processing.config import settings from processing.utils.logger import get_logger logger = get_logger(__name__) -Response = NamedTuple( - "Response", [("status_code", int), ("json", Dict[Any, Any])] -) +Response = NamedTuple("Response", [("status_code", int), ("json", Dict[Any, Any])]) async def send_request(method: str, url: str, **kwargs: Any) -> Response: @@ -22,9 +19,7 @@ async def send_request(method: str, url: str, **kwargs: Any) -> Response: ) logger.info("Send request to %s. 
%s, %s", url, method, kwargs) for attempt in range(settings.retry_attempts): - async with http_session.request( - method=method, url=url, **kwargs - ) as resp: + async with http_session.request(method=method, url=url, **kwargs) as resp: if resp.status in settings.retry_statuses: logger.error("Bad status code: %s from %s", resp.status, url) if attempt != settings.retry_attempts - 1: diff --git a/processing/processing/utils/logger.py b/processing/processing/utils/logger.py index de0dcebb7..54ec95a64 100644 --- a/processing/processing/utils/logger.py +++ b/processing/processing/utils/logger.py @@ -10,8 +10,7 @@ "formatters": { "default": { "()": "uvicorn.logging.DefaultFormatter", - "fmt": "[%(asctime)s] - [%(name)s] - " - "[%(levelname)s] - %(message)s", + "fmt": "[%(asctime)s] - [%(name)s] - " "[%(levelname)s] - %(message)s", "datefmt": "%d-%m-%Y %H:%M:%S", }, }, diff --git a/processing/processing/utils/minio_utils.py b/processing/processing/utils/minio_utils.py index 166fcf5e1..130438d75 100644 --- a/processing/processing/utils/minio_utils.py +++ b/processing/processing/utils/minio_utils.py @@ -1,6 +1,5 @@ from minio import Minio from minio.credentials import AWSConfigProvider, EnvAWSProvider, IamAwsProvider - from processing.config import settings from processing.utils.logger import get_logger @@ -33,20 +32,14 @@ def create_minio_config(): elif settings.s3_credentials_provider == "aws_config": # environmental variable AWS_PROFILE_NAME should be set minio_config.update( - { - "credentials": AWSConfigProvider( - profile=settings.aws_profile_name - ) - } + {"credentials": AWSConfigProvider(profile=settings.aws_profile_name)} ) else: raise NotConfiguredException( "s3 connection is not properly configured - " "s3_credentials_provider is not set" ) - logger.info( - f"S3_Credentials provider - {settings.s3_credentials_provider}" - ) + logger.info(f"S3_Credentials provider - {settings.s3_credentials_provider}") return minio_config diff --git a/processing/processing/utils/utils.py b/processing/processing/utils/utils.py index b9753cd50..9a40cc474 100644 --- a/processing/processing/utils/utils.py +++ b/processing/processing/utils/utils.py @@ -2,12 +2,11 @@ from urllib.parse import urljoin from cache import AsyncTTL -from sqlalchemy.orm import Session - from processing import db, schema from processing.config import settings from processing.utils.aiohttp_utils import send_request from processing.utils.logger import get_log_exception_msg, get_logger +from sqlalchemy.orm import Session logger = get_logger(__name__) T = TypeVar("T") @@ -19,9 +18,7 @@ def get_internal_url(url: str) -> str: def split_iterable(list_a: List[T], chunk_size: int) -> List[List[T]]: """Splits a list passed in chunks with no more, than elements""" - return [ - list_a[x : chunk_size + x] for x in range(0, len(list_a), chunk_size) - ] + return [list_a[x : chunk_size + x] for x in range(0, len(list_a), chunk_size)] @AsyncTTL(time_to_live=60 * 5, maxsize=8) @@ -52,16 +49,12 @@ async def get_files_data( Returns list of dictionaries with data for each file with ids passed in request_body""" elements_per_page_in_dataset_manager = 100 - splatted_files_ids = split_iterable( - files_ids, elements_per_page_in_dataset_manager - ) + splatted_files_ids = split_iterable(files_ids, elements_per_page_in_dataset_manager) all_files_data = [] for batch in splatted_files_ids: params = { "pagination": { - "page_num": len(files_ids) - // elements_per_page_in_dataset_manager - + 1, + "page_num": len(files_ids) // elements_per_page_in_dataset_manager + 
1, "page_size": elements_per_page_in_dataset_manager, }, "filters": [{"field": "id", "operator": "in", "value": batch}], diff --git a/processing/tests/integration/test_integration.py b/processing/tests/integration/test_integration.py index b6bcbe7a3..277f2938f 100644 --- a/processing/tests/integration/test_integration.py +++ b/processing/tests/integration/test_integration.py @@ -57,9 +57,7 @@ def preprocessing_url(module_scoped_container_getter): request_session = requests.Session() retries = Retry(total=5, backoff_factor=1) request_session.mount("http://", HTTPAdapter(max_retries=retries)) - service = module_scoped_container_getter.get("preprocessing").network_info[ - 0 - ] + service = module_scoped_container_getter.get("preprocessing").network_info[0] api_url = f"http://{service.hostname}:{service.host_port}" return api_url @@ -97,11 +95,11 @@ def file_id(minio_client): return file_id -@pytest.mark.skip("Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests.") +@pytest.mark.skip( + "Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests." +) def test_minio_ok(minio_url, minio_client, file_id): - objs = minio_client.list_objects( - BUCKET, f"files/{file_id}", recursive=True - ) + objs = minio_client.list_objects(BUCKET, f"files/{file_id}", recursive=True) file_names = [i.object_name for i in objs] assert set(file_names) == { "files/52/52.pdf", @@ -110,14 +108,18 @@ def test_minio_ok(minio_url, minio_client, file_id): } -@pytest.mark.skip("Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests.") +@pytest.mark.skip( + "Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests." +) def test_url(minio_url, processing_url, preprocessing_url): assert "0.0.0.0:9000" in str(minio_url) assert "0.0.0.0:8080" in str(processing_url) assert "0.0.0.0:65432" in str(preprocessing_url) -@pytest.mark.skip("Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests.") +@pytest.mark.skip( + "Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests." +) def test_get_preprocessing_results_all_pages(processing_url, file_id): response = requests.get( url=processing_url.rstrip("/") + f"/tokens/{file_id}", @@ -127,7 +129,9 @@ def test_get_preprocessing_results_all_pages(processing_url, file_id): assert response.json() == [json.load(file1), json.load(file2)] -@pytest.mark.skip("Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests.") +@pytest.mark.skip( + "Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests." +) def test_get_preprocessing_results_some_pages(processing_url, file_id): response = requests.get( url=processing_url.rstrip("/") + f"/tokens/{file_id}", @@ -140,7 +144,9 @@ def test_get_preprocessing_results_some_pages(processing_url, file_id): assert response.json() == [file2_json] -@pytest.mark.skip("Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests.") +@pytest.mark.skip( + "Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests." 
+) def test_send_request_to_preprocessing( preprocessing_url, processing_url, minio_client, monkeypatch ): @@ -153,9 +159,7 @@ def test_send_request_to_preprocessing( assert response.status_code == 202 sleep(1) - objs = set( - minio_client.list_objects(BUCKET, "files/1/ocr", recursive=True) - ) + objs = set(minio_client.list_objects(BUCKET, "files/1/ocr", recursive=True)) assert set((i.object_name for i in objs)) == { "files/1/ocr/2.json", "files/1/ocr/1.json", diff --git a/processing/tests/test_text_merge.py b/processing/tests/test_text_merge.py index 512bd6c9e..047ffcec2 100644 --- a/processing/tests/test_text_merge.py +++ b/processing/tests/test_text_merge.py @@ -74,9 +74,7 @@ def test_match_empty_annotations(self): {"type": "text", "bbox": (3, 3, 8, 8), "text": "1 2 3"}, ], } - page = Page( - page_num=1, size=PageSize(width=1000, height=1000), objs=[] - ) + page = Page(page_num=1, size=PageSize(width=1000, height=1000), objs=[]) assert match_page(words=words, page=page) == MatchedPage( page_num=1, paragraph_bboxes={} ) @@ -230,12 +228,8 @@ def test_download(self, _1, tmp_path): bucket="some_bucket", input=Input( pages=[ - Page( - page_num=1, size=PageSize(width=10, height=10), objs=[] - ), - Page( - page_num=2, size=PageSize(width=10, height=10), objs=[] - ), + Page(page_num=1, size=PageSize(width=10, height=10), objs=[]), + Page(page_num=2, size=PageSize(width=10, height=10), objs=[]), ] ), ) diff --git a/processing/tests/test_utils/test_utils.py b/processing/tests/test_utils/test_utils.py index 4f8422c58..339ea6617 100644 --- a/processing/tests/test_utils/test_utils.py +++ b/processing/tests/test_utils/test_utils.py @@ -125,10 +125,7 @@ def test_positive_get_files_data_from_separate_files(jw_token): [1, 2], ) - assert ( - utils.get_files_data([1, 2], "test_tenant", jw_token) - == expected_result - ) + assert utils.get_files_data([1, 2], "test_tenant", jw_token) == expected_result @pytest.mark.skip @@ -177,9 +174,7 @@ def test_get_files_data_from_separate_files_100_elements(jw_token): json=large_mock_files_data, status=200, ) - assert utils.get_files_data( - list(range(1, 101)), "test_tenant", jw_token - ) == ( + assert utils.get_files_data(list(range(1, 101)), "test_tenant", jw_token) == ( large_mock_files_data["data"], list(range(1, 101)), ) @@ -288,9 +283,7 @@ def test_get_files_data_from_separate_files_101_elements(jw_token): } for i in range(1, 102) ] - assert utils.get_files_data( - list(range(1, 102)), "test_tenant", jw_token - ) == ( + assert utils.get_files_data(list(range(1, 102)), "test_tenant", jw_token) == ( expected_files_data, list(range(1, 102)), ) @@ -400,9 +393,7 @@ def test_get_files_data_from_separate_files_111_elements(jw_token): }, status=200, ) - assert utils.get_files_data( - list(range(1, 111)), "test_tenant", jw_token - ) == ( + assert utils.get_files_data(list(range(1, 111)), "test_tenant", jw_token) == ( expected_files_data, list(range(1, 111)), ) @@ -414,9 +405,7 @@ def test_get_files_data_from_separate_files_111_elements(jw_token): def test_get_files_data_from_separate_files_501_code(jw_token): request_body = { "pagination": {"page_num": 1, "page_size": 15}, - "filters": [ - {"field": "id", "operator": "eq", "value": "some invalid file id"} - ], + "filters": [{"field": "id", "operator": "eq", "value": "some invalid file id"}], "sorting": [{"field": "id", "direction": "asc"}], } responses.add( @@ -436,9 +425,7 @@ def test_get_files_data_from_separate_files_501_code(jw_token): # --------------------- TESTING execute_pipeline ------------------------- 
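The test_get_files_data_from_separate_files_* cases above depend on get_files_data splitting file ids into chunks of 100 (elements_per_page_in_dataset_manager) before querying the assets search endpoint. A minimal sketch of that batching, with the request-body shape taken from the test fixtures and the page_num handling simplified to a constant; it is illustrative, not the service's actual request loop:

from typing import Any, Dict, List, TypeVar

T = TypeVar("T")

def split_iterable(list_a: List[T], chunk_size: int) -> List[List[T]]:
    # Split a list into consecutive chunks of at most chunk_size elements.
    return [list_a[x : x + chunk_size] for x in range(0, len(list_a), chunk_size)]

def build_search_bodies(file_ids: List[int], page_size: int = 100) -> List[Dict[str, Any]]:
    # One search request body per batch, filtering by the ids in that batch.
    bodies = []
    for batch in split_iterable(file_ids, page_size):
        bodies.append(
            {
                "pagination": {"page_num": 1, "page_size": page_size},
                "filters": [{"field": "id", "operator": "in", "value": batch}],
            }
        )
    return bodies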
@pytest.mark.skip @responses.activate -def test_execute_pipeline_negative( - jw_token, files_data_for_pipeline, db_test_session -): +def test_execute_pipeline_negative(jw_token, files_data_for_pipeline, db_test_session): responses.add( responses.POST, diff --git a/scheduler/alembic/env.py b/scheduler/alembic/env.py index f09d345e8..e865ca545 100644 --- a/scheduler/alembic/env.py +++ b/scheduler/alembic/env.py @@ -1,11 +1,12 @@ -from logging import config as logging_config import os +from logging import config as logging_config + import sqlalchemy -from alembic import context +from scheduler import config as scheduler_config from scheduler.db import models from scheduler.db.service import get_test_db_url -from scheduler import config as scheduler_config +from alembic import context # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -70,9 +71,7 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure( - connection=connection, target_metadata=target_metadata - ) + context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() diff --git a/scheduler/alembic/versions/0cadbdb7f0ea_.py b/scheduler/alembic/versions/0cadbdb7f0ea_.py index 9cfa6eac5..f0a0ada17 100644 --- a/scheduler/alembic/versions/0cadbdb7f0ea_.py +++ b/scheduler/alembic/versions/0cadbdb7f0ea_.py @@ -6,9 +6,10 @@ """ import sqlalchemy as sa -from alembic import op from sqlalchemy.dialects import postgresql +from alembic import op + # revision identifiers, used by Alembic. revision = "0cadbdb7f0ea" down_revision = "556834bc19db" @@ -24,9 +25,7 @@ def upgrade() -> None: sa.Column("last_heartbeat", sa.DateTime(), nullable=False), sa.PrimaryKeyConstraint("id"), ) - op.add_column( - "units", sa.Column("runner_id", postgresql.UUID(), nullable=True) - ) + op.add_column("units", sa.Column("runner_id", postgresql.UUID(), nullable=True)) # ### end Alembic commands ### diff --git a/scheduler/alembic/versions/449be82736bd_.py b/scheduler/alembic/versions/449be82736bd_.py index baddc1db8..57e2d1924 100644 --- a/scheduler/alembic/versions/449be82736bd_.py +++ b/scheduler/alembic/versions/449be82736bd_.py @@ -6,6 +6,7 @@ """ import sqlalchemy as sa + from alembic import op # revision identifiers, used by Alembic. @@ -17,9 +18,7 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### - op.add_column( - "units", sa.Column("response_topic", sa.String(), nullable=True) - ) + op.add_column("units", sa.Column("response_topic", sa.String(), nullable=True)) # ### end Alembic commands ### diff --git a/scheduler/scheduler/app.py b/scheduler/scheduler/app.py index 99c8b402f..136b02670 100644 --- a/scheduler/scheduler/app.py +++ b/scheduler/scheduler/app.py @@ -3,15 +3,12 @@ import tenant_dependency from fastapi import Depends, FastAPI, Header, HTTPException, status - from scheduler import config, heartbeat, kafka_utils, log, runner, schemas from scheduler.db import service logger = log.get_logger(__name__) -tenant = tenant_dependency.get_tenant_info( - url=config.KEYCLOAK_URI, algorithm="RS256" -) +tenant = tenant_dependency.get_tenant_info(url=config.KEYCLOAK_URI, algorithm="RS256") app = FastAPI( title="Scheduler", diff --git a/scheduler/scheduler/db/models.py b/scheduler/scheduler/db/models.py index da6c2000f..720f2880e 100644 --- a/scheduler/scheduler/db/models.py +++ b/scheduler/scheduler/db/models.py @@ -23,9 +23,7 @@ class Unit(Base): # type: ignore created = sqlalchemy.Column( sqlalchemy.DateTime, nullable=False, default=datetime.datetime.utcnow ) - updated = sqlalchemy.Column( - sqlalchemy.DateTime, onupdate=datetime.datetime.utcnow - ) + updated = sqlalchemy.Column(sqlalchemy.DateTime, onupdate=datetime.datetime.utcnow) def __repr__(self) -> str: return ( @@ -49,9 +47,7 @@ def as_dict(self) -> Dict[str, Any]: class Heartbeat(Base): # type: ignore __tablename__ = "heartbeat" - id = sqlalchemy.Column( - postgresql.UUID(), primary_key=True, default=uuid.uuid4 - ) + id = sqlalchemy.Column(postgresql.UUID(), primary_key=True, default=uuid.uuid4) last_heartbeat = sqlalchemy.Column( sqlalchemy.DateTime, nullable=False, default=datetime.datetime.utcnow ) diff --git a/scheduler/scheduler/db/service.py b/scheduler/scheduler/db/service.py index 51b83ceb4..eb2db3566 100644 --- a/scheduler/scheduler/db/service.py +++ b/scheduler/scheduler/db/service.py @@ -2,14 +2,11 @@ from typing import Any, Dict, List, Union import sqlalchemy -from sqlalchemy import orm - from scheduler import config, unit from scheduler.db import models +from sqlalchemy import orm -engine = sqlalchemy.create_engine( - config.DB_URL, pool_size=int(config.POOL_SIZE) -) +engine = sqlalchemy.create_engine(config.DB_URL, pool_size=int(config.POOL_SIZE)) Session = orm.sessionmaker(bind=engine, expire_on_commit=False) @@ -51,9 +48,7 @@ def get_expired_heartbeats( ) -def get_not_finished_units( - session: orm.Session, runner_id: str -) -> List[models.Unit]: +def get_not_finished_units(session: orm.Session, runner_id: str) -> List[models.Unit]: """Get units with statuses 'RECEIVED' and 'IN_PROGRESS' with the given runner_id. """ @@ -75,9 +70,9 @@ def change_unit_runner_id_in_lock(session: orm.Session, id_: str) -> None: change status to 'RECEIVED' with 'for update' statement. 
""" args = {"runner_id": None, "status": unit.UnitStatus.RECEIVED} - session.query(models.Unit).filter( - models.Unit.id == id_ - ).with_for_update().update(args) + session.query(models.Unit).filter(models.Unit.id == id_).with_for_update().update( + args + ) def delete_instances(session: orm.Session, objs: models.TablesList) -> None: diff --git a/scheduler/scheduler/heartbeat.py b/scheduler/scheduler/heartbeat.py index 1fbf94e3e..617fa5d8f 100644 --- a/scheduler/scheduler/heartbeat.py +++ b/scheduler/scheduler/heartbeat.py @@ -3,10 +3,9 @@ import random from aiokafka import AIOKafkaProducer -from sqlalchemy import orm - from scheduler import config, log, runner from scheduler.db import models, service +from sqlalchemy import orm logger = log.get_logger(__name__) @@ -19,9 +18,7 @@ def expire_date() -> datetime.datetime: return datetime.datetime.utcnow() - heartbeat_threshold -def manage_expired_runners( - session: orm.Session, producer: AIOKafkaProducer -) -> None: +def manage_expired_runners(session: orm.Session, producer: AIOKafkaProducer) -> None: """Get expired heartbeats, remove runner_id and change status to 'RECEIVED' from corresponding units. Remove expired heartbeats from db. Runs unfinished units if there are any. @@ -29,13 +26,9 @@ def manage_expired_runners( expired_heartbeats = service.get_expired_heartbeats(session, expire_date()) for expired_heartbeat in expired_heartbeats: runner_id_ = expired_heartbeat.id - not_finished_units = service.get_not_finished_units( - session, runner_id_ - ) + not_finished_units = service.get_not_finished_units(session, runner_id_) for not_finished_unit in not_finished_units: - service.change_unit_runner_id_in_lock( - session, not_finished_unit.id - ) + service.change_unit_runner_id_in_lock(session, not_finished_unit.id) runner.run_orm_unit(producer, not_finished_unit) service.delete_instances(session, expired_heartbeats) @@ -59,7 +52,5 @@ async def heartbeat(producer: AIOKafkaProducer) -> None: with service.Session.begin() as session: service.update_heartbeat_timestamp(session, runner.runner_id) manage_expired_runners(session, producer) - sleep_time_after_heartbeat = ( - time_to_sleep - sleep_time_before_heartbeat - ) + sleep_time_after_heartbeat = time_to_sleep - sleep_time_before_heartbeat await asyncio.sleep(sleep_time_after_heartbeat) diff --git a/scheduler/scheduler/runner.py b/scheduler/scheduler/runner.py index 6d8c39791..8627fc497 100644 --- a/scheduler/scheduler/runner.py +++ b/scheduler/scheduler/runner.py @@ -2,7 +2,6 @@ import uuid import aiokafka - from scheduler import exceptions, log, unit from scheduler.db import models @@ -10,9 +9,7 @@ runner_id: str = str(uuid.uuid4()) -async def fetch_and_send( - producer: aiokafka.AIOKafkaProducer, unit_: unit.Unit -) -> None: +async def fetch_and_send(producer: aiokafka.AIOKafkaProducer, unit_: unit.Unit) -> None: """Perform request to the url and send the result to the response topic if the response topic is specified. 
@@ -41,9 +38,7 @@ async def fetch_and_send( ) -def run_orm_unit( - producer: aiokafka.AIOKafkaProducer, orm_unit: models.Unit -) -> None: +def run_orm_unit(producer: aiokafka.AIOKafkaProducer, orm_unit: models.Unit) -> None: unit_ = unit.Unit.from_orm(orm_unit) asyncio.create_task(fetch_and_send(producer, unit_)) diff --git a/scheduler/tests/test_heartbeat.py b/scheduler/tests/test_heartbeat.py index b035c7f06..465306a33 100644 --- a/scheduler/tests/test_heartbeat.py +++ b/scheduler/tests/test_heartbeat.py @@ -22,15 +22,11 @@ def test_expire_date(): def test_manage_expired_runners(testing_session): """Testing manage_expired_runners.""" id_ = str(uuid.uuid4()) - unit_1 = models.Unit( - id="unit_1_id", runner_id=id_, status=unit.UnitStatus.DONE - ) + unit_1 = models.Unit(id="unit_1_id", runner_id=id_, status=unit.UnitStatus.DONE) unit_2 = models.Unit( id="unit_2_id", runner_id=id_, status=unit.UnitStatus.IN_PROGRESS ) - heartbeat_ = models.Heartbeat( - id=id_, last_heartbeat=datetime.datetime(2022, 1, 1) - ) + heartbeat_ = models.Heartbeat(id=id_, last_heartbeat=datetime.datetime(2022, 1, 1)) testing_session.add_all([unit_1, unit_2, heartbeat_]) with mock.patch("scheduler.runner.run_orm_unit"): diff --git a/scheduler/tests/test_service.py b/scheduler/tests/test_service.py index 31a32cf22..16f8c4ae4 100644 --- a/scheduler/tests/test_service.py +++ b/scheduler/tests/test_service.py @@ -14,9 +14,7 @@ def test_get_unit_by_id(testing_session, testing_unit_instance): testing_session.add(testing_unit_instance) testing_session.flush() testing_session.commit() - instance = service.get_unit_by_id( - testing_session, testing_unit_instance.id - ) + instance = service.get_unit_by_id(testing_session, testing_unit_instance.id) assert isinstance(instance, models.Unit) assert instance.id == testing_unit_instance.id diff --git a/search/search/config.py b/search/search/config.py index db6bba644..5d6027e7d 100644 --- a/search/search/config.py +++ b/search/search/config.py @@ -68,9 +68,7 @@ def annotation_categories_search_url(self) -> str: @property def jobs_search_url(self) -> str: - return "/".join( - (self.jobs_url.rstrip("/"), self.jobs_search.lstrip("/")) - ) + return "/".join((self.jobs_url.rstrip("/"), self.jobs_search.lstrip("/"))) class Config: env_file: str = find_dotenv(".env") diff --git a/search/search/es.py b/search/search/es.py index 1905b07f3..6140474e8 100644 --- a/search/search/es.py +++ b/search/search/es.py @@ -3,7 +3,6 @@ import aiohttp from elasticsearch import AsyncElasticsearch from elasticsearch.exceptions import NotFoundError, RequestError - from search.config import settings INDEX_SETTINGS = { @@ -42,14 +41,10 @@ class NoCategory(NoSuchTenant): pass -async def prepare_index( - es_instance: AsyncElasticsearch, index_name: str -) -> None: +async def prepare_index(es_instance: AsyncElasticsearch, index_name: str) -> None: if not await es_instance.indices.exists(index=index_name): try: - await es_instance.indices.create( - index=index_name, body=INDEX_SETTINGS - ) + await es_instance.indices.create(index=index_name, body=INDEX_SETTINGS) except RequestError as exc: if exc.error == "resource_already_exists_exception": pass @@ -135,22 +130,16 @@ async def build_query( terms_filter = {"terms": {"category": categories_ids}} query["query"]["bool"]["filter"].append(terms_filter) for parameter, value in search_parameters.items(): - query["query"]["bool"]["filter"].append( - {"term": {parameter: {"value": value}}} - ) + query["query"]["bool"]["filter"].append({"term": {parameter: {"value": 
value}}}) return query -async def add_child_categories( - category_id: str, tenant: str, token: str -) -> List[str]: +async def add_child_categories(category_id: str, tenant: str, token: str) -> List[str]: """Helper function which makes GET request into "annotation" service endpoint and returns list of provided category_id with ids of all subcategories from endpoint's response. """ - child_category_url = ( - f"{settings.annotation_categories_url}/{category_id}/child" - ) + child_category_url = f"{settings.annotation_categories_url}/{category_id}/child" header = {"X-Current-Tenant": tenant, "Authorization": f"Bearer {token}"} try: diff --git a/search/search/harvester.py b/search/search/harvester.py index d29ff3898..53ae27e25 100644 --- a/search/search/harvester.py +++ b/search/search/harvester.py @@ -2,11 +2,10 @@ from typing import Iterator, Optional import boto3 -from botocore.errorfactory import ClientError -from elasticsearch import helpers - import search.es as es import search.schemas as schemas +from botocore.errorfactory import ClientError +from elasticsearch import helpers from search.config import settings from search.logger import logger @@ -40,8 +39,7 @@ def create_boto3_config(): "s3 connection is not properly configured " "- s3_credentials_provider is not set" ) - logger.info( - f"S3_Credentials provider - {settings.s3_credentials_provider}") + logger.info(f"S3_Credentials provider - {settings.s3_credentials_provider}") return boto3_config @@ -89,9 +87,7 @@ def prepare_es_document( es_document["category"] = document["category"] es_document["bbox"] = document.get("bbox") es_document["tokens"] = document.get("tokens") - return schemas.pieces.GeomObject.parse_obj( - es_document - ) # for input data validation + return schemas.pieces.GeomObject.parse_obj(es_document) # for input data validation def extract_manifest_data( @@ -171,7 +167,5 @@ async def old_pieces_cleaner( async def start_harvester( tenant: str, job_id: int, file_id: Optional[int] = None ) -> None: - await helpers.async_bulk( - es.ES, old_pieces_cleaner(tenant, job_id, file_id) - ) + await helpers.async_bulk(es.ES, old_pieces_cleaner(tenant, job_id, file_id)) await helpers.async_bulk(es.ES, harvester(tenant, job_id, file_id)) diff --git a/search/search/main.py b/search/search/main.py index 86185784f..4d9394a92 100644 --- a/search/search/main.py +++ b/search/search/main.py @@ -2,15 +2,14 @@ from typing import Optional import fastapi -from botocore.exceptions import BotoCoreError -from elasticsearch.exceptions import ElasticsearchException -from tenant_dependency import TenantData, get_tenant_info - import search.es as es import search.harvester as harvester import search.kafka_listener as kafka_listener import search.schemas as schemas +from botocore.exceptions import BotoCoreError +from elasticsearch.exceptions import ElasticsearchException from search.config import settings +from tenant_dependency import TenantData, get_tenant_info tags = [ { @@ -19,9 +18,7 @@ }, ] -TOKEN = get_tenant_info( - url=settings.keycloak_url, algorithm=settings.jwt_algorithm -) +TOKEN = get_tenant_info(url=settings.keycloak_url, algorithm=settings.jwt_algorithm) app = fastapi.FastAPI( title=settings.app_title, @@ -62,9 +59,7 @@ def elastic_exception_handler_es_error( @app.exception_handler(BotoCoreError) -def minio_exception_handler_bc_error( - request: fastapi.Request, exc: BotoCoreError -): +def minio_exception_handler_bc_error(request: fastapi.Request, exc: BotoCoreError): return fastapi.responses.JSONResponse( status_code=500, 
content={"detail": f"Error: connection error ({exc})"}, @@ -184,8 +179,6 @@ async def search_facets( ) -> schemas.facets.FacetsResponse: query = request.build_es_query() elastic_response = await es.ES.search(index=x_current_tenant, body=query) - response = schemas.facets.FacetsResponse.parse_es_response( - elastic_response - ) + response = schemas.facets.FacetsResponse.parse_es_response(elastic_response) await response.adjust_facet_result(x_current_tenant, token.token) return response diff --git a/search/search/schemas/facets.py b/search/search/schemas/facets.py index f6c16d8e3..ae1bc6762 100644 --- a/search/search/schemas/facets.py +++ b/search/search/schemas/facets.py @@ -2,9 +2,8 @@ from typing import Any, Dict, List, Optional, Tuple, Union import aiocache.serializers -from pydantic import BaseModel, Field - import search.common_utils as utils +from pydantic import BaseModel, Field from search.config import settings from search.es import INDEX_SETTINGS, fetch @@ -52,13 +51,9 @@ def apply_filter(self, query: Dict[str, Any]) -> Dict[str, Any]: continue if self.operator == FacetOperator.IN: - facet_body["filter"]["bool"]["must"].append( - self.filter_template - ) + facet_body["filter"]["bool"]["must"].append(self.filter_template) if self.operator == FacetOperator.NOT_IN: - facet_body["filter"]["bool"]["must_not"].append( - self.filter_template - ) + facet_body["filter"]["bool"]["must_not"].append(self.filter_template) return query @@ -81,9 +76,7 @@ def facet_template(self) -> Dict[str, Any]: self.name: { "filter": {"bool": {"must": [], "must_not": []}}, "aggs": { - self.name: { - "terms": {"field": self.name, "size": self.limit} - } + self.name: {"terms": {"field": self.name, "size": self.limit}} }, } } @@ -95,12 +88,8 @@ class FacetsRequest(BaseModel): description="*Match query in a text type field*", example="Elasticsearch", ) - facets: List[FacetParams] = Field( - description="*An array for ES aggregations*" - ) - filters: Optional[List[FilterParams]] = Field( - description="*Filters for facets*" - ) + facets: List[FacetParams] = Field(description="*An array for ES aggregations*") + filters: Optional[List[FilterParams]] = Field(description="*Filters for facets*") def _build_facets(self, query: Dict[str, Any]) -> Dict[str, Any]: for facet in self.facets: @@ -137,26 +126,18 @@ def build_es_query(self) -> Dict[str, Any]: class AggResult(BaseModel): - id: Union[int, str] = Field( - description="*Aggregation key id*", example="Header" - ) + id: Union[int, str] = Field(description="*Aggregation key id*", example="Header") count: int = Field(description="*Count of aggregated docs*", example=10) name: Optional[str] = Field(description="*A name of a category or a job*") @staticmethod def parse_es_agg_doc(es_doc: Dict[str, Any]) -> "AggResult": - return AggResult( - id=es_doc.get("key", ""), count=es_doc.get("doc_count", 0) - ) + return AggResult(id=es_doc.get("key", ""), count=es_doc.get("doc_count", 0)) class FacetBodyResponse(BaseModel): - name: str = Field( - description="*A name of aggregation*", example="category" - ) - values: List[AggResult] = Field( - description="*An array aggregation results*" - ) + name: str = Field(description="*A name of aggregation*", example="category") + values: List[AggResult] = Field(description="*An array aggregation results*") async def adjust_facet(self, tenant: str, token: str) -> None: if self.name not in settings.computed_fields: diff --git a/search/search/schemas/pieces.py b/search/search/schemas/pieces.py index d2931b2d5..f4a95d49a 100644 --- 
a/search/search/schemas/pieces.py +++ b/search/search/schemas/pieces.py @@ -6,7 +6,6 @@ from typing import Any, Dict, List, Optional, Union import pydantic - import search.common_utils as utils import search.es as es @@ -19,9 +18,7 @@ def pieces_condition(properties: Dict[str, Any]) -> List[str]: return [ - el - for el in properties - if properties[el].get("type") not in __excluded_types + el for el in properties if properties[el].get("type") not in __excluded_types ] @@ -32,23 +29,15 @@ def pieces_condition(properties: Dict[str, Any]) -> List[str]: class GeomObject(pydantic.BaseModel): category: str = pydantic.Field(..., example="Header") content: str = pydantic.Field(..., example="ElasticSearch") - document_id: pydantic.conint(ge=1) = pydantic.Field( - ..., example=1 - ) # type: ignore - page_number: pydantic.conint(ge=1) = pydantic.Field( - ..., example=1 - ) # type: ignore - bbox: Optional[ - pydantic.conlist(float, min_items=4, max_items=4) - ] = pydantic.Field( + document_id: pydantic.conint(ge=1) = pydantic.Field(..., example=1) # type: ignore + page_number: pydantic.conint(ge=1) = pydantic.Field(..., example=1) # type: ignore + bbox: Optional[pydantic.conlist(float, min_items=4, max_items=4)] = pydantic.Field( None, example=[1.5, 1.5, 1.5, 1.5] ) # type: ignore tokens: Optional[List[str]] = pydantic.Field( None, example=["token1", "token2", "token3"] ) - job_id: pydantic.conint(ge=1) = pydantic.Field( - ..., example=1 - ) # type: ignore + job_id: pydantic.conint(ge=1) = pydantic.Field(..., example=1) # type: ignore class SearchResultSchema(pydantic.BaseModel): @@ -94,16 +83,12 @@ def get_filter_template(self) -> Dict[str, Any]: def is_include(self) -> bool: return self.operator in (PieceOperators.IN, PieceOperators.EQ) - async def adjust_for_child_categories( - self, tenant: str, token: str - ) -> List[str]: + async def adjust_for_child_categories(self, tenant: str, token: str) -> List[str]: if not isinstance(self.value, list): self.value = [self.value] tasks = [] for category in self.value: - task = asyncio.create_task( - es.add_child_categories(category, tenant, token) - ) + task = asyncio.create_task(es.add_child_categories(category, tenant, token)) tasks.append(task) res = await asyncio.gather(*tasks) new_categories = list(reduce(lambda a, b: a & b, map(set, res))) @@ -140,13 +125,9 @@ def _build_sorts(self) -> List[Dict[str, Any]]: def _apply_filters(self, query: Dict[str, Any]) -> Dict[str, Any]: for filter_ in self.filters: if filter_.is_include: - query["query"]["bool"]["must"].append( - filter_.get_filter_template() - ) + query["query"]["bool"]["must"].append(filter_.get_filter_template()) if not filter_.is_include: - query["query"]["bool"]["must_not"].append( - filter_.get_filter_template() - ) + query["query"]["bool"]["must_not"].append(filter_.get_filter_template()) return query def _apply_sort(self, query: Dict[str, Any]) -> Dict[str, Any]: @@ -159,9 +140,7 @@ def _apply_es_pagination(self, query: Dict[str, Any]) -> Dict[str, Any]: def _apply_query(self, query: Dict[str, Any]) -> Dict[str, Any]: match = { - "match": { - "content": {"query": self.query, "minimum_should_match": "81%"} - } + "match": {"content": {"query": self.query, "minimum_should_match": "81%"}} } query["query"]["bool"]["must"].append(match) return query @@ -217,9 +196,7 @@ def __make_pag_params( pages = SearchResultSchema2.__calculate_num_pages( pag_in.page_size, total_results ) - return PaginationParams( - pag_in.page_num, pag_in.page_size, pages, total_results - ) + return 
PaginationParams(pag_in.page_num, pag_in.page_size, pages, total_results) @staticmethod def __calculate_num_pages(page_size: int, total_results: int) -> int: diff --git a/search/tests/conftest.py b/search/tests/conftest.py index 7a1d2436c..70ed1a321 100644 --- a/search/tests/conftest.py +++ b/search/tests/conftest.py @@ -23,9 +23,7 @@ @pytest_asyncio.fixture async def es(): - es_ = AsyncElasticsearch( - hosts=settings.es_host_test, port=settings.es_port_test - ) + es_ = AsyncElasticsearch(hosts=settings.es_host_test, port=settings.es_port_test) yield es_ await es_.indices.delete(index=INDEX_NAME) await es_.close() @@ -33,9 +31,7 @@ async def es(): @pytest_asyncio.fixture async def index_test_data(monkeypatch) -> None: - es_ = AsyncElasticsearch( - hosts=settings.es_host_test, port=settings.es_port_test - ) + es_ = AsyncElasticsearch(hosts=settings.es_host_test, port=settings.es_port_test) monkeypatch.setattr("search.main.ES", es_) await es_.indices.create(index=INDEX_NAME, ignore=400, body=INDEX_SETTINGS) for test_object in TEST_DATA + list(CHILD_CATEGORIES_DATA.values()): @@ -137,12 +133,8 @@ def drop_es_index(moto_s3) -> boto3.resource: @pytest_asyncio.fixture -async def drop_parametrized_index( - moto_s3, request, monkeypatch -) -> boto3.resource: - es_ = AsyncElasticsearch( - hosts=settings.es_host_test, port=settings.es_port_test - ) +async def drop_parametrized_index(moto_s3, request, monkeypatch) -> boto3.resource: + es_ = AsyncElasticsearch(hosts=settings.es_host_test, port=settings.es_port_test) monkeypatch.setattr("search.harvester.ES", es_) yield moto_s3 await es_.indices.delete(index=request.param) diff --git a/search/tests/test_facets.py b/search/tests/test_facets.py index 02d704b4a..e26c096bc 100644 --- a/search/tests/test_facets.py +++ b/search/tests/test_facets.py @@ -48,9 +48,7 @@ class TestData: wrong_facet_request_2 = { "query": "some", "facets": [{"name": "some", "limit": 5}], - "filters": [ - {"field": "some", "operator": "in", "value": ["some1", "some2"]} - ], + "filters": [{"field": "some", "operator": "in", "value": ["some1", "some2"]}], } agg_result_1 = {"key": "Header", "doc_count": 10} agg_result_2 = {"key": "Title", "doc_count": 10} @@ -86,9 +84,9 @@ def test_filter_param_template(): obj = facets.FilterParams.parse_obj(TestData.valid_filter_params_in) assert obj.filter_template == { "terms": { - TestData.valid_filter_params_in[ - "field" - ]: TestData.valid_filter_params_in["value"] + TestData.valid_filter_params_in["field"]: TestData.valid_filter_params_in[ + "value" + ] } } @@ -146,9 +144,7 @@ def test_facet_request_build_es_query(): "must_not": [{"terms": {"job_id": [10, 100]}}], } }, - "aggs": { - "category": {"terms": {"field": "category", "size": 10}} - }, + "aggs": {"category": {"terms": {"field": "category", "size": 10}}}, }, "job_id": { "filter": { @@ -186,9 +182,7 @@ def test_facet_request_build_es_query(): "aggs": { "category": { "filter": {"bool": {"must": [], "must_not": []}}, - "aggs": { - "category": {"terms": {"field": "category", "size": 5}} - }, + "aggs": {"category": {"terms": {"field": "category", "size": 5}}}, } }, "size": 0, diff --git a/search/tests/test_get.py b/search/tests/test_get.py index 76ba251b6..23c10bb04 100644 --- a/search/tests/test_get.py +++ b/search/tests/test_get.py @@ -382,9 +382,7 @@ def test_get_child_categories( expected_total_objects: int, expected_text_pieces: List[dict], ): - with patch( - "search.es.add_child_categories", return_value=annotation_response - ): + with patch("search.es.add_child_categories", 
return_value=annotation_response): response = client.get( settings.text_pieces_path, params=url_params, @@ -417,9 +415,7 @@ def test_no_such_tenant_index(tenant: str): @pytest.mark.asyncio @pytest.mark.unittest -@pytest.mark.parametrize( - "child_categories", [("category_1", "category_2"), tuple()] -) +@pytest.mark.parametrize("child_categories", [("category_1", "category_2"), tuple()]) async def test_add_child_categories(child_categories): with patch( "search.es.fetch", @@ -482,8 +478,7 @@ def test_requests_exception(monkeypatch): ) assert response.status_code == 500 expected_error_response = ( - f"Can't get subcategories for {category_id} " - f"due to error {error_message}" + f"Can't get subcategories for {category_id} " f"due to error {error_message}" ) assert expected_error_response in response.text @@ -503,18 +498,14 @@ def test_facets_endpoint(): } } } - with patch( - "search.main.es.ES.search", return_value=asyncio.Future() - ) as mock: + with patch("search.main.es.ES.search", return_value=asyncio.Future()) as mock: with patch( "search.main.schemas.facets.FacetsResponse.adjust_facet_result", return_value=asyncio.Future(), ) as mock1: mock.return_value.set_result(es_response) mock1.return_value.set_result(None) - resp = client.post( - "/facets", json=mock_es_query, headers=TEST_HEADERS - ) + resp = client.post("/facets", json=mock_es_query, headers=TEST_HEADERS) assert resp.json() == { "facets": [ { diff --git a/search/tests/test_harvester.py b/search/tests/test_harvester.py index 029d486c9..aeadad8a1 100644 --- a/search/tests/test_harvester.py +++ b/search/tests/test_harvester.py @@ -29,9 +29,7 @@ "original_annotation_id": 1, "links": ["link_1", "link_2", "link_3"], "category": "Paragraph", - "text": ( - "Elasticsearch is a search engine based on the Lucene library." 
- ), + "text": ("Elasticsearch is a search engine based on the Lucene library."), "bbox": [20.2, 30.3, 145.5, 120.7], "tokens": None, }, @@ -232,8 +230,7 @@ { "document_id": 1, "page_number": 1, - "content": "Elasticsearch is a search engine " - "based on the Lucene library.", + "content": "Elasticsearch is a search engine " "based on the Lucene library.", "category": "Paragraph", "bbox": [20.2, 30.3, 145.5, 120.7], "job_id": 1, @@ -289,9 +286,7 @@ async def test_start_harvester_total_amount( expected_result: int, es, ): - monkeypatch.setattr( - "search.harvester.connect_s3", Mock(return_value=moto_s3) - ) + monkeypatch.setattr("search.harvester.connect_s3", Mock(return_value=moto_s3)) monkeypatch.setattr("search.es.ES", es) for i in range(amount_of_uploads): await start_harvester(INDEX_NAME, **ids) @@ -320,9 +315,7 @@ async def test_start_harvester_elastic_content( ids, expected_result, ): - monkeypatch.setattr( - "search.harvester.connect_s3", Mock(return_value=moto_s3) - ) + monkeypatch.setattr("search.harvester.connect_s3", Mock(return_value=moto_s3)) monkeypatch.setattr("search.es.ES", es) await start_harvester(INDEX_NAME, **ids) await es.indices.refresh(index=INDEX_NAME) @@ -332,9 +325,7 @@ async def test_start_harvester_elastic_content( @pytest.mark.asyncio @pytest.mark.integration -async def test_start_harvester_no_text_objects( - monkeypatch, moto_s3_fail_cases, es -): +async def test_start_harvester_no_text_objects(monkeypatch, moto_s3_fail_cases, es): monkeypatch.setattr( "search.harvester.connect_s3", Mock(return_value=moto_s3_fail_cases), diff --git a/search/tests/test_indexation_endpoint.py b/search/tests/test_indexation_endpoint.py index f2b493efa..5f8fb1295 100644 --- a/search/tests/test_indexation_endpoint.py +++ b/search/tests/test_indexation_endpoint.py @@ -18,9 +18,7 @@ @mark.integration @mark.parametrize("job_id", (1, 2, 100)) def test_successful_response(monkeypatch, drop_es_index, job_id, es): - monkeypatch.setattr( - "search.harvester.connect_s3", Mock(return_value=drop_es_index) - ) + monkeypatch.setattr("search.harvester.connect_s3", Mock(return_value=drop_es_index)) monkeypatch.setattr("search.harvester.ES", es) response = client.post( f"{settings.indexation_path}/{job_id}", @@ -58,9 +56,7 @@ def test_no_such_tenant_bucket(drop_parametrized_index, tenant): @mark.integration def test_minio_connection_error(monkeypatch, moto_s3): - monkeypatch.setattr( - "search.harvester.connect_s3", Mock(side_effect=BotoCoreError) - ) + monkeypatch.setattr("search.harvester.connect_s3", Mock(side_effect=BotoCoreError)) response = client.post( f"{settings.indexation_path}/1", headers=TEST_HEADERS, @@ -71,9 +67,7 @@ def test_minio_connection_error(monkeypatch, moto_s3): @mark.integration def test_elasticsearch_connection_error(monkeypatch, moto_s3): - monkeypatch.setattr( - "search.harvester.connect_s3", Mock(return_value=moto_s3) - ) + monkeypatch.setattr("search.harvester.connect_s3", Mock(return_value=moto_s3)) monkeypatch.setattr( "search.harvester.old_pieces_cleaner", Mock(side_effect=ElasticsearchException("ElasticsearchException")), diff --git a/search/tests/test_pieces.py b/search/tests/test_pieces.py index b5bffbb98..0da166547 100644 --- a/search/tests/test_pieces.py +++ b/search/tests/test_pieces.py @@ -83,9 +83,7 @@ def test_filter_eq(): def test_filter_not_in(): fil = pieces.PieceFilter.validate(TestData.filter_2) assert not fil.is_include - assert fil.get_filter_template() == { - "terms": {"category": ["Header", "Table"]} - } + assert fil.get_filter_template() == 
{"terms": {"category": ["Header", "Table"]}} @pytest.mark.unittest @@ -118,16 +116,8 @@ def test_request_1(): "from": 0, "size": 50, "sort": [ - { - pieces.PIECES_ENUM.CATEGORY: { - "order": pieces.PieceSortDirections.ASC - } - }, - { - pieces.PIECES_ENUM.JOB_ID: { - "order": pieces.PieceSortDirections.DESC - } - }, + {pieces.PIECES_ENUM.CATEGORY: {"order": pieces.PieceSortDirections.ASC}}, + {pieces.PIECES_ENUM.JOB_ID: {"order": pieces.PieceSortDirections.DESC}}, ], } @@ -159,27 +149,15 @@ def test_request_2(): }, ], "must_not": [ - { - "terms": { - pieces.PIECES_ENUM.PAGE_NUMBER: [10000, 1000000] - } - } + {"terms": {pieces.PIECES_ENUM.PAGE_NUMBER: [10000, 1000000]}} ], } }, "from": 0, "size": 50, "sort": [ - { - pieces.PIECES_ENUM.CATEGORY: { - "order": pieces.PieceSortDirections.ASC - } - }, - { - pieces.PIECES_ENUM.JOB_ID: { - "order": pieces.PieceSortDirections.DESC - } - }, + {pieces.PIECES_ENUM.CATEGORY: {"order": pieces.PieceSortDirections.ASC}}, + {pieces.PIECES_ENUM.JOB_ID: {"order": pieces.PieceSortDirections.DESC}}, ], } @@ -188,9 +166,7 @@ def test_request_2(): @pytest.mark.unittest async def test_adjust_categories(): filter_ = pieces.PieceFilter.validate(TestData.filter_1) - with patch( - "search.es.add_child_categories", return_value=["Table", "Cell"] - ): + with patch("search.es.add_child_categories", return_value=["Table", "Cell"]): await filter_.adjust_for_child_categories("foo", "bar") assert sorted(filter_.value) == sorted(["Header", "Table", "Cell"]) @@ -198,9 +174,7 @@ async def test_adjust_categories(): @pytest.mark.unittest def test_parse_es_response(): pag = pieces.PiecePagination(page_num=1, page_size=10) - resp = pieces.SearchResultSchema2.parse_es_response( - TestData.es_response, pag - ) + resp = pieces.SearchResultSchema2.parse_es_response(TestData.es_response, pag) assert resp.dict() == { "pagination": {"page_num": 1, "page_size": 10, "total": 1, "pages": 1}, "data": [ diff --git a/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py b/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py index 0a5a8062f..ea1d48100 100644 --- a/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py +++ b/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py @@ -27,9 +27,7 @@ def upgrade() -> None: ["taxonomy_id", "taxonomy_version"], ["taxonomy.id", "taxonomy.version"], ), - sa.PrimaryKeyConstraint( - "taxonomy_id", "taxonomy_version", "category_id" - ), + sa.PrimaryKeyConstraint("taxonomy_id", "taxonomy_version", "category_id"), ) op.drop_column("taxonomy", "category_id") # ### end Alembic commands ### @@ -39,9 +37,7 @@ def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### op.add_column( "taxonomy", - sa.Column( - "category_id", sa.VARCHAR(), autoincrement=False, nullable=False - ), + sa.Column("category_id", sa.VARCHAR(), autoincrement=False, nullable=False), ) op.drop_table("association_taxonomy_category") # ### end Alembic commands ### diff --git a/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py b/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py index 3adec516a..38b2ad97f 100644 --- a/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py +++ b/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py @@ -49,12 +49,8 @@ def upgrade() -> None: sa.Column("taxonomy_id", sa.VARCHAR(), nullable=True), sa.Column("taxonomy_version", sa.Integer(), nullable=True), sa.Column("parent_id", sa.VARCHAR(), nullable=True), - sa.Column( - "tree", sqlalchemy_utils.types.ltree.LtreeType(), nullable=True - ), - sa.ForeignKeyConstraint( - ["parent_id"], ["taxon.id"], ondelete="cascade" - ), + sa.Column("tree", sqlalchemy_utils.types.ltree.LtreeType(), nullable=True), + sa.ForeignKeyConstraint(["parent_id"], ["taxon.id"], ondelete="cascade"), sa.ForeignKeyConstraint( ["taxonomy_id", "taxonomy_version"], ["taxonomy.id", "taxonomy.version"], @@ -68,18 +64,14 @@ def upgrade() -> None: unique=False, postgresql_using="gist", ) - op.create_index( - op.f("ix_taxon_parent_id"), "taxon", ["parent_id"], unique=False - ) + op.create_index(op.f("ix_taxon_parent_id"), "taxon", ["parent_id"], unique=False) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### op.drop_index(op.f("ix_taxon_parent_id"), table_name="taxon") - op.drop_index( - "index_taxon_tree", table_name="taxon", postgresql_using="gist" - ) + op.drop_index("index_taxon_tree", table_name="taxon", postgresql_using="gist") op.drop_table("taxon") op.drop_table("association_taxonomy_job") op.drop_table("taxonomy") diff --git a/taxonomy/documentation/update_docs.py b/taxonomy/documentation/update_docs.py index 8e10cf8ab..7b2e0a0bb 100644 --- a/taxonomy/documentation/update_docs.py +++ b/taxonomy/documentation/update_docs.py @@ -1,13 +1,10 @@ import yaml - from taxonomy.main import app def str_presenter(dumper, data): if "\n" in data: - return dumper.represent_scalar( - "tag:yaml.org,2002:str", data, style="|" - ) + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") return dumper.represent_scalar("tag:yaml.org,2002:str", data) diff --git a/taxonomy/taxonomy/errors.py b/taxonomy/taxonomy/errors.py index 3f806a619..0fad611d4 100644 --- a/taxonomy/taxonomy/errors.py +++ b/taxonomy/taxonomy/errors.py @@ -54,9 +54,7 @@ def check_field_error_handler(request: Request, exc: CheckFieldError): ) -def field_constraint_error_handler( - request: Request, exc: FieldConstraintError -): +def field_constraint_error_handler(request: Request, exc: FieldConstraintError): return JSONResponse( status_code=400, content={"detail": f"Error: {exc.message}"}, diff --git a/taxonomy/taxonomy/schemas/taxon.py b/taxonomy/taxonomy/schemas/taxon.py index b5d352e09..687f72fc8 100644 --- a/taxonomy/taxonomy/schemas/taxon.py +++ b/taxonomy/taxonomy/schemas/taxon.py @@ -1,7 +1,6 @@ from typing import List, Optional from pydantic import BaseModel, Field, validator - from taxonomy.errors import CheckFieldError @@ -44,9 +43,7 @@ class Config: class ParentsConcatenateResponseSchema(BaseModel): taxon_id: str = Field(..., example="my_taxon_id") taxon_name: str = Field(..., example="taxon_name") - parent_ids_concat: Optional[str] = Field( - ..., 
example="parent_id_1.parent_id_2" - ) + parent_ids_concat: Optional[str] = Field(..., example="parent_id_1.parent_id_2") parent_names_concat: Optional[str] = Field( ..., example="parent_name_1.parent_name_2" ) diff --git a/taxonomy/taxonomy/schemas/taxonomy.py b/taxonomy/taxonomy/schemas/taxonomy.py index 0eb590b42..0bde6ae00 100644 --- a/taxonomy/taxonomy/schemas/taxonomy.py +++ b/taxonomy/taxonomy/schemas/taxonomy.py @@ -27,9 +27,7 @@ class CategoryLinkSchema(BaseModel): category_id: str = Field( ..., example="123abc", description="Category id to link taxonomy to" ) - job_id: str = Field( - ..., example="123abc", description="Job id to link taxonomy to" - ) + job_id: str = Field(..., example="123abc", description="Job id to link taxonomy to") taxonomy_id: str = Field(..., example="my_taxonomy_id") taxonomy_version: Optional[int] = Field( description="Version of taxonomy", example=1 @@ -37,9 +35,7 @@ class CategoryLinkSchema(BaseModel): class JobTaxonomySchema(BaseModel): - name: str = Field( - ..., example="taxonomy_name", description="Taxonomy name" - ) + name: str = Field(..., example="taxonomy_name", description="Taxonomy name") id: str = Field(..., example="my_taxonomy_id", description="Taxonomy id") version: int = Field(..., example=1, description="Version of taxonomy") category_id: str = Field( diff --git a/taxonomy/taxonomy/taxon/services.py b/taxonomy/taxonomy/taxon/services.py index 2ec097a34..6b042e47c 100644 --- a/taxonomy/taxonomy/taxon/services.py +++ b/taxonomy/taxonomy/taxon/services.py @@ -6,7 +6,6 @@ from sqlalchemy.orm import Session from sqlalchemy.orm.query import Query from sqlalchemy_utils import Ltree - from taxonomy.errors import CheckFieldError, NoTaxonError, SelfParentError from taxonomy.filters import TaxonFilter from taxonomy.models import Taxon @@ -122,9 +121,7 @@ def insert_taxon_tree( taxon_response = TaxonResponseSchema.from_orm(taxon_db) if taxon_response.parent_id: - taxon_response.parents = [ - set_parents_is_leaf(taxon) for taxon in parents - ] + taxon_response.parents = [set_parents_is_leaf(taxon) for taxon in parents] taxon_response.is_leaf = is_leaf return taxon_response @@ -158,9 +155,7 @@ def update_taxon_db( raise SelfParentError("Taxon cannot be its own parent.") update_query["parent_id"] = ( - update_query["parent_id"] - if update_query["parent_id"] != "null" - else None + update_query["parent_id"] if update_query["parent_id"] != "null" else None ) ex_parent_id = taxon.parent_id new_parent_id = update_query["parent_id"] @@ -244,12 +239,8 @@ def _get_obj_from_request( return taxon_query.all(), pagination -def _extract_taxon( - path: str, taxons: Dict[str, Taxon] -) -> List[TaxonResponseSchema]: - return [ - set_parents_is_leaf(taxons[node]) for node in path.split(".")[0:-1] - ] +def _extract_taxon(path: str, taxons: Dict[str, Taxon]) -> List[TaxonResponseSchema]: + return [set_parents_is_leaf(taxons[node]) for node in path.split(".")[0:-1]] def _get_parents(db: Session, taxons: List[Taxon], tenant: str) -> Parents: @@ -270,9 +261,7 @@ def _get_parents(db: Session, taxons: List[Taxon], tenant: str) -> Parents: return path_to_taxon -def fetch_bunch_taxons_db( - db: Session, taxon_ids: Set[str], tenant: str -) -> List[Taxon]: +def fetch_bunch_taxons_db(db: Session, taxon_ids: Set[str], tenant: str) -> List[Taxon]: taxons = ( db.query(Taxon) .filter( @@ -283,9 +272,7 @@ def fetch_bunch_taxons_db( ) .all() ) - taxons_not_exist = {taxon.id for taxon in taxons}.symmetric_difference( - taxon_ids - ) + taxons_not_exist = {taxon.id for taxon in 
taxons}.symmetric_difference(taxon_ids) error_message = ", ".join(sorted(taxons_not_exist)) if taxons_not_exist: raise NoTaxonError(f"No such taxons: {error_message}") @@ -311,9 +298,7 @@ def filter_taxons( tenant: str, query: Optional[Query] = None, ) -> Page[Union[TaxonResponseSchema, str, dict]]: - taxons_request, pagination = _get_obj_from_request( - db, request, tenant, query - ) + taxons_request, pagination = _get_obj_from_request(db, request, tenant, query) if request.filters and "distinct" in [ item.operator.value for item in request.filters diff --git a/taxonomy/taxonomy/taxonomy/resources.py b/taxonomy/taxonomy/taxonomy/resources.py index 954ccf69d..7e6934386 100644 --- a/taxonomy/taxonomy/taxonomy/resources.py +++ b/taxonomy/taxonomy/taxonomy/resources.py @@ -4,7 +4,6 @@ from filter_lib import Page from sqlalchemy.orm import Session from sqlalchemy_filters.exceptions import BadFilterFormat - from taxonomy.database import get_db from taxonomy.filters import TaxonomyFilter from taxonomy.logging_setup import LOGGER @@ -62,9 +61,7 @@ def create_new_taxonomy( raise HTTPException( status_code=400, detail="Header x-current-tenant is required" ) - latest_taxonomy = get_latest_taxonomy( - session, taxonomy.id, x_current_tenant - ) + latest_taxonomy = get_latest_taxonomy(session, taxonomy.id, x_current_tenant) if latest_taxonomy: LOGGER.info( "save_taxonomy find taxonomy with id %s. " @@ -172,12 +169,8 @@ def associate_taxonomy_to_category( else: latests.append(category_link) - taxonomies: dict = batch_versioned_taxonomies( - session, versions, x_current_tenant - ) - taxonomies.update( - batch_latest_taxonomies(session, latests, x_current_tenant) - ) + taxonomies: dict = batch_versioned_taxonomies(session, versions, x_current_tenant) + taxonomies.update(batch_latest_taxonomies(session, latests, x_current_tenant)) not_found_taxonomies = [ link.taxonomy_id @@ -229,9 +222,7 @@ def delete_category_link( session: Session = Depends(get_db), x_current_tenant: str = X_CURRENT_TENANT_HEADER, ) -> Response: - bulk_delete_category_association( - session, x_current_tenant, job_id, category_id - ) + bulk_delete_category_association(session, x_current_tenant, job_id, category_id) return Response(status_code=status.HTTP_204_NO_CONTENT) @@ -270,9 +261,7 @@ def update_taxonomy( if not taxonomy: LOGGER.error("update_taxonomy get not existing id %s", query.id) raise HTTPException(status_code=404, detail="Not existing taxonomy") - taxonomy_db = update_taxonomy_instance( - session, taxonomy, query, x_current_tenant - ) + taxonomy_db = update_taxonomy_instance(session, taxonomy, query, x_current_tenant) return TaxonomyResponseSchema.from_orm(taxonomy_db) @@ -304,9 +293,7 @@ def update_taxonomy_by_id_and_version( (taxonomy_id, version), ) raise HTTPException(status_code=404, detail="Not existing taxonomy") - taxonomy_db = update_taxonomy_instance( - session, taxonomy, query, x_current_tenant - ) + taxonomy_db = update_taxonomy_instance(session, taxonomy, query, x_current_tenant) return TaxonomyResponseSchema.from_orm(taxonomy_db) @@ -415,9 +402,7 @@ def get_taxonomy_by_job_and_category_id( session: Session = Depends(get_db), x_current_tenant: str = X_CURRENT_TENANT_HEADER, ) -> List[TaxonomyResponseSchema]: - taxonomy = get_linked_taxonomies( - session, job_id, category_id, x_current_tenant - ) + taxonomy = get_linked_taxonomies(session, job_id, category_id, x_current_tenant) if not taxonomy: LOGGER.error( "get_taxonomy_by_job_and_category_id get not existing combination" diff --git 
a/taxonomy/taxonomy/taxonomy/services.py b/taxonomy/taxonomy/taxonomy/services.py index 3daf7ec0e..bf281036f 100644 --- a/taxonomy/taxonomy/taxonomy/services.py +++ b/taxonomy/taxonomy/taxonomy/services.py @@ -3,7 +3,6 @@ from filter_lib import Page, form_query, map_request_to_filter, paginate from sqlalchemy import and_, desc, null, or_ from sqlalchemy.orm import Query, Session - from taxonomy.errors import CheckFieldError from taxonomy.filters import TaxonomyFilter from taxonomy.models import AssociationTaxonomyCategory, Taxonomy @@ -229,12 +228,8 @@ def bulk_delete_category_association( AssociationTaxonomyCategory.category_id == category_id, ) taxonomy_links.filter( - AssociationTaxonomyCategory.taxonomy_id.in_( - tenant_taxonomy.subquery() - ), - AssociationTaxonomyCategory.taxonomy_version.in_( - tenant_taxonomy.subquery() - ), + AssociationTaxonomyCategory.taxonomy_id.in_(tenant_taxonomy.subquery()), + AssociationTaxonomyCategory.taxonomy_version.in_(tenant_taxonomy.subquery()), ) taxonomy_links.delete(synchronize_session=False) session.commit() @@ -260,8 +255,6 @@ def filter_taxonomies( tenant: str, query: Optional[Query] = None, ) -> Page[Union[TaxonomyResponseSchema, str, dict]]: - taxonomies_request, pagination = _get_obj_from_request( - db, request, tenant, query - ) + taxonomies_request, pagination = _get_obj_from_request(db, request, tenant, query) return paginate(taxonomies_request, pagination) diff --git a/taxonomy/tests/conftest.py b/taxonomy/tests/conftest.py index a06acc5de..c489fdd2f 100644 --- a/taxonomy/tests/conftest.py +++ b/taxonomy/tests/conftest.py @@ -80,9 +80,7 @@ def setup_test_db(use_temp_env_var, db_test_engine): # 3. Install 'ltree' extension with db_test_engine.connect() as conn: - conn.execute( - sqlalchemy.sql.text("CREATE EXTENSION IF NOT EXISTS ltree") - ) + conn.execute(sqlalchemy.sql.text("CREATE EXTENSION IF NOT EXISTS ltree")) # 4. run 'alembic upgrade head' alembic_cfg = Config("alembic.ini") @@ -96,9 +94,7 @@ def setup_test_db(use_temp_env_var, db_test_engine): @pytest.fixture -def db_session( - db_test_engine, setup_test_db -) -> Generator[Session, None, None]: +def db_session(db_test_engine, setup_test_db) -> Generator[Session, None, None]: """Creates all tables on setUp, yields SQLAlchemy session and removes tables on tearDown. 
""" @@ -132,9 +128,7 @@ def taxon_input_data(prepared_taxonomy_record_in_db): @pytest.fixture -def prepared_taxonomy_record_in_db( - taxonomy_input_data, db_session -) -> Taxonomy: +def prepared_taxonomy_record_in_db(taxonomy_input_data, db_session) -> Taxonomy: return taxonomy_services.create_taxonomy_instance( db_session, TEST_TENANTS[0], @@ -240,9 +234,7 @@ def prepare_two_taxons_different_names( @pytest.fixture -def prepare_three_taxons_parent_each_other( - db_session, taxon_input_data -) -> List[Taxon]: +def prepare_three_taxons_parent_each_other(db_session, taxon_input_data) -> List[Taxon]: first_taxon = deepcopy(taxon_input_data) first_id = uuid4().hex @@ -352,9 +344,7 @@ def common_taxon(db_session, prepare_common_tenant_taxonomy): @pytest.fixture -def overrided_token_client( - client, db_session -) -> Generator[TestClient, None, None]: +def overrided_token_client(client, db_session) -> Generator[TestClient, None, None]: app.dependency_overrides[TOKEN] = override app.dependency_overrides[get_db] = lambda: db_session diff --git a/taxonomy/tests/test_taxon_crud.py b/taxonomy/tests/test_taxon_crud.py index c5b4d4271..c9a79ce85 100644 --- a/taxonomy/tests/test_taxon_crud.py +++ b/taxonomy/tests/test_taxon_crud.py @@ -81,12 +81,9 @@ def prepare_parents_concatenate_expected_response(taxons: List[Taxon]) -> dict: { "taxon_id": taxon.id, "taxon_name": taxon.name, - "parent_ids_concat": ".".join(taxon.tree.path.split(".")[:-1]) - or None, + "parent_ids_concat": ".".join(taxon.tree.path.split(".")[:-1]) or None, # Names equal to ids in this test - "parent_names_concat": ".".join( - taxon.tree.path.split(".")[:-1] - ) + "parent_names_concat": ".".join(taxon.tree.path.split(".")[:-1]) or None, } for taxon in taxons @@ -101,9 +98,7 @@ def test_add_taxon_taxonomy_does_not_exist(overrided_token_client): name=uuid.uuid4().hex, taxonomy_id=uuid.uuid4().hex, ) - response = overrided_token_client.post( - TAXON_PATH, json=data, headers=TEST_HEADER - ) + response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) assert response.status_code == 400 assert "Taxonomy with this id doesn't exist" in response.text @@ -120,9 +115,7 @@ def test_add_taxon_self_parent( taxonomy_id=prepared_taxonomy_record_in_db.id, parent_id=taxon_id, ) - response = overrided_token_client.post( - TAXON_PATH, json=data, headers=TEST_HEADER - ) + response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) assert response.status_code == 400 assert "Taxon cannot be its own parent" in response.text @@ -154,9 +147,7 @@ def test_add_taxon_name_empty_string( name="", taxonomy_id=prepared_taxonomy_record_in_db.id, ) - response = overrided_token_client.post( - TAXON_PATH, json=data, headers=TEST_HEADER - ) + response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) assert response.status_code == 400 assert "Taxon name can not be empty" in response.text @@ -172,9 +163,7 @@ def test_add_taxon_specify_version( taxonomy_id=prepared_taxonomy_record_in_db.id, taxonomy_version=prepared_taxonomy_record_in_db.version, ) - response = overrided_token_client.post( - TAXON_PATH, json=data, headers=TEST_HEADER - ) + response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) assert response.status_code == 201 assert response_schema_from_request(data) == response.json() @@ -194,9 +183,7 @@ def test_add_unique_name( taxonomy_id=prepared_taxonomy_record_in_db.id, taxonomy_version=prepared_taxonomy_record_in_db.version, ) - response = overrided_token_client.post( 
- TAXON_PATH, json=data, headers=TEST_HEADER - ) + response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) assert response.status_code == 201 assert response_schema_from_request(data) == response.json() @@ -211,9 +198,7 @@ def test_add_taxon_id_exists( id_=prepared_taxon_entity_in_db.id, taxonomy_id=prepared_taxonomy_record_in_db.id, ) - response = overrided_token_client.post( - TAXON_PATH, json=data, headers=TEST_HEADER - ) + response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) assert response.status_code == 400 assert "Taxon id must be unique" in response.text @@ -269,9 +254,7 @@ def test_get_taxon_parents_isleaf( @pytest.mark.integration def test_get_taxon_does_not_exist(overrided_token_client): id_ = uuid.uuid4().hex - response = overrided_token_client.get( - f"{TAXON_PATH}/{id_}", headers=TEST_HEADER - ) + response = overrided_token_client.get(f"{TAXON_PATH}/{id_}", headers=TEST_HEADER) assert response.status_code == 404 assert f"Taxon with id: {id_} doesn't exist" in response.text @@ -295,9 +278,7 @@ def test_update_taxon_duplicate_name( prepare_two_taxons_different_names, ): id_ = prepare_two_taxons_different_names[1].id - taxon_update = prepare_taxon_body( - name=prepare_two_taxons_different_names[0].name - ) + taxon_update = prepare_taxon_body(name=prepare_two_taxons_different_names[0].name) taxon_update.pop("id") response = overrided_token_client.put( @@ -411,10 +392,7 @@ def test_delete_taxon_does_not_exist( f"{TAXON_PATH}/{uuid.uuid4().hex}", headers=TEST_HEADER ) assert delete_response.status_code == 404 - assert ( - "Cannot delete taxon that doesn't exist" - in delete_response.json()["detail"] - ) + assert "Cannot delete taxon that doesn't exist" in delete_response.json()["detail"] @pytest.mark.integration @@ -618,9 +596,7 @@ def test_search_parents_recursive_tree( assert len(taxons) == 2 - assert parent_1 == response_schema_from_request( - root.to_dict(), is_leaf=False - ) + assert parent_1 == response_schema_from_request(root.to_dict(), is_leaf=False) assert parent_2 == response_schema_from_request( second.to_dict(), parents=[response_schema_from_request(root.to_dict(), is_leaf=False)], @@ -645,9 +621,7 @@ def test_get_parents_concatenated_not_found( @pytest.mark.integration -def test_get_parents_concatenated( - overrided_token_client, prepared_taxon_hierarchy -): +def test_get_parents_concatenated(overrided_token_client, prepared_taxon_hierarchy): taxons_search_from = prepared_taxon_hierarchy[:5] taxon_ids = [taxon.id for taxon in taxons_search_from] @@ -661,6 +635,6 @@ def test_get_parents_concatenated( taxons = response.json() assert len(taxons) == 5 - assert prepare_parents_concatenate_expected_response( - taxons_search_from - ) == sorted(response.json(), key=lambda x: x["taxon_id"]) + assert prepare_parents_concatenate_expected_response(taxons_search_from) == sorted( + response.json(), key=lambda x: x["taxon_id"] + ) diff --git a/users/tests/keycloak/test_query.py b/users/tests/keycloak/test_query.py index bf9084421..17fbfa374 100644 --- a/users/tests/keycloak/test_query.py +++ b/users/tests/keycloak/test_query.py @@ -32,9 +32,7 @@ async def test_get_token_v2(request_mock): @pytest.mark.asyncio -async def test_introspect_token_test( - request_mock, mocked_token1, mocked_token1_data -): +async def test_introspect_token_test(request_mock, mocked_token1, mocked_token1_data): request_mock.return_value.__aenter__.return_value.json.return_value = ( mocked_token1_data ) @@ -43,9 +41,7 @@ async def 
test_introspect_token_test( @pytest.mark.asyncio -async def test_get_master_realm_auth_data( - request_mock, mocked_admin_auth_data -): +async def test_get_master_realm_auth_data(request_mock, mocked_admin_auth_data): request_mock.return_value.__aenter__.return_value.json.return_value = ( mocked_admin_auth_data ) diff --git a/users/tests/test_main.py b/users/tests/test_main.py index ea0cbe15e..5568eb44d 100644 --- a/users/tests/test_main.py +++ b/users/tests/test_main.py @@ -96,9 +96,7 @@ def does_not_raise(): roles=["admin"], tenants=["tenant"], ), - TenantData( - token="token", user_id="user_id", roles=[], tenants=["tenant"] - ), + TenantData(token="token", user_id="user_id", roles=[], tenants=["tenant"]), ], ) def test_check_authorization_role_is_missing(mock_tenant_data): @@ -212,9 +210,7 @@ def test_login_status_code(token_schema, request_body, status_code): class TestGetUserGWT: def test_get_user_jwt_body(self, mock_user, user_representation): response = client.get("/users/current") - assert response.json() == user_representation( - user_id="1", user_name="user" - ) + assert response.json() == user_representation(user_id="1", user_name="user") def test_get_user_jwt_status_code(self, mock_user): response = client.get("/users/current") @@ -225,18 +221,14 @@ def test_get_user_jwt_status_code(self, mock_user): class TestGetUser: def test_get_user_body(self, mock_user, user_representation): response = client.get("/users/user-id") - assert response.json() == user_representation( - user_id="1", user_name="user" - ) + assert response.json() == user_representation(user_id="1", user_name="user") def test_get_user_status_code(self, mock_user): response = client.get("/users/user-id") assert response.status_code == 200 -def test_get_user_info_from_token_introspection( - mocked_token1, mocked_token1_data -): +def test_get_user_info_from_token_introspection(mocked_token1, mocked_token1_data): with patch( "users.keycloak.query.introspect_token", return_value=mocked_token1_data ): @@ -343,9 +335,7 @@ def test_add_user_to_tenant2( ("group_1", {"detail": "User has been removed from the tenant"}), ], ) -def test_remove_user_from_tenant_body( - mock_user, update_user, tenant, expected_result -): +def test_remove_user_from_tenant_body(mock_user, update_user, tenant, expected_result): response = client.delete(f"/tenants/{tenant}/users/user_1") assert response.json() == expected_result @@ -366,9 +356,7 @@ def test_remove_user_from_tenant_status_code( @patch("users.keycloak.query.get_users_v2", return_value=mock_all_users) -@patch( - "users.keycloak.query.get_users_by_role", return_value=mock_users_with_role -) +@patch("users.keycloak.query.get_users_by_role", return_value=mock_users_with_role) class TestUsersSearch: @pytest.mark.parametrize("request_body", [{}, {"filters": []}]) def test_get_all_users_body( @@ -397,11 +385,7 @@ def test_filter_users_by_name_body( ): response = client.post( "/users/search", - json={ - "filters": [ - {"field": "name", "operator": "like", "value": "r"} - ] - }, + json={"filters": [{"field": "name", "operator": "like", "value": "r"}]}, ) assert response.json() == [ user_representation(user_id="1", user_name="user"), @@ -413,11 +397,7 @@ def test_filter_users_by_name_status_code( ): response = client.post( "/users/search", - json={ - "filters": [ - {"field": "name", "operator": "like", "value": "r"} - ] - }, + json={"filters": [{"field": "name", "operator": "like", "value": "r"}]}, ) assert response.status_code == 200 @@ -460,20 +440,14 @@ def 
test_filter_users_by_empty_name_status_code( ): response = client.post( "/users/search", - json={ - "filters": [{"field": "name", "operator": "like", "value": ""}] - }, + json={"filters": [{"field": "name", "operator": "like", "value": ""}]}, ) assert response.status_code == 422 @pytest.mark.parametrize( "request_body", [ - { - "filters": [ - {"field": "id", "operator": "in", "value": ["1", "2"]} - ] - }, + {"filters": [{"field": "id", "operator": "in", "value": ["1", "2"]}]}, { "filters": [ { @@ -505,11 +479,7 @@ def test_filter_users_by_id_body( @pytest.mark.parametrize( "request_body", [ - { - "filters": [ - {"field": "id", "operator": "in", "value": ["1", "2"]} - ] - }, + {"filters": [{"field": "id", "operator": "in", "value": ["1", "2"]}]}, { "filters": [ { @@ -538,11 +508,7 @@ def test_filter_users_by_id_status_code( "request_body", [ {"filters": [{"field": "id", "operator": "in", "value": []}]}, - { - "filters": [ - {"field": "id", "operator": "in", "value": ["wrong_id"]} - ] - }, + {"filters": [{"field": "id", "operator": "in", "value": ["wrong_id"]}]}, ], ) def test_filter_users_by_wrong_or_empty_id_body( @@ -562,11 +528,7 @@ def test_filter_users_by_wrong_or_empty_id_body( "request_body", [ {"filters": [{"field": "id", "operator": "in", "value": []}]}, - { - "filters": [ - {"field": "id", "operator": "in", "value": ["wrong_id"]} - ] - }, + {"filters": [{"field": "id", "operator": "in", "value": ["wrong_id"]}]}, ], ) def test_filter_users_by_wrong_or_empty_id_status_code( @@ -579,9 +541,7 @@ def test_filter_users_by_wrong_or_empty_id_status_code( response = client.post( "/users/search", json={ - "filters": [ - {"field": "id", "operator": "in", "value": ["wrong_id"]} - ] + "filters": [{"field": "id", "operator": "in", "value": ["wrong_id"]}] }, ) assert response.status_code == 200 @@ -629,9 +589,7 @@ def test_filter_users_by_wrong_role_body( response = client.post( "/users/search", json={ - "filters": [ - {"field": "role", "operator": "eq", "value": "wrong_role"} - ] + "filters": [{"field": "role", "operator": "eq", "value": "wrong_role"}] }, ) assert response.status_code == 422 diff --git a/users/users/config.py b/users/users/config.py index f56795777..4b65f1ead 100644 --- a/users/users/config.py +++ b/users/users/config.py @@ -4,9 +4,7 @@ load_dotenv() -KEYCLOAK_ENDPOINT = os.getenv( - "KEYCLOAK_DIRECT_ENDPOINT", "http://dev2.badgerdoc.com" -) +KEYCLOAK_ENDPOINT = os.getenv("KEYCLOAK_DIRECT_ENDPOINT", "http://dev2.badgerdoc.com") KEYCLOAK_REALM = os.getenv("KEYCLOAK_REALM", "master") KEYCLOAK_ROLE_ADMIN = os.getenv("KEYCLOAK_ROLE_ADMIN", "") KEYCLOAK_USERS_PUBLIC_KEY = os.getenv("KEYCLOAK_USERS_PUBLIC_KEY", "") diff --git a/users/users/keycloak/query.py b/users/users/keycloak/query.py index 0b34284d4..c1769bcf4 100644 --- a/users/users/keycloak/query.py +++ b/users/users/keycloak/query.py @@ -38,9 +38,7 @@ def create_bearer_header(token: str) -> Dict[str, str]: return {"Authorization": f"Bearer {token}"} -async def create_user( - token: str, realm: str, username: str, email: str -) -> None: +async def create_user(token: str, realm: str, username: str, email: str) -> None: """Create user""" url = resources.users_uri.substitute(realm=realm) method = "POST" @@ -56,9 +54,7 @@ async def create_user( return -async def get_users_by_role( - token: str, realm: str, role: str -) -> List[schemas.User]: +async def get_users_by_role(token: str, realm: str, role: str) -> List[schemas.User]: """Get list of users from keycloak by role""" url = resources.users_by_role_uri.substitute(realm=realm, 
role=role) @@ -97,9 +93,7 @@ async def get_token_v2( return schemas.TokenResponse.parse_obj(await resp.json()) -async def get_users_v2( - realm: str, token: str, **filters: Any -) -> List[schemas.User]: +async def get_users_v2(realm: str, token: str, **filters: Any) -> List[schemas.User]: """Get users from realm, filtered according to filters. :param realm: Keycloak realm. @@ -178,9 +172,7 @@ async def introspect_token(token: str) -> Token_Data: ) -async def get_groups( - realm: str, token: str, name: str = None -) -> List[schemas.Group]: +async def get_groups(realm: str, token: str, name: str = None) -> List[schemas.Group]: """Get group from realm by its name. :param realm: Keycloak realm. @@ -224,9 +216,7 @@ async def create_group(realm: str, token: str, group: schemas.Group) -> None: return -async def update_user( - realm: str, token: str, user_id: str, upd: schemas.User -) -> None: +async def update_user(realm: str, token: str, user_id: str, upd: schemas.User) -> None: """Update user. :param realm: Keycloak realm. @@ -250,9 +240,7 @@ async def update_user( async def execute_action_email(token: str, realm: str, user_id: str) -> None: """Send email to user for updating user profile""" - url = resources.execute_actions_email_uri.substitute( - realm=realm, id=user_id - ) + url = resources.execute_actions_email_uri.substitute(realm=realm, id=user_id) method = "PUT" headers = create_bearer_header(token) payload = ["UPDATE_PROFILE", "UPDATE_PASSWORD"] @@ -289,9 +277,7 @@ async def get_master_realm_auth_data() -> AuthData: data=payload, ) as resp: data = await resp.json() - data_to_return: AuthData = ( - data # casting into TypedDict for linter checks - ) + data_to_return: AuthData = data # casting into TypedDict for linter checks return data_to_return except aiohttp.ClientConnectionError as e: diff --git a/users/users/keycloak/resources.py b/users/users/keycloak/resources.py index bf07f4c94..8eb7f560e 100644 --- a/users/users/keycloak/resources.py +++ b/users/users/keycloak/resources.py @@ -38,7 +38,5 @@ def join_paths(*args: str) -> str: join_paths(_base_uri, _oidc_uri, "token", "introspect") ) identity_providers_uri = Template( - join_paths( - _base_uri, "admin", _realm_uri, "identity-provider", "instances" - ) + join_paths(_base_uri, "admin", _realm_uri, "identity-provider", "instances") ) diff --git a/users/users/keycloak/schemas.py b/users/users/keycloak/schemas.py index a46d9d5c4..bf791e8d9 100644 --- a/users/users/keycloak/schemas.py +++ b/users/users/keycloak/schemas.py @@ -283,6 +283,7 @@ class Config: class OAuthRequest(BaseModel): """Base class for authorization requests""" + client_id: Optional[str] grant_type: str client_secret: Optional[str] @@ -317,6 +318,7 @@ def from_fastapi_form( class RefreshTokenRequest(OAuthRequest): """Represents Keycloak token refreshment request""" + client_id: str = "admin-cli" grant_type: str = "refresh_token" refresh_token: str diff --git a/users/users/main.py b/users/users/main.py index 122587c09..bb7df442b 100644 --- a/users/users/main.py +++ b/users/users/main.py @@ -12,6 +12,8 @@ from fastapi import Depends, FastAPI, Header, HTTPException, Query, Request from fastapi.responses import JSONResponse from fastapi.security import OAuth2PasswordRequestForm +from tenant_dependency import TenantData, get_tenant_info +from urllib3.exceptions import MaxRetryError from users import s3, utils from users.config import ( KEYCLOAK_ROLE_ADMIN, @@ -20,16 +22,12 @@ ) from users.logger import Logger from users.schemas import Users -from tenant_dependency import 
TenantData, get_tenant_info -from urllib3.exceptions import MaxRetryError app = FastAPI(title="users", root_path=ROOT_PATH, version="0.1.2") realm = conf.KEYCLOAK_REALM minio_client = s3.get_minio_client() -tenant = get_tenant_info( - KEYCLOAK_USERS_PUBLIC_KEY, algorithm="RS256", debug=True -) +tenant = get_tenant_info(KEYCLOAK_USERS_PUBLIC_KEY, algorithm="RS256", debug=True) def check_authorization(token: TenantData, role: str) -> None: @@ -38,19 +36,13 @@ def check_authorization(token: TenantData, role: str) -> None: @app.middleware("http") -async def request_error_handler( - request: Request, call_next: Callable[..., Any] -) -> Any: +async def request_error_handler(request: Request, call_next: Callable[..., Any]) -> Any: try: return await call_next(request) except aiohttp.ClientResponseError as err: - return JSONResponse( - status_code=err.status, content={"detail": err.message} - ) + return JSONResponse(status_code=err.status, content={"detail": err.message}) except AIOHTTPException as err: - return JSONResponse( - status_code=err.status_code, content={"detail": err.reason} - ) + return JSONResponse(status_code=err.status_code, content={"detail": err.reason}) @app.post( @@ -103,9 +95,7 @@ async def user_registration( realm=realm, token=token.token, email=email, exact="true" ) user_id = user[0].id - await kc_query.execute_action_email( - token=token.token, realm=realm, user_id=user_id - ) + await kc_query.execute_action_email(token=token.token, realm=realm, user_id=user_id) return {"detail": "User has been created"} @@ -148,17 +138,13 @@ async def get_user( return await kc_query.get_user(realm, token.token, user_id) -@app.get( - "/tenants", status_code=200, response_model=List[str], tags=["tenants"] -) +@app.get("/tenants", status_code=200, response_model=List[str], tags=["tenants"]) async def get_tenants( token: TenantData = Depends(tenant), current_tenant: Optional[str] = Header(None, alias="X-Current-Tenant"), ) -> List[str]: """Get all tenants.""" - return [ - group.name for group in await kc_query.get_groups(realm, token.token) - ] + return [group.name for group in await kc_query.get_groups(realm, token.token)] @app.post( @@ -178,9 +164,7 @@ async def create_tenant( try: s3.create_bucket(minio_client, bucket) except MaxRetryError: - raise HTTPException( - status_code=503, detail="Cannot connect to the Minio." 
-        )
+        raise HTTPException(status_code=503, detail="Cannot connect to the Minio.")
     tenant_ = kc_schemas.Group(name=tenant)
     await kc_query.create_group(realm, token.token, tenant_)
     return {"detail": "Tenant has been created"}
@@ -246,9 +230,7 @@ async def get_users_by_filter(
             role=filters.get("role").value,  # type: ignore
         )
     else:
-        users_list = await kc_query.get_users_v2(
-            realm=realm, token=token.token
-        )
+        users_list = await kc_query.get_users_v2(realm=realm, token=token.token)
 
     users_list = kc_schemas.User.filter_users(
         users=users_list,

From 3d12dd2f081a7b0745b90ce103a40d32096d4309 Mon Sep 17 00:00:00 2001
From: yarnaid
Date: Fri, 17 Feb 2023 17:16:05 +0400
Subject: [PATCH 13/22] refactor: formatting

---
 annotation/alembic/env.py | 8 +-
 ...72a5043_add_categories_editor_url_data_.py | 8 +-
 ..._alter_categories_id_integer_to_varchar.py | 28 ++-
 ...3136551008d8_agreement_metrics_relation.py | 4 +-
 .../36bff2d016f7_expand_validationschema.py | 20 +-
 .../versions/3a083a1fbba0_first_revision.py | 24 ++-
 .../versions/4272d0a43ff1_agreement_score.py | 8 +-
 .../66cd6054c2d0_add_categories_tree.py | 8 +-
 ...8e6343_add_extensive_coverage_parameter.py | 6 +-
 .../7cc1ed83c309_compare_agreement_scores.py | 4 +-
 ...6b8ebe84_remove_annotateddoc_constraint.py | 12 +-
 ..._category_change_editor_data_attributes.py | 4 +-
 .../cf633ca94498_add_statuses_to_job.py | 4 +-
 ...963_drop_is_link_add_type_to_categories.py | 16 +-
 annotation/annotation/annotations/main.py | 64 ++++--
 .../annotation/annotations/resources.py | 51 +++--
 annotation/annotation/categories/resources.py | 17 +-
 annotation/annotation/categories/services.py | 50 +++--
 annotation/annotation/distribution/main.py | 66 ++++--
 .../annotation/distribution/resources.py | 28 ++-
 annotation/annotation/errors.py | 19 +-
 annotation/annotation/filters.py | 8 +-
 annotation/annotation/jobs/resources.py | 63 ++++--
 annotation/annotation/jobs/services.py | 29 ++-
 annotation/annotation/main.py | 15 +-
 annotation/annotation/metadata/resources.py | 4 +-
 .../assets_communication.py | 17 +-
 .../jobs_communication.py | 9 +-
 .../microservice_communication/search.py | 9 +-
 annotation/annotation/models.py | 33 +--
 annotation/annotation/revisions/resources.py | 4 +-
 annotation/annotation/schemas/annotations.py | 36 +++-
 annotation/annotation/schemas/categories.py | 7 +-
 annotation/annotation/schemas/jobs.py | 19 +-
 annotation/annotation/schemas/tasks.py | 23 +-
 annotation/annotation/tasks/resources.py | 70 ++++---
 annotation/annotation/tasks/services.py | 72 +++++--
 annotation/annotation/tasks/validation.py | 38 +++-
 annotation/documentation/update_docs.py | 4 +-
 annotation/tests/conftest.py | 49 +++--
 .../tests/test_annotators_overall_load.py | 53 +++--
 annotation/tests/test_assets_communication.py | 14 +-
 annotation/tests/test_category_crud.py | 198 +++++++++++++-----
 annotation/tests/test_cross_validation.py | 4 +-
 annotation/tests/test_delete_batch_tasks.py | 8 +-
 annotation/tests/test_distribution.py | 12 +-
 annotation/tests/test_finish_task.py | 47 +++--
 .../tests/test_get_accumulated_revisions.py | 4 +-
 ..._get_annotation_for_particular_revision.py | 8 +-
 annotation/tests/test_get_child_categories.py | 8 +-
 annotation/tests/test_get_job.py | 12 +-
 annotation/tests/test_get_job_files.py | 10 +-
 .../tests/test_get_jobs_info_by_files.py | 4 +-
 annotation/tests/test_get_pages_info.py | 8 +-
 annotation/tests/test_get_revisions.py | 4 +-
 .../test_get_revisions_without_annotation.py | 4 +-
 annotation/tests/test_job_categories.py | 23 +-
annotation/tests/test_microservices_search.py | 12 +- annotation/tests/test_post.py | 44 +++- annotation/tests/test_post_annotation.py | 90 ++++++-- annotation/tests/test_post_job.py | 13 +- .../tests/test_post_unassgined_files.py | 8 +- annotation/tests/test_search_kafka.py | 19 +- annotation/tests/test_start_job.py | 16 +- annotation/tests/test_tasks_crud_cr.py | 102 ++++++--- annotation/tests/test_tasks_crud_ud.py | 13 +- annotation/tests/test_update_job.py | 38 +++- annotation/tests/test_validation.py | 22 +- assets/alembic/env.py | 8 +- .../versions/afa33cc83d57_new_fields.py | 8 +- assets/assets/db/models.py | 16 +- assets/assets/db/service.py | 42 +++- assets/assets/routers/bonds_router.py | 12 +- assets/assets/routers/datasets_router.py | 20 +- assets/assets/routers/files_router.py | 17 +- assets/assets/routers/minio_router.py | 32 ++- assets/assets/routers/s3_router.py | 7 +- assets/assets/utils/common_utils.py | 12 +- assets/assets/utils/convert_service_utils.py | 8 +- assets/assets/utils/minio_utils.py | 38 +++- assets/assets/utils/s3_utils.py | 16 +- assets/tests/conftest.py | 32 ++- assets/tests/test_helpers.py | 16 +- assets/tests/test_main.py | 97 ++++++--- assets/tests/test_utils.py | 20 +- .../minio_service/minio_service/minio_api.py | 28 ++- common/model_api/model_api/common/models.py | 4 +- common/model_api/model_api/preprocessing.py | 12 +- .../model_api/model_api/storage_exchange.py | 8 +- common/model_api/model_api/utils.py | 12 +- common/model_api/tests/test_api.py | 12 +- common/model_api/tests/test_preprocessing.py | 12 +- common/model_api/tests/test_smoke.py | 8 +- .../page_rendering/page_rendering.py | 4 +- convert/convert/coco_export/convert.py | 23 +- convert/convert/coco_import/convert.py | 16 +- convert/convert/coco_import/import_service.py | 8 +- convert/convert/config.py | 4 +- .../badgerdoc_format/annotation_converter.py | 13 +- .../annotation_converter_practic.py | 8 +- .../badgerdoc_format/pdf_renderer.py | 14 +- .../badgerdoc_format/plain_text_converter.py | 4 +- .../badgerdoc_to_label_studio_use_case.py | 12 +- .../label_studio_to_badgerdoc_use_case.py | 76 ++++--- .../labelstudio_format/label_studio_format.py | 12 +- convert/convert/models/coco.py | 4 +- convert/convert/routers/coco.py | 8 +- convert/convert/routers/label_studio.py | 4 +- convert/convert/utils/json_utils.py | 12 +- convert/convert/utils/render_pdf_page.py | 12 +- convert/convert/utils/s3_utils.py | 20 +- .../tests/test_label_studio/test_export.py | 15 +- .../tests/test_label_studio/test_import.py | 8 +- .../test_label_studio/test_text_wrapper.py | 5 +- dev_runner/dev_runner/runners/base_runner.py | 3 +- dev_runner/start.py | 4 +- jobs/alembic/env.py | 10 +- jobs/alembic/versions/3f5b2d199d38_.py | 4 +- jobs/alembic/versions/7511c6790067_.py | 12 +- jobs/alembic/versions/9229e70d2791_.py | 12 +- ...add_start_manual_job_automatically_flag.py | 5 +- ...0dd492b17f_add_extensive_coverage_param.py | 4 +- jobs/jobs/create_job_funcs.py | 13 +- jobs/jobs/db_service.py | 9 +- jobs/jobs/main.py | 18 +- jobs/jobs/schemas.py | 64 ++++-- jobs/jobs/utils.py | 34 ++- jobs/tests/conftest.py | 11 +- ...t_ExtractionWithAnnotationJob_workflows.py | 33 ++- .../test_args_validation.py | 36 +++- .../test_change_job-proxy_to_annotation.py | 12 +- .../test_API_functions/test_change_job.py | 8 +- .../test_API_functions/test_create_job.py | 8 +- .../test_other_API_functions.py | 20 +- .../test_API_functions/test_search_jobs.py | 20 +- jobs/tests/test_db.py | 12 +- jobs/tests/test_utils.py | 40 +++- 
lib/filter_lib/src/dict_parser.py | 4 +- lib/filter_lib/src/enum_generator.py | 4 +- lib/filter_lib/src/query_modificator.py | 28 ++- lib/filter_lib/src/schema_generator.py | 8 +- lib/filter_lib/tests/test_dict_parser.py | 12 +- lib/filter_lib/tests/test_enum_generator.py | 8 +- lib/filter_lib/tests/test_pagination.py | 4 +- lib/filter_lib/tests/test_query_modifier.py | 8 +- lib/filter_lib/tests/test_schema_generator.py | 4 +- lib/filter_lib/usage_example/app.py | 8 +- lib/tenants/src/dependency.py | 24 ++- lib/tenants/tests/conftest.py | 16 +- lib/tenants/tests/test_dependency_rs256.py | 4 +- lib/tenants/tests/test_schema.py | 4 +- models/alembic/env.py | 4 +- ...4fd362de_add_description_field_to_model.py | 4 +- .../5c3092bc3517_add_columns_to_basement.py | 8 +- ...a_added_archive_field_to_training_model.py | 4 +- .../versions/683f401ed33e_create_tables.py | 8 +- .../826680104247_pod_limits_column.py | 7 +- .../8fd15e9edd28_pod_cpu_limits_change.py | 6 +- ...eff4c79fd3_modify_basement_and_training.py | 4 +- ...add_latest_and_version_columns_to_model.py | 4 +- ...85a73c2_set_basement_concurrency_limits.py | 6 +- ...9f68f00d4_add_field_type_to_table_model.py | 4 +- models/models/colab_ssh_utils.py | 4 +- models/models/crud.py | 4 +- models/models/errors.py | 16 +- models/models/routers/basements_routers.py | 4 +- .../models/routers/deployed_models_routers.py | 4 +- models/models/routers/models_routers.py | 37 +++- models/models/routers/training_routers.py | 16 +- models/models/schemas.py | 20 +- models/models/utils.py | 8 +- models/tests/conftest.py | 8 +- models/tests/test_basement_routers.py | 18 +- models/tests/test_colab_start_training.py | 12 +- models/tests/test_crud.py | 8 +- models/tests/test_models_routers.py | 16 +- models/tests/test_schemas.py | 16 +- models/tests/test_trainings_routers.py | 10 +- models/tests/test_utils.py | 21 +- pipelines/alembic/env.py | 4 +- ...5e65cf34b_fix_default_type_to_inference.py | 6 +- pipelines/alembic/versions/29f072fb5c9c_.py | 4 +- .../alembic/versions/5fd9d1fdcf5b_init.py | 4 +- ..._add_original_pipeline_id_and_is_latest.py | 4 +- ...69_add_type_description_and_summary_to_.py | 8 +- ...aebbddd8_change_pipeline_version_to_int.py | 12 +- ...dd_parent_step_and_tenant_to_execution_.py | 6 +- pipelines/pipelines/app.py | 12 +- pipelines/pipelines/config.py | 3 +- pipelines/pipelines/db/logger.py | 16 +- pipelines/pipelines/db/models.py | 16 +- pipelines/pipelines/db/service.py | 32 ++- pipelines/pipelines/execution.py | 54 +++-- pipelines/pipelines/http_utils.py | 8 +- pipelines/pipelines/kafka_utils.py | 4 +- pipelines/pipelines/pipeline_runner.py | 7 +- pipelines/pipelines/result_processing.py | 16 +- pipelines/pipelines/schemas.py | 24 ++- pipelines/pipelines/service_token.py | 8 +- pipelines/tests/conftest.py | 8 +- pipelines/tests/db/test_logger.py | 6 +- pipelines/tests/db/test_service.py | 65 ++++-- pipelines/tests/test_app.py | 12 +- pipelines/tests/test_execution.py | 34 ++- pipelines/tests/test_http_utils.py | 4 +- pipelines/tests/test_result_processing.py | 66 ++++-- pipelines/tests/test_schemas.py | 16 +- pipelines/tests/test_webhooks.py | 3 +- pipelines/tests/testing_data.py | 3 +- processing/alembic/env.py | 8 +- .../processing/health_check_easy_ocr.py | 12 +- processing/processing/main.py | 18 +- processing/processing/schema.py | 4 +- .../processing/send_preprocess_results.py | 5 +- processing/processing/tasks.py | 12 +- processing/processing/text_merge.py | 4 +- .../processing/third_party_code/table.py | 12 +- 
processing/processing/utils/aiohttp_utils.py | 8 +- processing/processing/utils/logger.py | 3 +- processing/processing/utils/minio_utils.py | 10 +- processing/processing/utils/utils.py | 12 +- .../tests/integration/test_integration.py | 12 +- processing/tests/test_assets_status.py | 5 +- processing/tests/test_text_merge.py | 12 +- processing/tests/test_utils/test_utils.py | 25 ++- scheduler/alembic/env.py | 4 +- scheduler/alembic/versions/0cadbdb7f0ea_.py | 4 +- scheduler/alembic/versions/449be82736bd_.py | 4 +- scheduler/scheduler/app.py | 4 +- scheduler/scheduler/db/models.py | 8 +- scheduler/scheduler/db/service.py | 14 +- scheduler/scheduler/heartbeat.py | 16 +- scheduler/scheduler/runner.py | 8 +- scheduler/tests/test_heartbeat.py | 8 +- scheduler/tests/test_service.py | 4 +- search/search/config.py | 4 +- search/search/es.py | 20 +- search/search/harvester.py | 12 +- search/search/main.py | 12 +- search/search/schemas/facets.py | 36 +++- search/search/schemas/pieces.py | 32 ++- search/tests/conftest.py | 16 +- search/tests/test_facets.py | 18 +- search/tests/test_get.py | 19 +- search/tests/test_harvester.py | 19 +- search/tests/test_indexation_endpoint.py | 12 +- search/tests/test_pieces.py | 42 +++- taxonomy/alembic/env.py | 6 +- ...ecbed_add_association_taxonomy_category.py | 8 +- .../versions/bdea8a93cafe_first_revision.py | 16 +- taxonomy/documentation/update_docs.py | 4 +- taxonomy/taxonomy/errors.py | 4 +- taxonomy/taxonomy/schemas/taxon.py | 4 +- taxonomy/taxonomy/schemas/taxonomy.py | 8 +- taxonomy/taxonomy/taxon/services.py | 28 ++- taxonomy/taxonomy/taxonomy/resources.py | 28 ++- taxonomy/taxonomy/taxonomy/services.py | 12 +- taxonomy/tests/conftest.py | 33 ++- taxonomy/tests/test_taxon_crud.py | 58 +++-- users/tests/keycloak/test_query.py | 8 +- users/tests/keycloak/test_utils.py | 6 +- users/tests/test_main.py | 75 +++++-- users/users/config.py | 4 +- users/users/keycloak/query.py | 46 ++-- users/users/keycloak/resources.py | 4 +- users/users/main.py | 36 +++- users/users/schemas.py | 4 +- 267 files changed, 3436 insertions(+), 1243 deletions(-) diff --git a/annotation/alembic/env.py b/annotation/alembic/env.py index 19c1480c4..79acffc7c 100644 --- a/annotation/alembic/env.py +++ b/annotation/alembic/env.py @@ -1,11 +1,11 @@ import os from logging.config import fileConfig -from annotation.database import SQLALCHEMY_DATABASE_URL -from annotation.utils import get_test_db_url from sqlalchemy import engine_from_config, pool from alembic import context # type: ignore +from annotation.database import SQLALCHEMY_DATABASE_URL +from annotation.utils import get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. 
@@ -30,7 +30,9 @@ if not os.getenv("USE_TEST_DB"): config.set_main_option("sqlalchemy.url", SQLALCHEMY_DATABASE_URL) else: - config.set_main_option("sqlalchemy.url", get_test_db_url(SQLALCHEMY_DATABASE_URL)) + config.set_main_option( + "sqlalchemy.url", get_test_db_url(SQLALCHEMY_DATABASE_URL) + ) def run_migrations_offline(): diff --git a/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py b/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py index 5dac3a82e..f48b2e818 100644 --- a/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py +++ b/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py @@ -18,10 +18,14 @@ def upgrade(): - op.add_column("categories", sa.Column("editor_url", sa.VARCHAR(), nullable=True)) + op.add_column( + "categories", sa.Column("editor_url", sa.VARCHAR(), nullable=True) + ) op.add_column( "categories", - sa.Column("data_attributes", postgresql.ARRAY(sa.VARCHAR()), nullable=True), + sa.Column( + "data_attributes", postgresql.ARRAY(sa.VARCHAR()), nullable=True + ), ) diff --git a/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py b/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py index 896fd34be..e01797b0b 100644 --- a/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py +++ b/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py @@ -20,7 +20,9 @@ def upgrade(): - op.drop_constraint("categories_parent_fkey", "categories", type_="foreignkey") + op.drop_constraint( + "categories_parent_fkey", "categories", type_="foreignkey" + ) op.drop_constraint( "association_jobs_categories_category_id_fkey", "association_jobs_categories", @@ -83,14 +85,18 @@ def upgrade(): new_column_name="parent", server_default="null", ) - op.alter_column("categories", "id_temp", new_column_name="id", nullable=False) + op.alter_column( + "categories", "id_temp", new_column_name="id", nullable=False + ) op.alter_column( "association_jobs_categories", "category_id_temp", new_column_name="category_id", nullable=False, ) - op.create_check_constraint("is_not_self_parent", "categories", "id != parent") + op.create_check_constraint( + "is_not_self_parent", "categories", "id != parent" + ) op.create_index( op.f("ix_categories_parent"), "categories", @@ -108,7 +114,9 @@ def check_exist_sequence(): def clear_tables(): conn = op.get_bind() inspector = Inspector.from_engine(conn) - tables = [data[0] for data in inspector.get_sorted_table_and_fkc_names()[-2::-1]] + tables = [ + data[0] for data in inspector.get_sorted_table_and_fkc_names()[-2::-1] + ] tables.remove("alembic_version") for table in tables: conn.execute(f"DELETE FROM {table}") @@ -118,7 +126,9 @@ def downgrade(): clear_tables() if not check_exist_sequence(): op.execute(sa.schema.CreateSequence(categories_id_seq)) - op.drop_constraint("categories_parent_fkey", "categories", type_="foreignkey") + op.drop_constraint( + "categories_parent_fkey", "categories", type_="foreignkey" + ) op.drop_constraint( "association_jobs_categories_category_id_fkey", "association_jobs_categories", @@ -175,14 +185,18 @@ def downgrade(): "parent_temp", new_column_name="parent", ) - op.alter_column("categories", "id_temp", new_column_name="id", nullable=False) + op.alter_column( + "categories", "id_temp", new_column_name="id", nullable=False + ) op.alter_column( "association_jobs_categories", "category_id_temp", new_column_name="category_id", 
nullable=False, ) - op.create_check_constraint("is_not_self_parent", "categories", "id != parent") + op.create_check_constraint( + "is_not_self_parent", "categories", "id != parent" + ) op.create_index( op.f("ix_categories_parent"), "categories", diff --git a/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py b/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py index bb3a3ebb0..2fbb85886 100644 --- a/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py +++ b/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py @@ -34,7 +34,9 @@ def downgrade(): nullable=False, ), sa.Column("job_id", sa.INTEGER(), autoincrement=False, nullable=False), - sa.Column("task_id", sa.INTEGER(), autoincrement=False, nullable=False), + sa.Column( + "task_id", sa.INTEGER(), autoincrement=False, nullable=False + ), sa.Column( "agreement_score", postgresql.JSONB(astext_type=sa.Text()), diff --git a/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py b/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py index a36f49140..2a3d17f77 100644 --- a/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py +++ b/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py @@ -75,7 +75,9 @@ def upgrade(): ondelete="cascade", ) - op.drop_constraint(JOB_OWNER_JOB_ID_FK, "association_job_owner", type_="foreignkey") + op.drop_constraint( + JOB_OWNER_JOB_ID_FK, "association_job_owner", type_="foreignkey" + ) op.drop_constraint( JOB_OWNER_USER_ID_FK, "association_job_owner", type_="foreignkey" ) @@ -133,7 +135,9 @@ def upgrade(): ondelete="cascade", ) - op.execute("ALTER TYPE validation_type ADD VALUE IF NOT EXISTS 'validation_only'") + op.execute( + "ALTER TYPE validation_type ADD VALUE IF NOT EXISTS 'validation_only'" + ) class ValidationSchema(str, Enum): @@ -146,7 +150,9 @@ def downgrade(): Remove validation_only job type and rollback fk constraints """ - op.alter_column("jobs", "validation_type", type_=sa.VARCHAR(), server_default=None) + op.alter_column( + "jobs", "validation_type", type_=sa.VARCHAR(), server_default=None + ) op.execute("DROP TYPE validation_type;") op.execute("DELETE FROM jobs " "WHERE validation_type = 'validation_only'") @@ -161,7 +167,9 @@ def downgrade(): ) op.drop_constraint(TASKS_JOB_ID_FK, "tasks", type_="foreignkey") - op.create_foreign_key(TASKS_JOB_ID_FK, "tasks", "jobs", ["job_id"], ["job_id"]) + op.create_foreign_key( + TASKS_JOB_ID_FK, "tasks", "jobs", ["job_id"], ["job_id"] + ) op.drop_constraint( JOB_VALIDATOR_USER_ID_FK, @@ -191,7 +199,9 @@ def downgrade(): op.drop_constraint( JOB_OWNER_USER_ID_FK, "association_job_owner", type_="foreignkey" ) - op.drop_constraint(JOB_OWNER_JOB_ID_FK, "association_job_owner", type_="foreignkey") + op.drop_constraint( + JOB_OWNER_JOB_ID_FK, "association_job_owner", type_="foreignkey" + ) op.create_foreign_key( JOB_OWNER_USER_ID_FK, "association_job_owner", diff --git a/annotation/alembic/versions/3a083a1fbba0_first_revision.py b/annotation/alembic/versions/3a083a1fbba0_first_revision.py index 6ec1fd6bb..0020e61d2 100644 --- a/annotation/alembic/versions/3a083a1fbba0_first_revision.py +++ b/annotation/alembic/versions/3a083a1fbba0_first_revision.py @@ -29,7 +29,9 @@ def upgrade(): if "annotators" not in tables: op.create_table( "annotators", - sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column( + "user_id", postgresql.UUID(as_uuid=True), nullable=False + ), sa.Column("default_load", sa.INTEGER(), 
nullable=False), sa.PrimaryKeyConstraint("user_id"), ) # Here and below this 'if' condition creates tables only for new db @@ -46,7 +48,9 @@ def upgrade(): nullable=True, ), sa.Column("is_link", sa.BOOLEAN(), nullable=False), - sa.ForeignKeyConstraint(["parent"], ["categories.id"], ondelete="cascade"), + sa.ForeignKeyConstraint( + ["parent"], ["categories.id"], ondelete="cascade" + ), sa.PrimaryKeyConstraint("id"), ) op.create_check_constraint( @@ -87,8 +91,12 @@ def upgrade(): ), sa.Column("file_id", sa.INTEGER(), nullable=False), sa.Column("job_id", sa.INTEGER(), nullable=False), - sa.Column("pages", postgresql.JSON(astext_type=sa.Text()), nullable=False), - sa.Column("validated", postgresql.ARRAY(sa.INTEGER()), nullable=False), + sa.Column( + "pages", postgresql.JSON(astext_type=sa.Text()), nullable=False + ), + sa.Column( + "validated", postgresql.ARRAY(sa.INTEGER()), nullable=False + ), sa.Column("tenant", sa.VARCHAR(), nullable=False), sa.CheckConstraint( '("user" IS NULL AND pipeline IS NOT NULL) OR ' @@ -103,7 +111,9 @@ def upgrade(): if "association_job_annotator" not in tables: op.create_table( "association_job_annotator", - sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column( + "user_id", postgresql.UUID(as_uuid=True), nullable=False + ), sa.Column("job_id", sa.INTEGER(), nullable=False), sa.ForeignKeyConstraint( ["job_id"], @@ -150,7 +160,9 @@ def upgrade(): sa.Column("file_id", sa.INTEGER(), nullable=False), sa.Column("pages", postgresql.ARRAY(sa.INTEGER()), nullable=False), sa.Column("job_id", sa.INTEGER(), nullable=False), - sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column( + "user_id", postgresql.UUID(as_uuid=True), nullable=False + ), sa.Column("is_validation", sa.BOOLEAN(), nullable=False), sa.Column( "status", diff --git a/annotation/alembic/versions/4272d0a43ff1_agreement_score.py b/annotation/alembic/versions/4272d0a43ff1_agreement_score.py index 91ea17310..b48c75b06 100644 --- a/annotation/alembic/versions/4272d0a43ff1_agreement_score.py +++ b/annotation/alembic/versions/4272d0a43ff1_agreement_score.py @@ -21,7 +21,9 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.create_table( "agreement_score", - sa.Column("annotator_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column( + "annotator_id", postgresql.UUID(as_uuid=True), nullable=False + ), sa.Column("job_id", sa.INTEGER(), nullable=False), sa.Column("task_id", sa.INTEGER(), nullable=False), sa.Column( @@ -33,7 +35,9 @@ def upgrade(): ["annotator_id"], ["users.user_id"], ), - sa.ForeignKeyConstraint(["job_id"], ["jobs.job_id"], ondelete="cascade"), + sa.ForeignKeyConstraint( + ["job_id"], ["jobs.job_id"], ondelete="cascade" + ), sa.ForeignKeyConstraint(["task_id"], ["tasks.id"], ondelete="cascade"), sa.PrimaryKeyConstraint("task_id"), ) diff --git a/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py b/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py index 539b230cb..58c611a5f 100644 --- a/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py +++ b/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py @@ -21,7 +21,9 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### op.add_column( "categories", - sa.Column("tree", sqlalchemy_utils.types.ltree.LtreeType(), nullable=True), + sa.Column( + "tree", sqlalchemy_utils.types.ltree.LtreeType(), nullable=True + ), ) op.create_index( "index_tree", @@ -35,6 +37,8 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.drop_index("index_tree", table_name="categories", postgresql_using="gist") + op.drop_index( + "index_tree", table_name="categories", postgresql_using="gist" + ) op.drop_column("categories", "tree") # ### end Alembic commands ### diff --git a/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py b/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py index d7c36853a..08d6dc0be 100644 --- a/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py +++ b/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py @@ -6,9 +6,9 @@ """ import sqlalchemy as sa -from annotation.models import ValidationSchema from alembic import op +from annotation.models import ValidationSchema # revision identifiers, used by Alembic. revision = "71095b8e6343" @@ -25,7 +25,9 @@ def upgrade(): for v in enum_keys_to_add: op.execute(f"ALTER TYPE {enum_name} ADD VALUE '{v}'") - op.add_column("jobs", sa.Column("extensive_coverage", sa.INTEGER(), nullable=True)) + op.add_column( + "jobs", sa.Column("extensive_coverage", sa.INTEGER(), nullable=True) + ) # ### end Alembic commands ### diff --git a/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py b/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py index 5ceae3194..a96504e7c 100644 --- a/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py +++ b/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py @@ -23,7 +23,9 @@ def upgrade() -> None: sa.Column("task_from", sa.INTEGER(), nullable=False), sa.Column("task_to", sa.INTEGER(), nullable=False), sa.Column("agreement_metric", sa.FLOAT(), nullable=False), - sa.ForeignKeyConstraint(["task_from"], ["tasks.id"], ondelete="cascade"), + sa.ForeignKeyConstraint( + ["task_from"], ["tasks.id"], ondelete="cascade" + ), sa.ForeignKeyConstraint(["task_to"], ["tasks.id"], ondelete="cascade"), sa.PrimaryKeyConstraint("task_from", "task_to"), ) diff --git a/annotation/alembic/versions/89276b8ebe84_remove_annotateddoc_constraint.py b/annotation/alembic/versions/89276b8ebe84_remove_annotateddoc_constraint.py index 06e90ecd3..190041aa1 100644 --- a/annotation/alembic/versions/89276b8ebe84_remove_annotateddoc_constraint.py +++ b/annotation/alembic/versions/89276b8ebe84_remove_annotateddoc_constraint.py @@ -19,7 +19,9 @@ def upgrade(): op.drop_constraint( "annotated_docs_task_id_fkey", "annotated_docs", type_="foreignkey" ) - op.drop_constraint("annotated_docs_user_fkey", "annotated_docs", type_="foreignkey") + op.drop_constraint( + "annotated_docs_user_fkey", "annotated_docs", type_="foreignkey" + ) op.create_foreign_key( None, "annotated_docs", @@ -39,14 +41,18 @@ def upgrade(): def downgrade(): - op.execute('DELETE FROM annotated_docs WHERE "user" IS NULL AND pipeline IS NULL') + op.execute( + 'DELETE FROM annotated_docs WHERE "user" IS NULL AND pipeline IS NULL' + ) op.create_check_constraint( "annotated_docs_check", "annotated_docs", '("user" IS NULL AND pipeline IS NOT NULL) OR ' '("user" IS NOT NULL AND pipeline IS NULL)', ) - op.drop_constraint("annotated_docs_user_fkey", "annotated_docs", type_="foreignkey") + op.drop_constraint( + 
"annotated_docs_user_fkey", "annotated_docs", type_="foreignkey" + ) op.drop_constraint( "annotated_docs_task_id_fkey", "annotated_docs", type_="foreignkey" ) diff --git a/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py b/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py index bf4d9ffe6..c16f0523e 100644 --- a/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py +++ b/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py @@ -35,7 +35,9 @@ def downgrade(): op.alter_column( "categories", "data_attributes", - existing_type=postgresql.ARRAY(postgresql.JSONB(astext_type=sa.Text())), + existing_type=postgresql.ARRAY( + postgresql.JSONB(astext_type=sa.Text()) + ), type_=postgresql.ARRAY(sa.VARCHAR()), existing_nullable=True, ) diff --git a/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py b/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py index 26b9a59dc..f26495237 100644 --- a/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py +++ b/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py @@ -27,7 +27,9 @@ class JobStatusEnumSchema(str, Enum): def upgrade(): - job_status = postgresql.ENUM(JobStatusEnumSchema, name="jobstatusenumschema") + job_status = postgresql.ENUM( + JobStatusEnumSchema, name="jobstatusenumschema" + ) job_status.create(op.get_bind(), checkfirst=True) op.add_column( "jobs", diff --git a/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py b/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py index 8b135495d..52b44dd70 100644 --- a/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py +++ b/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py @@ -26,7 +26,9 @@ class CategoryTypeSchema(str, Enum): def upgrade(): - category_type = postgresql.ENUM(CategoryTypeSchema, name="categorytypeschema") + category_type = postgresql.ENUM( + CategoryTypeSchema, name="categorytypeschema" + ) category_type.create(op.get_bind(), checkfirst=True) op.add_column( "categories", @@ -36,16 +38,22 @@ def upgrade(): type_=category_type, ), ) - op.execute("UPDATE categories SET type = CAST ('box' AS categorytypeschema)") + op.execute( + "UPDATE categories SET type = CAST ('box' AS categorytypeschema)" + ) op.alter_column("categories", "type", nullable=False) op.drop_index("ix_categories_is_link", table_name="categories") op.drop_column("categories", "is_link") def downgrade(): - op.add_column("categories", sa.Column("is_link", sa.BOOLEAN(), nullable=True)) + op.add_column( + "categories", sa.Column("is_link", sa.BOOLEAN(), nullable=True) + ) op.execute("UPDATE categories SET is_link = 'false'") op.alter_column("categories", "is_link", nullable=False) - op.create_index("ix_categories_is_link", "categories", ["is_link"], unique=False) + op.create_index( + "ix_categories_is_link", "categories", ["is_link"], unique=False + ) op.drop_column("categories", "type") op.execute("DROP TYPE categorytypeschema;") diff --git a/annotation/annotation/annotations/main.py b/annotation/annotation/annotations/main.py index 37cf961b4..640ddef62 100644 --- a/annotation/annotation/annotations/main.py +++ b/annotation/annotation/annotations/main.py @@ -6,6 +6,14 @@ from uuid import UUID import boto3 +from dotenv import find_dotenv, load_dotenv +from fastapi import HTTPException +from kafka import KafkaProducer +from kafka.errors import KafkaError 
+from sqlalchemy import asc +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session + from annotation import logger from annotation.kafka_client import KAFKA_BOOTSTRAP_SERVER, KAFKA_SEARCH_TOPIC from annotation.kafka_client import producers as kafka_producers @@ -17,13 +25,6 @@ ParticularRevisionSchema, RevisionLink, ) -from dotenv import find_dotenv, load_dotenv -from fastapi import HTTPException -from kafka import KafkaProducer -from kafka.errors import KafkaError -from sqlalchemy import asc -from sqlalchemy.exc import IntegrityError -from sqlalchemy.orm import Session load_dotenv(find_dotenv()) ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL") @@ -134,7 +135,9 @@ def upload_pages_to_minio( for page in pages: json_page = json.dumps(page.dict()) path_to_object = f"{s3_path}/{pages_sha[str(page.page_num)]}.json" - upload_json_to_minio(json_page, path_to_object, bucket_name, s3_resource) + upload_json_to_minio( + json_page, path_to_object, bucket_name, s3_resource + ) def upload_json_to_minio( @@ -234,7 +237,9 @@ def create_manifest_json( manifest = row_to_dict(doc) redundant_keys = ("task_id", "file_id", "tenant", "categories") manifest = { - key: value for key, value in manifest.items() if key not in redundant_keys + key: value + for key, value in manifest.items() + if key not in redundant_keys } manifest["pages"] = all_pages manifest["validated"] = list(validated) @@ -249,7 +254,9 @@ def create_manifest_json( ] manifest_json = json.dumps(manifest) - upload_json_to_minio(manifest_json, manifest_path, bucket_name, s3_resource) + upload_json_to_minio( + manifest_json, manifest_path, bucket_name, s3_resource + ) def construct_annotated_doc( @@ -415,7 +422,8 @@ def check_docs_identity( latest_doc is not None and latest_doc.pages == new_doc.pages and set(latest_doc.validated) == new_doc.validated - and set(latest_doc.failed_validation_pages) == new_doc.failed_validation_pages + and set(latest_doc.failed_validation_pages) + == new_doc.failed_validation_pages and latest_doc.categories == new_doc.categories ) @@ -547,7 +555,9 @@ def find_all_revisions_pages( } """ pages = {} - revisions = [AnnotatedDocSchema.from_orm(revision) for revision in revisions] + revisions = [ + AnnotatedDocSchema.from_orm(revision) for revision in revisions + ] for revision in revisions: revision.pages = { int(key): value @@ -596,7 +606,9 @@ def find_latest_revision_pages( } """ pages = {} - revisions = [AnnotatedDocSchema.from_orm(revision) for revision in revisions] + revisions = [ + AnnotatedDocSchema.from_orm(revision) for revision in revisions + ] for revision in revisions: revision.pages = { int(key): value @@ -832,7 +844,9 @@ def accumulate_pages_info( specific_pages: Set[int] = None, with_page_hash: bool = False, unique_status: bool = False, -) -> Tuple[Set[int], Set[int], Set[int], Set[int], List[str], Optional[AnnotatedDoc]]: +) -> Tuple[ + Set[int], Set[int], Set[int], Set[int], List[str], Optional[AnnotatedDoc] +]: """ Get pages, that have been validated, marked as failed, annotated and not processed in all given revisions (revisions are sorted in asc order). 
@@ -855,7 +869,9 @@ def accumulate_pages_info( all_annotated.update(revision.pages) for status, attr in attr_map.items(): - latest_status.update({int(i): status for i in getattr(revision, attr)}) + latest_status.update( + {int(i): status for i in getattr(revision, attr)} + ) # if there is specific revision, where we need to stop, # we will stop here @@ -903,7 +919,9 @@ def accumulate_pages_info( annotated_list = all_annotated if with_page_hash: - annotated = {str(page): all_annotated[str(page)] for page in annotated_list} + annotated = { + str(page): all_annotated[str(page)] for page in annotated_list + } else: annotated = set(map(int, annotated_list)) @@ -959,7 +977,8 @@ def check_task_pages( for array_name, pgs in error_mapping.items(): if pgs: err_msg += ( - f"Pages {pgs} from {array_name} array " "do not belong to the task. " + f"Pages {pgs} from {array_name} array " + "do not belong to the task. " ) if err_msg: @@ -978,7 +997,9 @@ def _init_search_annotation_producer(): ) return producer except KafkaError as error: # KafkaError is parent of all kafka errors - logger_.warning(f"Error occurred during kafka producer creating: {error}") + logger_.warning( + f"Error occurred during kafka producer creating: {error}" + ) def add_search_annotation_producer() -> KafkaProducer: @@ -987,10 +1008,13 @@ def add_search_annotation_producer() -> KafkaProducer: return search_annotation_producer -def send_annotation_kafka_message(job_id: int, file_id: int, tenant: str) -> None: +def send_annotation_kafka_message( + job_id: int, file_id: int, tenant: str +) -> None: # if startup failed, try to recreate it search_annotation_producer = ( - kafka_producers.get("search_annotation") or add_search_annotation_producer() + kafka_producers.get("search_annotation") + or add_search_annotation_producer() ) if search_annotation_producer: search_annotation_producer.send( diff --git a/annotation/annotation/annotations/resources.py b/annotation/annotation/annotations/resources.py index 0a43e065a..5071acd66 100644 --- a/annotation/annotation/annotations/resources.py +++ b/annotation/annotation/annotations/resources.py @@ -1,6 +1,11 @@ from typing import Dict, List, Optional, Set from uuid import UUID +from fastapi import APIRouter, Depends, HTTPException, Path, Query, status +from sqlalchemy import and_, desc +from sqlalchemy.orm import Session +from tenant_dependency import TenantData + from annotation.database import get_db from annotation.errors import NoSuchRevisionsError from annotation.microservice_communication.assets_communication import ( @@ -22,10 +27,6 @@ ) from annotation.tags import ANNOTATION_TAG, JOBS_TAG, REVISION_TAG from annotation.tasks import update_task_status -from fastapi import APIRouter, Depends, HTTPException, Path, Query, status -from sqlalchemy import and_, desc -from sqlalchemy.orm import Session -from tenant_dependency import TenantData from ..models import AnnotatedDoc, File, Job, ManualAnnotationTask from ..token_dependency import TOKEN @@ -88,7 +89,8 @@ def post_annotation_by_user( if doc.user is None: raise HTTPException( status_code=400, - detail="Field user should not be null, " "when saving annotation by user.", + detail="Field user should not be null, " + "when saving annotation by user.", ) check_null_fields(doc) @@ -115,7 +117,9 @@ def post_annotation_by_user( f"User_id associated with task: [{task.user_id}].", ) - if not task.is_validation and (doc.validated or doc.failed_validation_pages): + if not task.is_validation and ( + doc.validated or doc.failed_validation_pages + ): 
raise HTTPException( status_code=400, detail="This task is for annotation. " @@ -327,7 +331,11 @@ def get_jobs_by_file_id( x_current_tenant: str = X_CURRENT_TENANT_HEADER, db: Session = Depends(get_db), ): - db_file = db.query(File).filter_by(file_id=file_id, tenant=x_current_tenant).first() + db_file = ( + db.query(File) + .filter_by(file_id=file_id, tenant=x_current_tenant) + .first() + ) if not db_file: raise HTTPException( status_code=404, @@ -342,7 +350,10 @@ def get_jobs_by_file_id( .distinct(AnnotatedDoc.job_id, AnnotatedDoc.pipeline) .all() ) - return [{"job_id": job.job_id, "is_manual": not bool(job.pipeline)} for job in jobs] + return [ + {"job_id": job.job_id, "is_manual": not bool(job.pipeline)} + for job in jobs + ] @router.get( @@ -374,7 +385,10 @@ def get_latest_revision_by_user( if user_id: filters.append(AnnotatedDoc.user == user_id) revisions = ( - db.query(AnnotatedDoc).filter(and_(*filters)).order_by(AnnotatedDoc.date).all() + db.query(AnnotatedDoc) + .filter(and_(*filters)) + .order_by(AnnotatedDoc.date) + .all() ) pages = find_latest_revision_pages(revisions, page_numbers) if not pages: @@ -405,7 +419,8 @@ def get_annotations_up_to_given_revision( user_id: Optional[UUID] = Query( None, example="1843c251-564b-4c2f-8d42-c61fdac369a1", - description="Required in case job validation type is extensive_" "coverage", + description="Required in case job validation type is extensive_" + "coverage", ), ): job: Job = db.query(Job).filter(Job.job_id == job_id).first() @@ -422,7 +437,10 @@ def get_annotations_up_to_given_revision( if job.validation_type == ValidationSchema.extensive_coverage: filters.append(AnnotatedDoc.user.in_((user_id, None))) revisions = ( - db.query(AnnotatedDoc).filter(*filters).order_by(AnnotatedDoc.date.asc()).all() + db.query(AnnotatedDoc) + .filter(*filters) + .order_by(AnnotatedDoc.date.asc()) + .all() ) if not revisions: @@ -524,7 +542,8 @@ def get_annotation_for_given_revision( responses={ 500: {"model": ConnectionErrorSchema}, }, - summary="Get all users revisions (or pipeline revision) " "for particular pages.", + summary="Get all users revisions (or pipeline revision) " + "for particular pages.", tags=[REVISION_TAG, ANNOTATION_TAG], ) def get_all_revisions( @@ -535,7 +554,8 @@ def get_all_revisions( user_id: Optional[UUID] = Query( None, example="1843c251-564b-4c2f-8d42-c61fdac369a1", - description="Required in case job validation type is extensive_" "coverage", + description="Required in case job validation type is extensive_" + "coverage", ), db: Session = Depends(get_db), ): @@ -553,7 +573,10 @@ def get_all_revisions( if job.validation_type == ValidationSchema.extensive_coverage: filters.append(AnnotatedDoc.user.in_((user_id, None))) revisions = ( - db.query(AnnotatedDoc).filter(and_(*filters)).order_by(AnnotatedDoc.date).all() + db.query(AnnotatedDoc) + .filter(and_(*filters)) + .order_by(AnnotatedDoc.date) + .all() ) pages = find_all_revisions_pages(revisions, page_numbers) if not pages: diff --git a/annotation/annotation/categories/resources.py b/annotation/annotation/categories/resources.py index 3a2b11d35..8efeb687f 100644 --- a/annotation/annotation/categories/resources.py +++ b/annotation/annotation/categories/resources.py @@ -1,5 +1,10 @@ from typing import List, Union +from fastapi import APIRouter, Depends, HTTPException, Path, Response, status +from filter_lib import Page +from sqlalchemy.orm import Session +from sqlalchemy_filters.exceptions import BadFilterFormat + from annotation.database import get_db from annotation.errors import 
NoSuchCategoryError from annotation.filters import CategoryFilter @@ -16,10 +21,6 @@ SubCategoriesOutSchema, ) from annotation.tags import CATEGORIES_TAG -from fastapi import APIRouter, Depends, HTTPException, Path, Response, status -from filter_lib import Page -from sqlalchemy.orm import Session -from sqlalchemy_filters.exceptions import BadFilterFormat from .services import ( add_category_db, @@ -73,7 +74,9 @@ def fetch_category( x_current_tenant: str = X_CURRENT_TENANT_HEADER, ) -> CategoryResponseSchema: category_db = fetch_category_db(db, category_id, x_current_tenant) - category_response = insert_category_tree(db, category_db, tenant=x_current_tenant) + category_response = insert_category_tree( + db, category_db, tenant=x_current_tenant + ) return category_response @@ -150,7 +153,9 @@ def update_category( """ Updates category by id and returns updated category. """ - category_db = update_category_db(db, category_id, query.dict(), x_current_tenant) + category_db = update_category_db( + db, category_id, query.dict(), x_current_tenant + ) if not category_db: raise NoSuchCategoryError("Cannot update category parameters") return response_object_from_db(category_db) diff --git a/annotation/annotation/categories/services.py b/annotation/annotation/categories/services.py index 44cd3cc85..c81adbb2b 100644 --- a/annotation/annotation/categories/services.py +++ b/annotation/annotation/categories/services.py @@ -1,6 +1,14 @@ import uuid from typing import Dict, List, Optional, Set, Tuple, Union +from cachetools import TTLCache, cached, keys +from filter_lib import Page, form_query, map_request_to_filter, paginate +from sqlalchemy import and_, null, or_ +from sqlalchemy.event import listens_for +from sqlalchemy.orm import Session +from sqlalchemy.sql.expression import func +from sqlalchemy_utils import Ltree + from annotation import logger as app_logger from annotation.errors import ( CheckFieldError, @@ -15,13 +23,6 @@ CategoryORMSchema, CategoryResponseSchema, ) -from cachetools import TTLCache, cached, keys -from filter_lib import Page, form_query, map_request_to_filter, paginate -from sqlalchemy import and_, null, or_ -from sqlalchemy.event import listens_for -from sqlalchemy.orm import Session -from sqlalchemy.sql.expression import func -from sqlalchemy_utils import Ltree cache = TTLCache(maxsize=128, ttl=300) @@ -113,7 +114,9 @@ def response_object_from_db(category_db: Category) -> CategoryResponseSchema: return CategoryResponseSchema.parse_obj(category_orm) -def fetch_category_parents(db: Session, category_input: Category) -> List[Category]: +def fetch_category_parents( + db: Session, category_input: Category +) -> List[Category]: return ( db.query(Category) .filter(Category.tree.ancestor_of(category_input.tree)) @@ -122,7 +125,9 @@ def fetch_category_parents(db: Session, category_input: Category) -> List[Catego ) # remove self item from result -def fetch_category_children(db: Session, category_input: Category) -> List[Category]: +def fetch_category_children( + db: Session, category_input: Category +) -> List[Category]: return ( db.query(Category) .filter(Category.tree.descendant_of(category_input.tree)) @@ -147,7 +152,9 @@ def check_unique_category_field( def fetch_category_db(db: Session, category_id: str, tenant: str) -> Category: category = db.query(Category).get(category_id) if not category or category.tenant and category.tenant != tenant: - raise NoSuchCategoryError(f"Category with id: {category_id} doesn't exist") + raise NoSuchCategoryError( + f"Category with id: {category_id} 
doesn't exist" + ) return category @@ -190,7 +197,9 @@ def recursive_subcategory_search( if child_ids: child_categories.update(child_ids) for child_id in child_ids: - recursive_subcategory_search(db, child_id, root_id, child_categories) + recursive_subcategory_search( + db, child_id, root_id, child_categories + ) return child_categories @@ -260,7 +269,9 @@ def _get_leaves( return leaves -def _extract_category(path: str, categories: Dict[str, Category]) -> List[Category]: +def _extract_category( + path: str, categories: Dict[str, Category] +) -> List[Category]: return [categories[node] for node in path.split(".")[0:-1]] @@ -281,7 +292,8 @@ def _get_parents( uniq_cats = uniq_cats.union({tree.path for tree in cat.tree}) category_to_object = { - cat.id: cat for cat in fetch_bunch_categories_db(db, uniq_cats, tenant, job_id) + cat.id: cat + for cat in fetch_bunch_categories_db(db, uniq_cats, tenant, job_id) } for path in uniq_pathes: @@ -309,7 +321,9 @@ def _compose_response( { **CategoryORMSchema.from_orm(cat).dict(), "is_leaf": leaves.get(cat.id, False), - "parents": converted_parents.get(cat.tree.path, []) if cat.tree else [], + "parents": converted_parents.get(cat.tree.path, []) + if cat.tree + else [], } ) for cat in categories @@ -343,7 +357,9 @@ def filter_category_db( tenant: str, job_id: Optional[int] = None, ) -> Page[Union[CategoryResponseSchema, str, dict]]: - child_categories, pagination = _get_child_categories(db, request, tenant, job_id) + child_categories, pagination = _get_child_categories( + db, request, tenant, job_id + ) if request.filters and "distinct" in [ item.operator.value for item in request.filters @@ -391,7 +407,9 @@ def update_category_db( ) ex_parent_id = category.parent new_parent_id = update_query["parent"] - parent_db = db.query(Category).get(new_parent_id) if new_parent_id else None + parent_db = ( + db.query(Category).get(new_parent_id) if new_parent_id else None + ) if parent_db and parent_db.tenant not in [tenant, None]: raise ForeignKeyError("Category with this id doesn't exist.") diff --git a/annotation/annotation/distribution/main.py b/annotation/annotation/distribution/main.py index a2267b55d..37b66ba91 100644 --- a/annotation/annotation/distribution/main.py +++ b/annotation/annotation/distribution/main.py @@ -45,6 +45,8 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple, Union from uuid import UUID +from sqlalchemy.orm import Session + from annotation.jobs import create_user, read_user from annotation.microservice_communication.assets_communication import ( FilesForDistribution, @@ -52,7 +54,6 @@ from annotation.models import File, User from annotation.schemas import TaskStatusEnumSchema, ValidationSchema from annotation.tasks import create_tasks as create_db_tasks -from sqlalchemy.orm import Session MAX_PAGES = 50 @@ -132,7 +133,9 @@ def distribute( tasks.extend(annotation_tasks) if validation_type == ValidationSchema.cross: annotated_files_pages = find_annotated_pages(tasks) - job_validators = choose_validators_users(validation_type, annotators, validators) + job_validators = choose_validators_users( + validation_type, annotators, validators + ) if job_validators: validation_tasks = distribute_tasks( annotated_files_pages, @@ -194,7 +197,9 @@ def distribute_tasks_extensively( user_can_take_pages = min(len(pages), user_can_take_pages) pages_not_seen_by_user = sorted( set(pages).difference( - users_seen_pages[annotators[0]["user_id"]][file["file_id"]] + users_seen_pages[annotators[0]["user_id"]][ + file["file_id"] + ] ) ) @@ -214,9 +219,9 
@@ def distribute_tasks_extensively( "deadline": deadline, } ) - users_seen_pages[annotators[0]["user_id"]][file["file_id"]].update( - set(pages_for_user) - ) + users_seen_pages[annotators[0]["user_id"]][ + file["file_id"] + ].update(set(pages_for_user)) pages = sorted(set(pages).difference(set(pages_for_user))) annotators[0]["pages_number"] -= len(pages_for_user) if annotators[0]["pages_number"] == 0: @@ -351,7 +356,9 @@ def find_users_share_loads( """ quantity = len(users) for user in users: - average_pages_deviation = users_overall_load - user["overall_load"] * quantity + average_pages_deviation = ( + users_overall_load - user["overall_load"] * quantity + ) average_deviation_coefficient = ( average_pages_deviation / (users_overall_load * quantity) if users_overall_load @@ -360,11 +367,15 @@ def find_users_share_loads( pages_deviation = average_deviation_coefficient * average_job_pages user_deviation_pages = average_job_pages + pages_deviation user["share_load"] = ( - user_deviation_pages / all_job_pages_sum if all_job_pages_sum else 1 + user_deviation_pages / all_job_pages_sum + if all_job_pages_sum + else 1 ) default_load_part = user["default_load"] / users_default_load user["share_load"] *= default_load_part - all_annotators_share_load = sum(annotator["share_load"] for annotator in users) + all_annotators_share_load = sum( + annotator["share_load"] for annotator in users + ) return all_annotators_share_load @@ -389,7 +400,9 @@ def distribute_whole_files( files_to_distribute = [ item for item in files if item["file_id"] not in annotated_files ] - files_for_task = find_equal_files(files_to_distribute, user["pages_number"]) + files_for_task = find_equal_files( + files_to_distribute, user["pages_number"] + ) create_tasks( tasks, files_for_task, @@ -399,7 +412,9 @@ def distribute_whole_files( tasks_status, deadline, ) - files_for_task = find_small_files(files_to_distribute, user["pages_number"]) + files_for_task = find_small_files( + files_to_distribute, user["pages_number"] + ) create_tasks( tasks, files_for_task, @@ -428,7 +443,8 @@ def find_files_for_task( file_for_task = next( x for x in files - if x["pages_number"] == pages and x["file_id"] not in distributed_files + if x["pages_number"] == pages + and x["file_id"] not in distributed_files ) files_for_task.append(file_for_task) distributed_files.append(file_for_task["file_id"]) @@ -572,7 +588,9 @@ def distribute_annotation_partial_files( annotators[0]["pages_number"] -= 1 if pages: full_tasks = len(pages) // MAX_PAGES - tasks_number = full_tasks + 1 if len(pages) % MAX_PAGES else full_tasks + tasks_number = ( + full_tasks + 1 if len(pages) % MAX_PAGES else full_tasks + ) for times in range(tasks_number): annotation_tasks.append( { @@ -635,7 +653,9 @@ def filter_validation_files_pages( if validator["pages_number"] > 0 else 0 ) - files_all_pages[file_id].difference_update(files_for_validation[file_id]) + files_all_pages[file_id].difference_update( + files_for_validation[file_id] + ) return files_for_validation @@ -665,7 +685,9 @@ def create_partial_validation_tasks( for file_id, pages in validation_files_pages.items(): if pages: full_tasks = len(pages) // MAX_PAGES - tasks_number = full_tasks + 1 if len(pages) % MAX_PAGES else full_tasks + tasks_number = ( + full_tasks + 1 if len(pages) % MAX_PAGES else full_tasks + ) for times in range(tasks_number): validation_tasks.append( { @@ -785,7 +807,9 @@ def check_file_distribution( add_unassigned_file(files_to_distribute, file_id, pages_number) else: # file was partially distributed - 
unassigned_pages = find_unassigned_pages(distributed_pages, pages_number) + unassigned_pages = find_unassigned_pages( + distributed_pages, pages_number + ) add_unassigned_file( files_to_distribute, file_id, @@ -794,11 +818,17 @@ def check_file_distribution( ) -def find_unassigned_pages(assigned_pages: list, pages_amount: int) -> List[int]: +def find_unassigned_pages( + assigned_pages: list, pages_amount: int +) -> List[int]: """ Get all pages, that were not distributed. """ - return [page for page in range(1, pages_amount + 1) if page not in assigned_pages] + return [ + page + for page in range(1, pages_amount + 1) + if page not in assigned_pages + ] def add_unassigned_file( diff --git a/annotation/annotation/distribution/resources.py b/annotation/annotation/distribution/resources.py index 916d5049c..a8ef4a6ff 100644 --- a/annotation/annotation/distribution/resources.py +++ b/annotation/annotation/distribution/resources.py @@ -1,5 +1,10 @@ from typing import List +from fastapi import APIRouter, Depends, Path, status +from sqlalchemy import and_ +from sqlalchemy.orm import Session +from tenant_dependency import TenantData + from annotation.database import get_db from annotation.distribution import ( distribute, @@ -28,10 +33,6 @@ ) from annotation.tags import TASKS_TAG from annotation.token_dependency import TOKEN -from fastapi import APIRouter, Depends, Path, status -from sqlalchemy import and_ -from sqlalchemy.orm import Session -from tenant_dependency import TenantData router = APIRouter( prefix="/distribution", @@ -68,12 +69,15 @@ def post_tasks( task_file_ids = {task_file["file_id"] for task_file in files} job_files = [ file_db[0] - for file_db in db.query(File.file_id).filter(File.job_id == job_id).all() + for file_db in db.query(File.file_id) + .filter(File.job_id == job_id) + .all() ] files_beyond_job = task_file_ids.difference(job_files) if files_beyond_job: raise FieldConstraintError( - f"Files with ids {files_beyond_job} are not assigned to " f"job {job_id}" + f"Files with ids {files_beyond_job} are not assigned to " + f"job {job_id}" ) annotators = ( db.query(User) @@ -97,7 +101,9 @@ def post_tasks( .all() ) validator_ids = {user.user_id for user in validators} - users_beyond_job = task_info.user_ids.difference(annotator_ids.union(validator_ids)) + users_beyond_job = task_info.user_ids.difference( + annotator_ids.union(validator_ids) + ) if users_beyond_job: raise FieldConstraintError( f"Users with ids {users_beyond_job} are not assigned to " @@ -129,7 +135,8 @@ def post_tasks( responses={ 400: {"model": BadRequestErrorSchema}, }, - summary="Distribute all remaining unassigned " "files and pages for given job_id.", + summary="Distribute all remaining unassigned " + "files and pages for given job_id.", ) def post_tasks_for_unassigned_files( job_id: int = Path(..., example=3), @@ -142,7 +149,10 @@ def post_tasks_for_unassigned_files( annotation_files_to_distribute, validation_files_to_distribute, ) = find_unassigned_files(job.files) - if not annotation_files_to_distribute and not validation_files_to_distribute: + if ( + not annotation_files_to_distribute + and not validation_files_to_distribute + ): return [] annotation_files_to_distribute = prepare_files_for_distribution( annotation_files_to_distribute diff --git a/annotation/annotation/errors.py b/annotation/annotation/errors.py index 6158aa56e..9b9cc839e 100644 --- a/annotation/annotation/errors.py +++ b/annotation/annotation/errors.py @@ -1,9 +1,10 @@ -from annotation import logger as app_logger from botocore.exceptions import 
BotoCoreError, ClientError from fastapi.requests import Request from fastapi.responses import JSONResponse from sqlalchemy.exc import DBAPIError, SQLAlchemyError +from annotation import logger as app_logger + logger = app_logger.Logger @@ -51,7 +52,9 @@ def __init__(self, exc: str): self.exc = exc -def no_such_revisions_error_handler(request: Request, exc: NoSuchRevisionsError): +def no_such_revisions_error_handler( + request: Request, exc: NoSuchRevisionsError +): return JSONResponse( status_code=404, content={"detail": "Cannot find such revision(s)."}, @@ -72,7 +75,9 @@ def no_such_category_error_handler(request: Request, exc: NoSuchCategoryError): ) -def category_unique_field_error_handler(request: Request, exc: CheckFieldError): +def category_unique_field_error_handler( + request: Request, exc: CheckFieldError +): return JSONResponse( status_code=400, content={"detail": f"Field constraint error. {exc.message}"}, @@ -114,7 +119,9 @@ def minio_no_such_bucket_error_handler(request: Request, exc: ClientError): ) -def field_constraint_error_handler(request: Request, exc: FieldConstraintError): +def field_constraint_error_handler( + request: Request, exc: FieldConstraintError +): return JSONResponse( status_code=400, content={"detail": f"Error: {exc.message}"}, @@ -128,7 +135,9 @@ def enum_validation_error_handler(request: Request, exc: EnumValidationError): ) -def category_parent_child_error_handler(request: Request, exc: SelfParentError): +def category_parent_child_error_handler( + request: Request, exc: SelfParentError +): return JSONResponse( status_code=400, content={"detail": f"Self parent error. {exc.message}"}, diff --git a/annotation/annotation/filters.py b/annotation/annotation/filters.py index 859a21179..c8f8d1448 100644 --- a/annotation/annotation/filters.py +++ b/annotation/annotation/filters.py @@ -1,6 +1,12 @@ from filter_lib import create_filter_model -from annotation.models import AnnotatedDoc, Category, Job, ManualAnnotationTask, User +from annotation.models import ( + AnnotatedDoc, + Category, + Job, + ManualAnnotationTask, + User, +) CategoryFilter = create_filter_model( Category, diff --git a/annotation/annotation/jobs/resources.py b/annotation/annotation/jobs/resources.py index e0b5f757d..030e860fb 100644 --- a/annotation/annotation/jobs/resources.py +++ b/annotation/annotation/jobs/resources.py @@ -1,6 +1,22 @@ from typing import Dict, List, Optional, Set, Union from uuid import UUID +from fastapi import ( + APIRouter, + Depends, + HTTPException, + Path, + Query, + Response, + status, +) +from filter_lib import Page +from sqlalchemy import and_ +from sqlalchemy.orm import Session +from sqlalchemy.sql.expression import func, or_ +from sqlalchemy_filters.exceptions import BadFilterFormat +from tenant_dependency import TenantData + import annotation.categories.services from annotation import logger as app_logger from annotation.categories import fetch_bunch_categories_db @@ -36,21 +52,6 @@ ) from annotation.tags import FILES_TAG, JOBS_TAG from annotation.token_dependency import TOKEN -from fastapi import ( - APIRouter, - Depends, - HTTPException, - Path, - Query, - Response, - status, -) -from filter_lib import Page -from sqlalchemy import and_ -from sqlalchemy.orm import Session -from sqlalchemy.sql.expression import func, or_ -from sqlalchemy_filters.exceptions import BadFilterFormat -from tenant_dependency import TenantData from ..models import ( AnnotatedDoc, @@ -114,8 +115,12 @@ def post_job( ) db.add_all(new_users) db_users = saved_users + new_users - annotators = 
[user for user in db_users if user.user_id in job_info.annotators] - validators = [user for user in db_users if user.user_id in job_info.validators] + annotators = [ + user for user in db_users if user.user_id in job_info.annotators + ] + validators = [ + user for user in db_users if user.user_id in job_info.validators + ] owners = [user for user in db_users if user.user_id in job_info.owners] categories = fetch_bunch_categories_db( db, job_info.categories, x_current_tenant, root_parents=True @@ -159,7 +164,9 @@ def post_job( tenant=x_current_tenant, job_id=job_id, pages_number=f["pages_number"], - distributed_annotating_pages=list(range(1, f["pages_number"] + 1)), + distributed_annotating_pages=list( + range(1, f["pages_number"] + 1) + ), annotated_pages=list(range(1, f["pages_number"] + 1)), status=FileStatusEnumSchema.pending, ) @@ -351,8 +358,10 @@ def get_unassigned_files( job_undistributed_files = job_files.filter( or_( - func.cardinality(File.distributed_annotating_pages) != File.pages_number, - func.cardinality(File.distributed_validating_pages) != File.pages_number, + func.cardinality(File.distributed_annotating_pages) + != File.pages_number, + func.cardinality(File.distributed_validating_pages) + != File.pages_number, ), ) @@ -418,7 +427,9 @@ def start_job( job status to In Progress. """ job = get_job(db, job_id, x_current_tenant) - annotation_tasks = db.query(ManualAnnotationTask).filter_by(job_id=job_id).all() + annotation_tasks = ( + db.query(ManualAnnotationTask).filter_by(job_id=job_id).all() + ) if not annotation_tasks: raise HTTPException( status_code=404, @@ -487,7 +498,10 @@ def get_users_for_job( .filter(User.job_annotators.any(job_id=job_id)) .all() ) - return [{"id": user.user_id, "overall_load": user.overall_load} for user in users] + return [ + {"id": user.user_id, "overall_load": user.overall_load} + for user in users + ] # Get categories for job_id, each entity requires children/parents @@ -576,7 +590,10 @@ def get_jobs_info_by_files( db, file_ids, x_current_tenant, token.token ) - return {file_id: grouped_by_file_jobs_info.get(file_id, []) for file_id in file_ids} + return { + file_id: grouped_by_file_jobs_info.get(file_id, []) + for file_id in file_ids + } @router.get( diff --git a/annotation/annotation/jobs/services.py b/annotation/annotation/jobs/services.py index 5ec0fa108..b87486504 100644 --- a/annotation/annotation/jobs/services.py +++ b/annotation/annotation/jobs/services.py @@ -2,6 +2,12 @@ from typing import Any, DefaultDict, Dict, List, Optional, Set, Tuple, Union from uuid import UUID +from filter_lib import Page, form_query, map_request_to_filter, paginate +from pydantic import ValidationError +from sqlalchemy import and_, desc, not_ +from sqlalchemy.orm import Session, query +from sqlalchemy.orm.attributes import InstrumentedAttribute + from annotation.categories import fetch_bunch_categories_db from annotation.categories.services import response_object_from_db from annotation.database import Base @@ -35,14 +41,11 @@ TaskStatusEnumSchema, ValidationSchema, ) -from filter_lib import Page, form_query, map_request_to_filter, paginate -from pydantic import ValidationError -from sqlalchemy import and_, desc, not_ -from sqlalchemy.orm import Session, query -from sqlalchemy.orm.attributes import InstrumentedAttribute -def update_inner_job_status(db: Session, job_id: int, status: JobStatusEnumSchema): +def update_inner_job_status( + db: Session, job_id: int, status: JobStatusEnumSchema +): """Updates job status in db""" db.query(Job).filter(Job.job_id == 
job_id).update({"status": status}) @@ -129,7 +132,9 @@ def get_job_attributes_for_post( return job_attributes -def check_annotators(annotators: Set[UUID], validation_type: ValidationSchema) -> None: +def check_annotators( + annotators: Set[UUID], validation_type: ValidationSchema +) -> None: annotators_validation_mapping = { ValidationSchema.cross: ( len(annotators) < CROSS_MIN_ANNOTATORS_NUMBER, @@ -154,7 +159,9 @@ def check_annotators(annotators: Set[UUID], validation_type: ValidationSchema) - raise FieldConstraintError(error_message) -def check_validators(validators: Set[UUID], validation_type: ValidationSchema) -> None: +def check_validators( + validators: Set[UUID], validation_type: ValidationSchema +) -> None: validators_validation_mapping = { ValidationSchema.cross: ( validators, @@ -163,7 +170,8 @@ def check_validators(validators: Set[UUID], validation_type: ValidationSchema) - ), ValidationSchema.hierarchical: ( not validators, - "If the validation type is hierarchical, validators should " "be provided.", + "If the validation type is hierarchical, validators should " + "be provided.", ), ValidationSchema.validation_only: ( not validators, @@ -278,7 +286,8 @@ def find_users(db: Session, users_ids: Set[UUID]): saved_users = db.query(User).filter(User.user_id.in_(users_ids)).all() saved_users_ids = {user.user_id for user in saved_users} new_users = [ - User(user_id=user_id) for user_id in users_ids.difference(saved_users_ids) + User(user_id=user_id) + for user_id in users_ids.difference(saved_users_ids) ] return saved_users, new_users diff --git a/annotation/annotation/main.py b/annotation/annotation/main.py index d907b1bb7..45f4c283b 100644 --- a/annotation/annotation/main.py +++ b/annotation/annotation/main.py @@ -1,6 +1,12 @@ import os import pathlib +from botocore.exceptions import BotoCoreError, ClientError +from dotenv import find_dotenv, load_dotenv +from fastapi import Depends, FastAPI +from sqlalchemy.exc import DBAPIError, SQLAlchemyError +from starlette.requests import Request + from annotation import logger as app_logger from annotation.annotations import resources as annotations_resources from annotation.categories import resources as categories_resources @@ -36,11 +42,6 @@ from annotation.tags import TAGS from annotation.tasks import resources as task_resources from annotation.token_dependency import TOKEN -from botocore.exceptions import BotoCoreError, ClientError -from dotenv import find_dotenv, load_dotenv -from fastapi import Depends, FastAPI -from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from starlette.requests import Request load_dotenv(find_dotenv()) @@ -89,7 +90,9 @@ async def catch_exceptions_middleware(request: Request, call_next): app.add_exception_handler( AgreementScoreServiceException, agreement_score_service_error_handler ) -app.add_exception_handler(NoSuchRevisionsError, no_such_revisions_error_handler) +app.add_exception_handler( + NoSuchRevisionsError, no_such_revisions_error_handler +) app.add_exception_handler(CheckFieldError, category_unique_field_error_handler) app.add_exception_handler(EnumValidationError, enum_validation_error_handler) app.add_exception_handler(FieldConstraintError, field_constraint_error_handler) diff --git a/annotation/annotation/metadata/resources.py b/annotation/annotation/metadata/resources.py index be013589f..f64c2aab1 100644 --- a/annotation/annotation/metadata/resources.py +++ b/annotation/annotation/metadata/resources.py @@ -1,6 +1,8 @@ from fastapi import APIRouter, status -from 
annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.microservice_communication.search import ( + X_CURRENT_TENANT_HEADER, +) from annotation.schemas import EntitiesStatusesSchema from annotation.tags import METADATA_TAG, TASKS_TAG diff --git a/annotation/annotation/microservice_communication/assets_communication.py b/annotation/annotation/microservice_communication/assets_communication.py index 6c1d1c3ac..bf0b52eb1 100644 --- a/annotation/annotation/microservice_communication/assets_communication.py +++ b/annotation/annotation/microservice_communication/assets_communication.py @@ -2,6 +2,9 @@ from typing import Dict, List, Optional, Set, Tuple, Union import requests +from dotenv import find_dotenv, load_dotenv +from requests import ConnectionError, RequestException, Timeout + from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, @@ -9,8 +12,6 @@ get_response, raise_request_exception, ) -from dotenv import find_dotenv, load_dotenv -from requests import ConnectionError, RequestException, Timeout load_dotenv(find_dotenv()) ASSETS_FILES_URL = os.environ.get("ASSETS_FILES_URL") @@ -53,9 +54,13 @@ def get_files_info( for f in dataset_files_info if f["id"] not in files ] - files.update({dataset_file["file_id"] for dataset_file in dataset_pages_info}) + files.update( + {dataset_file["file_id"] for dataset_file in dataset_pages_info} + ) datasets_pages_info.extend(dataset_pages_info) - return prepare_files_for_distribution(files_pages_info + datasets_pages_info) + return prepare_files_for_distribution( + files_pages_info + datasets_pages_info + ) def get_dataset_info(dataset_id: int, tenant: str, token: str) -> List[dict]: @@ -75,7 +80,9 @@ def get_dataset_info(dataset_id: int, tenant: str, token: str) -> List[dict]: return dataset_files_info.json() -def get_file_names(file_ids: List[int], tenant: str, token: str) -> Dict[int, str]: +def get_file_names( + file_ids: List[int], tenant: str, token: str +) -> Dict[int, str]: """ Return dict of file_id and its name for provided file_ids. diff --git a/annotation/annotation/microservice_communication/jobs_communication.py b/annotation/annotation/microservice_communication/jobs_communication.py index b62689203..2c031ddb6 100644 --- a/annotation/annotation/microservice_communication/jobs_communication.py +++ b/annotation/annotation/microservice_communication/jobs_communication.py @@ -2,14 +2,15 @@ from typing import Dict, List, Union import requests +from dotenv import find_dotenv, load_dotenv +from requests import RequestException + from annotation.microservice_communication.search import ( AUTHORIZATION, BEARER, HEADER_TENANT, get_response, ) -from dotenv import find_dotenv, load_dotenv -from requests import RequestException load_dotenv(find_dotenv()) JOBS_SEARCH_URL = os.environ.get("JOBS_SEARCH_URL") @@ -39,7 +40,9 @@ def update_job_status(callback_url: str, status: str, tenant: str, token: str): raise JobUpdateException(exc) -def get_job_names(job_ids: List[int], tenant: str, token: str) -> Dict[int, str]: +def get_job_names( + job_ids: List[int], tenant: str, token: str +) -> Dict[int, str]: """ Return dict of job_id and its name for provided job_ids from jobs microservice. 
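The hunks above (errors.py, filters.py, jobs/resources.py, jobs/services.py, main.py, and the microservice_communication modules) all make the same mechanical change: third-party imports are moved above the renamed first-party `annotation` package, giving the usual stdlib / third-party / first-party grouping, plus 79-character wrapping of long signatures. A minimal sketch of the resulting import layout, built only from names that appear in the jobs_communication.py hunk directly above (the repo-local `annotation` package is assumed to be importable):

    import os  # standard library

    import requests  # third-party
    from dotenv import find_dotenv, load_dotenv
    from requests import RequestException

    from annotation.microservice_communication.search import (  # first-party
        AUTHORIZATION,
        BEARER,
        HEADER_TENANT,
        get_response,
    )

    load_dotenv(find_dotenv())
    JOBS_SEARCH_URL = os.environ.get("JOBS_SEARCH_URL")

The blank-line separation between the three groups is what the moved import blocks in each of these hunks reproduce.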
diff --git a/annotation/annotation/microservice_communication/search.py b/annotation/annotation/microservice_communication/search.py index f139a985f..b97a7282d 100644 --- a/annotation/annotation/microservice_communication/search.py +++ b/annotation/annotation/microservice_communication/search.py @@ -90,11 +90,12 @@ from typing import Dict, List import requests +from fastapi import Header, HTTPException +from requests.exceptions import ConnectionError, RequestException, Timeout + from annotation.annotations import row_to_dict from annotation.models import ManualAnnotationTask from annotation.schemas import ExpandedManualAnnotationTaskSchema -from fastapi import Header, HTTPException -from requests.exceptions import ConnectionError, RequestException, Timeout PAGE_SIZE = 100 # max page size in assets HEADER_TENANT = "X-Current-Tenant" @@ -137,7 +138,9 @@ def construct_search_params(page: int, ids: List[int]): } -def get_response(ids: List[int], url: str, tenant: str, token: str) -> List[dict]: +def get_response( + ids: List[int], url: str, tenant: str, token: str +) -> List[dict]: """ Request from jobs or assets microservices all elements, that have provided ids. diff --git a/annotation/annotation/models.py b/annotation/annotation/models.py index bf4905150..5f8f40536 100644 --- a/annotation/annotation/models.py +++ b/annotation/annotation/models.py @@ -1,18 +1,6 @@ from datetime import datetime from typing import Callable -from annotation.database import Base -from annotation.errors import CheckFieldError -from annotation.schemas import ( - DEFAULT_LOAD, - AnnotationStatisticsEventEnumSchema, - CategoryTypeSchema, - FileStatusEnumSchema, - JobStatusEnumSchema, - JobTypeEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) from sqlalchemy import ( BOOLEAN, FLOAT, @@ -33,6 +21,19 @@ from sqlalchemy.orm import relationship, validates from sqlalchemy_utils import Ltree, LtreeType +from annotation.database import Base +from annotation.errors import CheckFieldError +from annotation.schemas import ( + DEFAULT_LOAD, + AnnotationStatisticsEventEnumSchema, + CategoryTypeSchema, + FileStatusEnumSchema, + JobStatusEnumSchema, + JobTypeEnumSchema, + TaskStatusEnumSchema, + ValidationSchema, +) + association_job_annotator = Table( "association_job_annotator", Base.metadata, @@ -89,7 +90,9 @@ class AnnotatedDoc(Base): revision = Column(VARCHAR, primary_key=True) file_id = Column(INTEGER, primary_key=True) job_id = Column(INTEGER, primary_key=True) - user = Column(UUID(as_uuid=True), ForeignKey("users.user_id", ondelete="SET NULL")) + user = Column( + UUID(as_uuid=True), ForeignKey("users.user_id", ondelete="SET NULL") + ) pipeline = Column(INTEGER) date = Column(TIMESTAMP, server_default=func.now(), nullable=False) pages = Column(JSON, nullable=False, server_default="{}") @@ -283,7 +286,9 @@ class ManualAnnotationTask(Base): job_id = Column( INTEGER, ForeignKey("jobs.job_id", ondelete="cascade"), nullable=False ) - user_id = Column(UUID(as_uuid=True), ForeignKey("users.user_id"), nullable=False) + user_id = Column( + UUID(as_uuid=True), ForeignKey("users.user_id"), nullable=False + ) is_validation = Column(BOOLEAN, nullable=False) status = Column( ENUM(TaskStatusEnumSchema), diff --git a/annotation/annotation/revisions/resources.py b/annotation/annotation/revisions/resources.py index 6739fb119..2bc837d27 100644 --- a/annotation/annotation/revisions/resources.py +++ b/annotation/annotation/revisions/resources.py @@ -5,7 +5,9 @@ from starlette import status from annotation.database import get_db -from 
annotation.microservice_communication.search import X_CURRENT_TENANT_HEADER +from annotation.microservice_communication.search import ( + X_CURRENT_TENANT_HEADER, +) from annotation.models import AnnotatedDoc from annotation.schemas import AnnotatedDocSchema, ConnectionErrorSchema from annotation.tags import ANNOTATION_TAG, REVISION_TAG diff --git a/annotation/annotation/schemas/annotations.py b/annotation/annotation/schemas/annotations.py index 1dde6d64d..f483b46ab 100644 --- a/annotation/annotation/schemas/annotations.py +++ b/annotation/annotation/schemas/annotations.py @@ -7,7 +7,9 @@ class PageSchema(BaseModel): page_num: int = Field(..., ge=1, example=2) - size: Dict[str, float] = Field(..., example={"width": 10.2, "height": 123.34}) + size: Dict[str, float] = Field( + ..., example={"width": 10.2, "height": 123.34} + ) objs: List[dict] = Field( ..., example=[ @@ -41,8 +43,12 @@ class PageSchema(BaseModel): class PageOutSchema(PageSchema): - revision: str = Field(..., example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69") - user_id: Optional[UUID] = Field(..., example="c1c76433-5bfb-4c4a-a5b5-93c66fbfe376") + revision: str = Field( + ..., example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69" + ) + user_id: Optional[UUID] = Field( + ..., example="c1c76433-5bfb-4c4a-a5b5-93c66fbfe376" + ) pipeline: Optional[int] = Field(..., example=2) date: datetime = Field(..., example="2021-10-19 01:01:01") is_validated: bool = Field(default=False, example=False) @@ -61,12 +67,16 @@ class ParticularRevisionSchema(BaseModel): revision: Optional[str] = Field( ..., example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69" ) - user: Optional[UUID] = Field(..., example="c7311267-fdfd-4ef1-be44-160d3dd819ca") + user: Optional[UUID] = Field( + ..., example="c7311267-fdfd-4ef1-be44-160d3dd819ca" + ) pipeline: Optional[int] = Field(..., example=1) date: Optional[datetime] = Field(..., example="2021-10-19 01:01:01") pages: List[PageSchema] validated: Optional[List[int]] = Field(None, ge=1, example=[2]) - failed_validation_pages: Optional[List[int]] = Field(None, ge=1, example=[]) + failed_validation_pages: Optional[List[int]] = Field( + None, ge=1, example=[] + ) similar_revisions: Optional[List[RevisionLink]] = Field(None) categories: Optional[Set[str]] = Field(None, example=["1", "2"]) links_json: Optional[List[dict]] = Field(None, example={}) @@ -76,11 +86,15 @@ class DocForSaveSchema(BaseModel): base_revision: Optional[str] = Field( None, example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69" ) - user: Optional[UUID] = Field(None, example="b0ac6d8c-7b31-4570-a634-c92b07c9e566") + user: Optional[UUID] = Field( + None, example="b0ac6d8c-7b31-4570-a634-c92b07c9e566" + ) pipeline: Optional[int] = Field(None, example=1) pages: Optional[List[PageSchema]] = Field(None) validated: Optional[Set[int]] = Field(None, ge=1, example={1, 2, 10}) - failed_validation_pages: Optional[Set[int]] = Field(None, ge=1, example={3, 4}) + failed_validation_pages: Optional[Set[int]] = Field( + None, ge=1, example={3, 4} + ) similar_revisions: Optional[List[RevisionLink]] = Field(None) categories: Optional[Set[str]] = Field(None, example=["1", "2"]) links_json: Optional[List[dict]] = Field(None, example={}) @@ -148,8 +162,12 @@ def pages_for_save_check(cls, values): class AnnotatedDocSchema(BaseModel): - revision: str = Field(..., example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69") - user: Optional[UUID] = Field(..., example="0b0ea570-e4e8-4664-84ac-dd1122471fc5") + revision: str = Field( + ..., example="20fe52cce6a632c6eb09fdc5b3e1594f926eea69" + ) 
+ user: Optional[UUID] = Field( + ..., example="0b0ea570-e4e8-4664-84ac-dd1122471fc5" + ) pipeline: Optional[int] = Field(..., example=1) date: datetime = Field(..., example="2021-10-19 01:01:01") file_id: int = Field(..., example=1) diff --git a/annotation/annotation/schemas/categories.py b/annotation/annotation/schemas/categories.py index 0e5d37313..edd68ea39 100644 --- a/annotation/annotation/schemas/categories.py +++ b/annotation/annotation/schemas/categories.py @@ -1,9 +1,10 @@ from enum import Enum from typing import List, Optional -from annotation.errors import CheckFieldError from pydantic import BaseModel, Field, validator +from annotation.errors import CheckFieldError + class CategoryTypeSchema(str, Enum): box = "box" @@ -61,7 +62,9 @@ class SubCategoriesOutSchema(BaseModel): class CategoryORMSchema(CategoryInputSchema): - metadata: Optional[dict] = Field(None, example={"color": "blue"}, alias="metadata_") + metadata: Optional[dict] = Field( + None, example={"color": "blue"}, alias="metadata_" + ) class Config: orm_mode = True diff --git a/annotation/annotation/schemas/jobs.py b/annotation/annotation/schemas/jobs.py index 03342393c..b6aa939af 100644 --- a/annotation/annotation/schemas/jobs.py +++ b/annotation/annotation/schemas/jobs.py @@ -65,7 +65,9 @@ class JobInfoSchema(BaseModel): is_auto_distribution: bool = Field(default=False, example=False) categories: Set[str] = Field(..., example={"1", "2"}) deadline: Optional[datetime] = Field(None, example="2021-10-19 01:01:01") - job_type: JobTypeEnumSchema = Field(..., example=JobTypeEnumSchema.ExtractionJob) + job_type: JobTypeEnumSchema = Field( + ..., example=JobTypeEnumSchema.ExtractionJob + ) extensive_coverage: int = Field( 1, example=1, @@ -78,9 +80,12 @@ def check_files_and_datasets(cls, values): """ files, datasets = values.get("files"), values.get("datasets") job_type = values.get("job_type") - if (not files and not datasets) and job_type != JobTypeEnumSchema.ImportJob: + if ( + not files and not datasets + ) and job_type != JobTypeEnumSchema.ImportJob: raise ValueError( - "Fields files and datasets should " "not be empty at the same time." + "Fields files and datasets should " + "not be empty at the same time." 
) return values @@ -158,7 +163,9 @@ def check_categories(cls, values): job_type = values.get("job_type") categories = values.get("categories") if job_type != JobTypeEnumSchema.ImportJob and not categories: - raise ValueError("There should be not less than one category provided") + raise ValueError( + "There should be not less than one category provided" + ) return values @@ -196,7 +203,9 @@ class FileStatusEnumSchema(str, Enum): class FileInfoSchema(BaseModel): id: int = Field(..., example=1) - status: FileStatusEnumSchema = Field(..., example=FileStatusEnumSchema.pending) + status: FileStatusEnumSchema = Field( + ..., example=FileStatusEnumSchema.pending + ) class JobFilesInfoSchema(BaseModel): diff --git a/annotation/annotation/schemas/tasks.py b/annotation/annotation/schemas/tasks.py index b13a22fcd..b7ce00e65 100644 --- a/annotation/annotation/schemas/tasks.py +++ b/annotation/annotation/schemas/tasks.py @@ -40,7 +40,9 @@ class ManualAnnotationTaskInSchema(BaseModel): deadline: Optional[datetime] = Field(None, example="2021-10-19 01:01:01") -class ManualAnnotationTaskSchema(ManualAnnotationTaskInSchema, TaskStatusSchema): +class ManualAnnotationTaskSchema( + ManualAnnotationTaskInSchema, TaskStatusSchema +): class Config: orm_mode = True @@ -87,7 +89,8 @@ def both_fields_not_empty_check(cls, values): files, datasets = values.get("files"), values.get("datasets") if not files and not datasets: raise ValueError( - "Fields files and datasets should " "not be empty at the same time." + "Fields files and datasets should " + "not be empty at the same time." ) return values @@ -110,7 +113,9 @@ class ValidationEndSchema(BaseModel): class TaskPatchSchema(BaseModel): file_id: Optional[int] = Field(None, example=2) - pages: Optional[Set[int]] = Field(None, ge=1, min_items=1, example={1, 2, 3}) + pages: Optional[Set[int]] = Field( + None, ge=1, min_items=1, example={1, 2, 3} + ) job_id: Optional[int] = Field(None, example=3) user_id: Optional[UUID] = Field( None, example="4e9c5839-f63b-49c8-b918-614b87813e53" @@ -136,7 +141,9 @@ class Config: class AgreementScoreServiceInput(BaseModel): - annotator_id: UUID = Field(..., example="f0474853-f733-41c0-b897-90b788b822e3") + annotator_id: UUID = Field( + ..., example="f0474853-f733-41c0-b897-90b788b822e3" + ) job_id: int = Field(..., example=1) task_id: int = Field(..., example=1) s3_file_path: str = Field(..., example="files/1/1.pdf") @@ -146,7 +153,9 @@ class AgreementScoreServiceInput(BaseModel): class ExportTaskStatsInput(BaseModel): - user_ids: List[UUID] = Field(..., example=["e20af190-0f05-4cd8-ad51-811bfb19ad71"]) + user_ids: List[UUID] = Field( + ..., example=["e20af190-0f05-4cd8-ad51-811bfb19ad71"] + ) date_from: datetime = Field(..., example="2020-12-20 01:01:01") date_to: Optional[datetime] = Field(None, example="2025-12-20 01:01:01") @@ -157,7 +166,9 @@ class ResponseScore(BaseModel): class AgreementScoreServiceResponse(BaseModel): - annotator_id: UUID = Field(..., example="f0474853-f733-41c0-b897-90b788b822e3") + annotator_id: UUID = Field( + ..., example="f0474853-f733-41c0-b897-90b788b822e3" + ) job_id: int = Field(..., example=1) task_id: int = Field(..., example=1) agreement_score: List[ResponseScore] = Field(...) 
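Beyond the line wrapping, the schemas/jobs.py and schemas/tasks.py hunks above show validators whose (cls, values) signatures suggest pydantic root validators, e.g. both_fields_not_empty_check, which rejects a payload when files and datasets are both empty. A minimal, self-contained sketch of that pattern, assuming pydantic v1 (which the orm_mode configs in these schemas suggest); the model name and field types below are illustrative only, while the field names and error message are taken from the diff:

    from typing import List, Optional

    from pydantic import BaseModel, Field, root_validator


    class TaskInfoSketch(BaseModel):
        # Illustrative stand-in for the task/job schemas in the diff.
        files: Optional[List[int]] = Field(None, example=[1, 2])
        datasets: Optional[List[int]] = Field(None, example=[3])

        @root_validator
        def both_fields_not_empty_check(cls, values):
            # Same check as in the hunk: at least one of the two
            # collections must be non-empty.
            files, datasets = values.get("files"), values.get("datasets")
            if not files and not datasets:
                raise ValueError(
                    "Fields files and datasets should "
                    "not be empty at the same time."
                )
            return values

For example, TaskInfoSketch(files=[1]) validates, while TaskInfoSketch(files=[], datasets=[]) raises a ValidationError carrying the wrapped message shown in the hunk.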
diff --git a/annotation/annotation/tasks/resources.py b/annotation/annotation/tasks/resources.py index 067f11d22..e82134953 100644 --- a/annotation/annotation/tasks/resources.py +++ b/annotation/annotation/tasks/resources.py @@ -5,6 +5,25 @@ from uuid import UUID import dotenv +from fastapi import ( + APIRouter, + Body, + Depends, + Header, + HTTPException, + Path, + Query, + Response, + status, +) +from fastapi.responses import JSONResponse, StreamingResponse +from filter_lib import Page +from sqlalchemy import and_, not_ +from sqlalchemy.exc import IntegrityError, SQLAlchemyError +from sqlalchemy.orm import Session +from sqlalchemy_filters.exceptions import BadFilterFormat +from tenant_dependency import TenantData + from annotation.annotations import accumulate_pages_info, row_to_dict from annotation.database import get_db from annotation.filters import TaskFilter @@ -58,24 +77,6 @@ create_validation_tasks, ) from annotation.token_dependency import TOKEN -from fastapi import ( - APIRouter, - Body, - Depends, - Header, - HTTPException, - Path, - Query, - Response, - status, -) -from fastapi.responses import JSONResponse, StreamingResponse -from filter_lib import Page -from sqlalchemy import and_, not_ -from sqlalchemy.exc import IntegrityError, SQLAlchemyError -from sqlalchemy.orm import Session -from sqlalchemy_filters.exceptions import BadFilterFormat -from tenant_dependency import TenantData from ..models import File, Job, ManualAnnotationTask from .services import ( @@ -124,7 +125,8 @@ def _prepare_expanded_tasks_response( user_logins = get_user_logins(tasks, tenant, token) except GetUserInfoAccessDenied: Logger.info( - "Trying to get users logins with non-admin jwt. " "Getting empty dict" + "Trying to get users logins with non-admin jwt. " + "Getting empty dict" ) user_logins = {} @@ -298,7 +300,9 @@ def get_task( if not annotation_task: return JSONResponse( status_code=404, - content={"detail": "Task with id {0} was not found.".format(task_id)}, + content={ + "detail": "Task with id {0} was not found.".format(task_id) + }, ) annotation_task = _prepare_expanded_tasks_response( db, @@ -314,11 +318,14 @@ def get_task( @router.get( "", status_code=status.HTTP_200_OK, - response_model=Dict[str, Union[int, List[ExpandedManualAnnotationTaskSchema]]], + response_model=Dict[ + str, Union[int, List[ExpandedManualAnnotationTaskSchema]] + ], responses={ 404: {"model": NotFoundErrorSchema}, }, - summary="Get a list of manual annotation tasks based " "on search parameters.", + summary="Get a list of manual annotation tasks based " + "on search parameters.", ) def get_tasks( file_id: Optional[int] = Query(None, example=5), @@ -327,7 +334,9 @@ def get_tasks( None, example="2016a913-47f2-417d-afdb-032165b9330d" ), deadline: Optional[datetime] = Query(None, example="2021-10-19 01:01:01"), - task_status: Optional[str] = Query(None, example=TaskStatusEnumSchema.ready), + task_status: Optional[str] = Query( + None, example=TaskStatusEnumSchema.ready + ), pagination_page_size: Optional[int] = Query(50, gt=0, le=100, example=25), pagination_start_page: Optional[int] = Query(1, gt=0, example=1), db: Session = Depends(get_db), @@ -480,7 +489,8 @@ def update_task( if task.status != TaskStatusEnumSchema.pending: raise HTTPException( status_code=400, - detail="Error: only tasks in 'Pending' status could " "be updated", + detail="Error: only tasks in 'Pending' status could " + "be updated", ) task_info_dict = row_to_dict(task) @@ -518,7 +528,9 @@ def update_task( if old_task_file: recalculate_file_pages(db, 
old_task_file) - if not (old_task_job_id == task.job_id and old_task_file_id == task.file_id): + if not ( + old_task_job_id == task.job_id and old_task_file_id == task.file_id + ): update_files(db, [row_to_dict(task)], task.job_id) db.flush() @@ -775,7 +787,9 @@ def finish_task( # if there is user for annotation # param will be True, otherwise False - annotation_user = bool(validation_info.annotation_user_for_failed_pages is not None) + annotation_user = bool( + validation_info.annotation_user_for_failed_pages is not None + ) # if there is user for validation # param will be True, otherwise False @@ -974,7 +988,9 @@ def finish_task( status_code=500, detail=f"Error: connection error ({exc.exc_info})", ) - update_inner_job_status(db, task.job_id, JobStatusEnumSchema.finished) + update_inner_job_status( + db, task.job_id, JobStatusEnumSchema.finished + ) # store metrics in db save_agreement_metrics(db=db, scores=compared_score) diff --git a/annotation/annotation/tasks/services.py b/annotation/annotation/tasks/services.py index 7a6c84443..823b41c0f 100644 --- a/annotation/annotation/tasks/services.py +++ b/annotation/annotation/tasks/services.py @@ -6,6 +6,12 @@ import dotenv import pydantic +from fastapi import HTTPException +from filter_lib import Page, form_query, map_request_to_filter, paginate +from sqlalchemy import and_, asc, text +from sqlalchemy.orm import Session +from tenant_dependency import TenantData + from annotation.errors import CheckFieldError, FieldConstraintError from annotation.filters import TaskFilter from annotation.jobs import update_files, update_user_overall_load @@ -34,11 +40,6 @@ TaskStatusEnumSchema, ValidationSchema, ) -from fastapi import HTTPException -from filter_lib import Page, form_query, map_request_to_filter, paginate -from sqlalchemy import and_, asc, text -from sqlalchemy.orm import Session -from tenant_dependency import TenantData dotenv.load_dotenv(dotenv.find_dotenv()) AGREEMENT_SCORE_MIN_MATCH = float(os.getenv("AGREEMENT_SCORE_MIN_MATCH")) @@ -67,7 +68,10 @@ def validate_users_info( for new/updated task. Raises FieldConstraintError in case of any validation fails. 
""" - if validation_type == ValidationSchema.cross and task_info["is_validation"]: + if ( + validation_type == ValidationSchema.cross + and task_info["is_validation"] + ): check_cross_annotating_pages(db, task_info) if task_info["is_validation"]: job_task_validator = ( @@ -76,7 +80,9 @@ def validate_users_info( if validation_type == ValidationSchema.cross else association_job_validator ) - .filter_by(user_id=task_info["user_id"], job_id=task_info["job_id"]) + .filter_by( + user_id=task_info["user_id"], job_id=task_info["job_id"] + ) .first() ) if not job_task_validator: @@ -87,7 +93,9 @@ def validate_users_info( else: job_task_annotator = ( db.query(association_job_annotator) - .filter_by(user_id=task_info["user_id"], job_id=task_info["job_id"]) + .filter_by( + user_id=task_info["user_id"], job_id=task_info["job_id"] + ) .first() ) if not job_task_annotator: @@ -200,7 +208,9 @@ def validate_user_actions( ) -def create_annotation_task(db: Session, annotation_task: ManualAnnotationTaskInSchema): +def create_annotation_task( + db: Session, annotation_task: ManualAnnotationTaskInSchema +): annotation_task = ManualAnnotationTask(**annotation_task.dict()) db.add(annotation_task) @@ -245,7 +255,9 @@ def filter_tasks_db( filter_query = db.query(ManualAnnotationTask).filter( ManualAnnotationTask.jobs.has(tenant=tenant) ) - filter_args = map_request_to_filter(request.dict(), ManualAnnotationTask.__name__) + filter_args = map_request_to_filter( + request.dict(), ManualAnnotationTask.__name__ + ) task_query, pagination = form_query(filter_args, filter_query) return paginate(task_query.all(), pagination) @@ -302,7 +314,9 @@ def finish_validation_task(db: Session, task: ManualAnnotationTask) -> None: ManualAnnotationTask.file_id == task.file_id, ManualAnnotationTask.is_validation.is_(True), ).with_for_update().update( - {ManualAnnotationTask.status: TaskStatusEnumSchema.finished}, # noqa: E501 + { + ManualAnnotationTask.status: TaskStatusEnumSchema.finished + }, # noqa: E501 synchronize_session="fetch", ) db.commit() @@ -347,9 +361,13 @@ def get_task_revisions( if int(key) in task_pages } revision.failed_validation_pages = [ - page for page in revision.failed_validation_pages if page in task_pages + page + for page in revision.failed_validation_pages + if page in task_pages + ] + revision.validated = [ + page for page in revision.validated if page in task_pages ] - revision.validated = [page for page in revision.validated if page in task_pages] return [ revision @@ -364,7 +382,9 @@ def get_task_revisions( ] -def get_task_info(db: Session, task_id: int, tenant: str) -> ManualAnnotationTask: +def get_task_info( + db: Session, task_id: int, tenant: str +) -> ManualAnnotationTask: return ( db.query(ManualAnnotationTask) .filter( @@ -398,7 +418,9 @@ def unblock_validation_tasks( ManualAnnotationTask.pages.contained_by(annotated_file_pages), ) ) - .update({"status": TaskStatusEnumSchema.ready}, synchronize_session=False) + .update( + {"status": TaskStatusEnumSchema.ready}, synchronize_session=False + ) ) @@ -426,7 +448,9 @@ def add_task_stats_record( stats_db.updated = datetime.utcnow() else: if stats.event_type == "closed": - raise CheckFieldError("Attribute event_type can not start from closed.") + raise CheckFieldError( + "Attribute event_type can not start from closed." 
+ ) stats_db = AnnotationStatistics(task_id=task_id, **stats.dict()) db.add(stats_db) @@ -463,7 +487,9 @@ def create_export_csv( "file_id": stat.task.file_id, "pages": stat.task.pages, "time_start": stat.created.isoformat(), - "time_finish": (stat.updated.isoformat() if stat.updated else None), + "time_finish": ( + stat.updated.isoformat() if stat.updated else None + ), "agreement_score": [ { "task_from": metric.task_from, @@ -538,7 +564,9 @@ def evaluate_agreement_score( ) for task_in in tasks_intersection_pages ] - agreement_scores: List[AgreementScoreServiceResponse] = get_agreement_score( + agreement_scores: List[ + AgreementScoreServiceResponse + ] = get_agreement_score( agreement_scores_input=agreement_scores_input, tenant=tenant, token=token.token, @@ -589,7 +617,9 @@ def compare_agreement_scores( get_unique_scores(task_from_id, scores, unique_scores) # check is every annotator reached min match score and return result - agreement_reached: bool = all(map(lambda a: a.score >= min_match, unique_scores)) + agreement_reached: bool = all( + map(lambda a: a.score >= min_match, unique_scores) + ) metrics: List[TaskMetric] = list( sorted( map( @@ -608,7 +638,9 @@ def compare_agreement_scores( ) -def save_agreement_metrics(db: Session, scores: AgreementScoreComparingResult) -> None: +def save_agreement_metrics( + db: Session, scores: AgreementScoreComparingResult +) -> None: metrics: List[AgreementMetrics] = [ AgreementMetrics( task_from=el.task_from_id, diff --git a/annotation/annotation/tasks/validation.py b/annotation/annotation/tasks/validation.py index a77d78e91..c04082c53 100644 --- a/annotation/annotation/tasks/validation.py +++ b/annotation/annotation/tasks/validation.py @@ -3,6 +3,10 @@ from typing import Dict, List, Optional, Set, Union from uuid import UUID +from fastapi import HTTPException +from sqlalchemy import and_, asc, null, or_ +from sqlalchemy.orm import Session + from annotation.distribution import prepare_response from annotation.microservice_communication.assets_communication import ( FilesForDistribution, @@ -13,9 +17,6 @@ TaskStatusEnumSchema, ValidationSchema, ) -from fastapi import HTTPException -from sqlalchemy import and_, asc, null, or_ -from sqlalchemy.orm import Session from .services import create_tasks @@ -116,7 +117,9 @@ def create_tasks_initial_users( Create validation tasks with 'pending' status automatically. """ # revisions for job_id and file_id, made by annotators - annotators_revisions = get_annotators_revisions(db, file_id, job.job_id, task_id) + annotators_revisions = get_annotators_revisions( + db, file_id, job.job_id, task_id + ) # find annotators, who made annotation for each page initial_annotators = find_initial_annotators(annotators_revisions, failed) # create tasks for annotation with 'ready' status @@ -154,8 +157,12 @@ def create_annotation_tasks_specific_user( Create validation tasks with 'pending' status automatically. 
""" # check, that string is valid uuid - annotation_user_for_failed_pages = check_uuid(annotation_user_for_failed_pages) - check_user_job_action(db, annotation_user_for_failed_pages, job.job_id, False) + annotation_user_for_failed_pages = check_uuid( + annotation_user_for_failed_pages + ) + check_user_job_action( + db, annotation_user_for_failed_pages, job.job_id, False + ) # create annotation task for specific user with 'ready' status # and tasks for validation with 'pending' status prepare_response( @@ -216,7 +223,9 @@ def _find_annotators_for_failed_pages( for revision in revisions: rev_pages = set(map(int, revision.pages)) # take unique pages - for page in rev_pages.intersection(pages): # take only failed by val pages + for page in rev_pages.intersection( + pages + ): # take only failed by val pages pages_user[page] = revision.user if None in pages_user.values(): @@ -373,7 +382,9 @@ def create_validation_tasks_specific_user( validation_user_for_reannotated_pages ) - check_user_job_action(db, validation_user_for_reannotated_pages, job.job_id, False) + check_user_job_action( + db, validation_user_for_reannotated_pages, job.job_id, False + ) if ( validator_id == validation_user_for_reannotated_pages @@ -433,7 +444,8 @@ def check_user_job_action( if not check_user_job_belonging(db, user_id, job_id, only_owner=True): raise HTTPException( status_code=400, - detail="Only owner may not request " "validation of edited pages.", + detail="Only owner may not request " + "validation of edited pages.", ) else: if not check_user_job_belonging(db, user_id, job_id, only_owner=False): @@ -460,8 +472,12 @@ def check_user_job_belonging( if not only_owner: filters.extend( [ - and_(Job.annotators.any(user_id=user_id), Job.job_id == job_id), - and_(Job.validators.any(user_id=user_id), Job.job_id == job_id), + and_( + Job.annotators.any(user_id=user_id), Job.job_id == job_id + ), + and_( + Job.validators.any(user_id=user_id), Job.job_id == job_id + ), ] ) return bool(db.query(User).filter(or_(*filters)).first()) diff --git a/annotation/documentation/update_docs.py b/annotation/documentation/update_docs.py index 9f8c70153..362ca5ce3 100644 --- a/annotation/documentation/update_docs.py +++ b/annotation/documentation/update_docs.py @@ -4,7 +4,9 @@ def str_presenter(dumper, data): if "\n" in data: - return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + return dumper.represent_scalar( + "tag:yaml.org,2002:str", data, style="|" + ) return dumper.represent_scalar("tag:yaml.org,2002:str", data) diff --git a/annotation/tests/conftest.py b/annotation/tests/conftest.py index 7c0c4cf6d..4e171e140 100644 --- a/annotation/tests/conftest.py +++ b/annotation/tests/conftest.py @@ -253,7 +253,9 @@ def use_temp_env_var(): @pytest.fixture(scope="module") def db_session(setup_test_db): - session_local = sessionmaker(autocommit=False, autoflush=False, bind=engine) + session_local = sessionmaker( + autocommit=False, autoflush=False, bind=engine + ) session = session_local() yield session @@ -449,7 +451,9 @@ def prepare_moto_s3_for_get_revisions(): @pytest.fixture(name="expected_latest_revisions", scope="module") def load_expected_latest_revisions(): - with open("tests/fixtures/expected_latest_revisions.json", "r") as json_file: + with open( + "tests/fixtures/expected_latest_revisions.json", "r" + ) as json_file: json_data = json.load(json_file) return json_data @@ -588,12 +592,16 @@ def prepare_db_for_finish_task_check_deleted_annotators(db_session): @pytest.fixture def 
prepare_db_categories_same_names(db_session): - category_tenant = Category(id="1", tenant=TEST_TENANT, name="Title", type="box") + category_tenant = Category( + id="1", tenant=TEST_TENANT, name="Title", type="box" + ) category_common = Category(id="2", tenant=None, name="Table", type="box") category_other_tenant = Category( id="3", tenant="other_tenant", name="Title", type="box" ) - add_objects(db_session, (category_tenant, category_common, category_other_tenant)) + add_objects( + db_session, (category_tenant, category_common, category_other_tenant) + ) yield db_session @@ -602,7 +610,9 @@ def prepare_db_categories_same_names(db_session): @pytest.fixture def prepare_db_categories_different_names(db_session): - category_tenant = Category(id="1", tenant=TEST_TENANT, name="Title", type="box") + category_tenant = Category( + id="1", tenant=TEST_TENANT, name="Title", type="box" + ) category_common = Category(id="2", tenant=None, name="Table", type="box") category_other_tenant = Category( id="3", tenant="other_tenant", name="Header", type="box" @@ -729,7 +739,10 @@ def prepare_db_for_cr_task(db_session): @pytest.fixture(scope="module") def prepare_db_update_stats(prepare_db_for_cr_task): for task_id in [ - id_ for (id_,) in prepare_db_for_cr_task.query(ManualAnnotationTask.id).all() + id_ + for (id_,) in prepare_db_for_cr_task.query( + ManualAnnotationTask.id + ).all() ]: add_task_stats_record( db=prepare_db_for_cr_task, @@ -744,7 +757,10 @@ def prepare_db_update_stats(prepare_db_for_cr_task): @pytest.fixture(scope="module") def prepare_db_update_stats_already_updated(prepare_db_update_stats): for task_id in [ - id_ for (id_,) in prepare_db_update_stats.query(ManualAnnotationTask.id).all() + id_ + for (id_,) in prepare_db_update_stats.query( + ManualAnnotationTask.id + ).all() ]: add_task_stats_record( db=prepare_db_update_stats, @@ -852,7 +868,9 @@ def prepare_db_for_get_next_task(db_session): @pytest.fixture(scope="function") def prepare_db_for_batch_delete_tasks(db_session): add_objects(db_session, [DELETE_BATCH_TASKS_JOB]) - add_objects(db_session, (DELETE_BATCH_TASKS_FILE, DELETE_BATCH_TASKS_ANNOTATOR)) + add_objects( + db_session, (DELETE_BATCH_TASKS_FILE, DELETE_BATCH_TASKS_ANNOTATOR) + ) db_session.bulk_insert_mappings(ManualAnnotationTask, DIFF_STATUSES_TASKS) db_session.commit() @@ -867,7 +885,10 @@ def minio_particular_revision(): s3_resource = boto3.resource("s3", region_name=DEFAULT_REGION) s3_resource.create_bucket(Bucket=TEST_TENANT) - path = f"{S3_START_PATH}/{PART_REV_DOC.job_id}/" f"{PART_REV_DOC.file_id}/" + path = ( + f"{S3_START_PATH}/{PART_REV_DOC.job_id}/" + f"{PART_REV_DOC.file_id}/" + ) s3_resource.Bucket(TEST_TENANT).put_object( Body=json.dumps(PART_REV_PAGES[0]), @@ -1083,7 +1104,9 @@ def db_get_unassigned_files(db_session): @pytest.fixture def db_validation_end(db_session): add_objects(db_session, [validation.JOBS[0]]) - add_objects(db_session, validation.FILES + validation.TASKS + validation.DOCS) + add_objects( + db_session, validation.FILES + validation.TASKS + validation.DOCS + ) update_annotators_overall_load(db_session, validation.ANNOTATORS) yield db_session @@ -1104,9 +1127,9 @@ def prepare_db_find_annotators_for_failed_pages(db_validation_end): ManualAnnotationTask.user_id == annotator_for_delete ).delete(synchronize_session=False) db_validation_end.commit() - db_validation_end.query(User).filter(User.user_id == annotator_for_delete).delete( - synchronize_session=False - ) + db_validation_end.query(User).filter( + User.user_id == annotator_for_delete + 
).delete(synchronize_session=False) db_validation_end.commit() yield db_validation_end diff --git a/annotation/tests/test_annotators_overall_load.py b/annotation/tests/test_annotators_overall_load.py index 1ecb0b22d..972e14d77 100644 --- a/annotation/tests/test_annotators_overall_load.py +++ b/annotation/tests/test_annotators_overall_load.py @@ -96,7 +96,9 @@ annotators=[user for user in OVERALL_LOAD_USERS[:3]], validation_type=ValidationSchema.cross, is_auto_distribution=False, - categories=[Category(id="123", name="Title", type=CategoryTypeSchema.box)], + categories=[ + Category(id="123", name="Title", type=CategoryTypeSchema.box) + ], deadline="2021-10-19T01:01:01", tenant=TEST_TENANT, ), @@ -109,7 +111,9 @@ validation_type=ValidationSchema.hierarchical, files=[TASK_FILES_OVERALL_LOAD[0]], is_auto_distribution=False, - categories=[Category(id="125", name="Paragraph", type=CategoryTypeSchema.box)], + categories=[ + Category(id="125", name="Paragraph", type=CategoryTypeSchema.box) + ], deadline="2021-10-19T01:01:01", tenant=TEST_TENANT, ), @@ -121,7 +125,9 @@ validation_type=ValidationSchema.cross, files=[TASK_FILES_OVERALL_LOAD[2]], is_auto_distribution=False, - categories=[Category(id="126", name="Abstract", type=CategoryTypeSchema.box)], + categories=[ + Category(id="126", name="Abstract", type=CategoryTypeSchema.box) + ], deadline="2021-10-19T01:01:01", tenant=TEST_TENANT, ), # job for task distribution for particular job @@ -133,7 +139,9 @@ validation_type=ValidationSchema.hierarchical, files=[TASK_FILES_OVERALL_LOAD[5]], is_auto_distribution=False, - categories=[Category(id="127", name="Abstract", type=CategoryTypeSchema.box)], + categories=[ + Category(id="127", name="Abstract", type=CategoryTypeSchema.box) + ], deadline="2021-10-19T01:01:01", tenant=TEST_TENANT, ), @@ -145,7 +153,9 @@ owners=[OVERALL_LOAD_USERS[14]], validation_type=ValidationSchema.hierarchical, is_auto_distribution=True, - categories=[Category(id="128", name="Abstract", type=CategoryTypeSchema.box)], + categories=[ + Category(id="128", name="Abstract", type=CategoryTypeSchema.box) + ], deadline="2022-10-19T01:01:01", tenant=TEST_TENANT, status=JobStatusEnumSchema.in_progress, @@ -364,7 +374,9 @@ def test_overall_load_after_update_task( ) assert response.status_code == 200 - for user_id, expected_overall_load in zip(users_id, expected_overall_loads): + for user_id, expected_overall_load in zip( + users_id, expected_overall_loads + ): user = prepare_db_for_overall_load.query(User).get(user_id) assert user.overall_load == expected_overall_load @@ -395,9 +407,13 @@ def test_overall_load_after_delete_batch_tasks(prepare_db_for_overall_load): OVERALL_LOAD_CREATED_TASKS[5].user_id, ] expected_overall_loads = [4, 0] - response = client.delete(CRUD_TASKS_PATH, json=[4, 6], headers=TEST_HEADERS) + response = client.delete( + CRUD_TASKS_PATH, json=[4, 6], headers=TEST_HEADERS + ) assert response.status_code == 204 - for user_id, expected_overall_load in zip(user_ids, expected_overall_loads): + for user_id, expected_overall_load in zip( + user_ids, expected_overall_loads + ): user = prepare_db_for_overall_load.query(User).get(user_id) assert user.overall_load == expected_overall_load @@ -414,7 +430,11 @@ def test_overall_load_after_delete_batch_tasks(prepare_db_for_overall_load): ), ( # validator with pages for reannotation 7, - {"annotation_user_for_failed_pages": OVERALL_LOAD_USERS[4].user_id}, + { + "annotation_user_for_failed_pages": OVERALL_LOAD_USERS[ + 4 + ].user_id + }, [OVERALL_LOAD_USERS[5].user_id, 
OVERALL_LOAD_USERS[4].user_id], [1, 6], ), @@ -433,22 +453,29 @@ def test_overall_load_after_finish_task( headers=TEST_HEADERS, ) assert response.status_code == 200 - for user_id, expected_overall_load in zip(users_id, expected_overall_loads): + for user_id, expected_overall_load in zip( + users_id, expected_overall_loads + ): user = prepare_db_for_overall_load.query(User).get(user_id) assert user.overall_load == expected_overall_load @mark.integration -def test_overall_load_after_distribution(monkeypatch, prepare_db_for_overall_load): +def test_overall_load_after_distribution( + monkeypatch, prepare_db_for_overall_load +): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication." "get_response", + "annotation.microservice_communication.assets_communication." + "get_response", Mock(return_value=[{"id": 3, "pages": 4}]), ) response = client.post( "/distribution", json=OVERALL_LOAD_NEW_TASKS[2], headers=TEST_HEADERS ) assert response.status_code == 201 - user = prepare_db_for_overall_load.query(User).get(OVERALL_LOAD_USERS[6].user_id) + user = prepare_db_for_overall_load.query(User).get( + OVERALL_LOAD_USERS[6].user_id + ) assert user.overall_load == 4 diff --git a/annotation/tests/test_assets_communication.py b/annotation/tests/test_assets_communication.py index f9101dfe5..fa51d52c9 100644 --- a/annotation/tests/test_assets_communication.py +++ b/annotation/tests/test_assets_communication.py @@ -122,9 +122,12 @@ (FILE_IDS, [], {}), ], ) -def test_get_file_names(monkeypatch, file_ids, parsed_response, expected_result): +def test_get_file_names( + monkeypatch, file_ids, parsed_response, expected_result +): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication." "get_response", + "annotation.microservice_communication.assets_communication." + "get_response", Mock(return_value=parsed_response), ) @@ -213,7 +216,8 @@ def test_get_files_info( expected_result, ): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication." "get_response", + "annotation.microservice_communication.assets_communication." 
+ "get_response", Mock(return_value=mocked_files), ) for i, dataset_id in enumerate(dataset_ids): @@ -224,7 +228,9 @@ def test_get_files_info( headers=TEST_HEADERS, status=200, ) - actual_result = get_files_info(file_ids, dataset_ids, TEST_TENANT, TEST_TOKEN) + actual_result = get_files_info( + file_ids, dataset_ids, TEST_TENANT, TEST_TOKEN + ) assert actual_result == expected_result diff --git a/annotation/tests/test_category_crud.py b/annotation/tests/test_category_crud.py index 64f1ad4d8..0815dd18d 100644 --- a/annotation/tests/test_category_crud.py +++ b/annotation/tests/test_category_crud.py @@ -161,10 +161,14 @@ def add_for_cascade_delete( parent_id = request.param session = prepare_db_categories_different_names data_1 = prepare_category_body(name="Title1", parent=parent_id) - response_1 = client.post(CATEGORIES_PATH, json=data_1, headers=TEST_HEADERS) + response_1 = client.post( + CATEGORIES_PATH, json=data_1, headers=TEST_HEADERS + ) cat_id_1 = response_1.json()["id"] data_2 = prepare_category_body(name="Title3", parent=cat_id_1) - response_2 = client.post(CATEGORIES_PATH, json=data_2, headers=TEST_HEADERS) + response_2 = client.post( + CATEGORIES_PATH, json=data_2, headers=TEST_HEADERS + ) cat_id_2 = response_2.json()["id"] common_cat = session.query(Category).get("2") session.delete(common_cat) @@ -173,7 +177,10 @@ def add_for_cascade_delete( @mark.integration -@patch("annotation.categories.resources.add_category_db", side_effect=SQLAlchemyError) +@patch( + "annotation.categories.resources.add_category_db", + side_effect=SQLAlchemyError, +) def test_add_db_connection_error(prepare_db_categories_different_names): data = prepare_category_body() response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) @@ -207,7 +214,9 @@ def test_add_unique_name(prepare_db_categories_different_names, category_name): data = prepare_category_body(name=category_name) response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) assert response.status_code == 201 - assert prepare_expected_result(response.text) == prepare_category_response(data) + assert prepare_expected_result(response.text) == prepare_category_response( + data + ) @mark.integration @@ -229,7 +238,9 @@ def test_add_unique_name_custom_fields( data = prepare_category_body(**field_value_pairs) response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) assert response.status_code == 201 - assert prepare_expected_result(response.text) == prepare_category_response(data) + assert prepare_expected_result(response.text) == prepare_category_response( + data + ) @mark.integration @@ -253,7 +264,9 @@ def test_add_wrong_field_types(db_session, wrong_field, wrong_value): "data_attributes": None, wrong_field: wrong_value, # rewrite default value with parametrized } - response = client.post(CATEGORIES_PATH, json=wrong_body, headers=TEST_HEADERS) + response = client.post( + CATEGORIES_PATH, json=wrong_body, headers=TEST_HEADERS + ) assert response.status_code == 422 @@ -328,7 +341,9 @@ def test_add_id_is_generated(prepare_db_categories_different_names): "category_id", ("1Category123", "second_category", "3rd_category"), ) -def test_add_id_numbers_underscore(category_id, prepare_db_categories_different_names): +def test_add_id_numbers_underscore( + category_id, prepare_db_categories_different_names +): data = prepare_category_body(id_=category_id, name=str(uuid.uuid4())) response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) assert response.status_code == 201 @@ -342,7 +357,9 @@ def 
test_add_id_numbers_underscore(category_id, prepare_db_categories_different_ "category_id", ("1st!-category1", "2nd%category", "3rd:.category"), ) -def test_add_id_special_chars(category_id, prepare_db_categories_different_names): +def test_add_id_special_chars( + category_id, prepare_db_categories_different_names +): data = prepare_category_body(id_=category_id, name=str(uuid.uuid4())) response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) assert response.status_code == 400 @@ -358,7 +375,10 @@ def test_add_self_parent(prepare_db_categories_different_names): @mark.integration -@patch("annotation.categories.resources.fetch_category_db", side_effect=SQLAlchemyError) +@patch( + "annotation.categories.resources.fetch_category_db", + side_effect=SQLAlchemyError, +) def test_get_db_connection_error(prepare_db_categories_same_names): cat_id = 1 response = client.get(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) @@ -372,7 +392,9 @@ def test_get_db_connection_error(prepare_db_categories_same_names): ("3", "100"), # other tenant category and category that doesn't exist ) def test_get_wrong_category(category_id, prepare_db_categories_same_names): - response = client.get(f"{CATEGORIES_PATH}/{category_id}", headers=TEST_HEADERS) + response = client.get( + f"{CATEGORIES_PATH}/{category_id}", headers=TEST_HEADERS + ) assert response.status_code == 404 assert f"Category with id: {category_id} doesn't exist" in response.text @@ -389,9 +411,13 @@ def test_get_allowed_category( category_id, category_name, prepare_db_categories_same_names ): data = prepare_category_body(name=category_name) - response = client.get(f"{CATEGORIES_PATH}/{category_id}", headers=TEST_HEADERS) + response = client.get( + f"{CATEGORIES_PATH}/{category_id}", headers=TEST_HEADERS + ) assert response.status_code == 200 - assert prepare_expected_result(response.text) == prepare_category_response(data) + assert prepare_expected_result(response.text) == prepare_category_response( + data + ) @mark.integration @@ -404,11 +430,14 @@ def test_get_no_tenant_specified(prepare_db_categories_same_names): @mark.integration @patch( - "annotation.categories.resources.filter_category_db", side_effect=SQLAlchemyError + "annotation.categories.resources.filter_category_db", + side_effect=SQLAlchemyError, ) def test_search_db_connection_error(prepare_db_categories_for_filtration): data = prepare_filtration_body() - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) assert response.status_code == 500 assert "Error: connection error" in response.text @@ -424,7 +453,9 @@ def test_search_pagination( data = prepare_filtration_body( page_num=page_num, page_size=page_size, no_filtration=True ) - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) categories = response.json()["data"] pagination = response.json()["pagination"] assert response.status_code == 200 @@ -437,7 +468,9 @@ def test_search_pagination( @mark.integration def test_search_no_filtration(prepare_db_categories_for_filtration): data = prepare_filtration_body(page_size=30, no_filtration=True) - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) categories = response.json()["data"] assert 
response.status_code == 200 assert len(categories) == 16 @@ -448,9 +481,13 @@ def test_search_no_filtration(prepare_db_categories_for_filtration): "category_id", ("2", "100"), # other tenant category and category that doesn't exist ) -def test_search_wrong_category(category_id, prepare_db_categories_for_filtration): +def test_search_wrong_category( + category_id, prepare_db_categories_for_filtration +): data = prepare_filtration_body(value=category_id) - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) categories = response.json()["data"] total = response.json()["pagination"]["total"] assert response.status_code == 200 @@ -473,7 +510,9 @@ def test_search_allowed_categories( ): expected = prepare_category_body(name=category_name) data = prepare_filtration_body(value=category_id) - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) category = response.json()["data"][0] assert response.status_code == 200 @@ -492,7 +531,9 @@ def test_search_filter_gt_lt( operator, value, expected, prepare_db_categories_for_filtration ): data = prepare_filtration_body(operator=operator, value=value) - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) categories = response.json()["data"] assert response.status_code == 200 assert len(categories) == expected @@ -506,8 +547,12 @@ def test_search_filter_gt_lt( def test_search_filter_name_like( operator, value, expected, prepare_db_categories_for_filtration ): - data = prepare_filtration_body(field="name", operator=operator, value=value) - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + data = prepare_filtration_body( + field="name", operator=operator, value=value + ) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) categories = response.json()["data"] assert response.status_code == 200 assert len(categories) == expected @@ -518,8 +563,12 @@ def test_search_filter_name_like( def test_search_filter_ordering( direction, expected, prepare_db_categories_for_filtration ): - data = prepare_filtration_body(operator="lt", value="5", direction=direction) - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + data = prepare_filtration_body( + operator="lt", value="5", direction=direction + ) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) categories = response.json()["data"][0] assert response.status_code == 200 assert categories["id"] == expected @@ -527,8 +576,12 @@ def test_search_filter_ordering( @mark.integration def test_search_filter_distinct_id(prepare_db_categories_for_filtration): - data = prepare_filtration_body(page_size=30, field="id", operator="distinct") - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + data = prepare_filtration_body( + page_size=30, field="id", operator="distinct" + ) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) result_data = response.json()["data"] assert response.status_code == 200 assert len(result_data) == 16 @@ -544,10 +597,14 @@ def test_search_two_filters_different_distinct_order( second_operator="is_not_null", 
sorting_field="type", ) - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) first_result_data = response.json()["data"] data = prepare_filtration_body_double_filter(first_operator="is_not_null") - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) second_result_data = response.json()["data"] assert first_result_data == second_result_data @@ -557,7 +614,9 @@ def test_search_two_filters_both_distinct( prepare_db_categories_for_distinct_filtration, ): data = prepare_filtration_body_double_filter() - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) result_data = response.json()["data"] assert response.status_code == 200 assert len(result_data) == 3 @@ -566,9 +625,12 @@ def test_search_two_filters_both_distinct( @mark.integration def test_search_categories_400_error(prepare_db_categories_for_filtration): data = prepare_filtration_body(field="parent", operator="distinct") - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) error_message = ( - "SELECT DISTINCT ON expressions must " "match initial ORDER BY expressions" + "SELECT DISTINCT ON expressions must " + "match initial ORDER BY expressions" ) assert response.status_code == 400 assert error_message in response.text @@ -587,14 +649,17 @@ def test_search_wrong_parameters( wrong_parameter, value, prepare_db_categories_for_filtration ): data = prepare_filtration_body(**{wrong_parameter: value}) - response = client.post(f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS) + response = client.post( + f"{CATEGORIES_PATH}/search", json=data, headers=TEST_HEADERS + ) assert response.status_code == 422 assert "value is not a valid enumeration member" in response.text @mark.integration @patch( - "annotation.categories.resources.update_category_db", side_effect=SQLAlchemyError + "annotation.categories.resources.update_category_db", + side_effect=SQLAlchemyError, ) def test_update_db_connection_error(prepare_db_categories_different_names): cat_id = 1 @@ -651,7 +716,9 @@ def test_update_category_custom_fields( f"{CATEGORIES_PATH}/{cat_id}", json=data, headers=TEST_HEADERS ) assert response.status_code == 200 - assert prepare_expected_result(response.text) == prepare_category_response(data) + assert prepare_expected_result(response.text) == prepare_category_response( + data + ) @mark.integration @@ -682,7 +749,9 @@ def test_update_other_tenant_exist_name(prepare_db_categories_different_names): f"{CATEGORIES_PATH}/{cat_id}", json=data, headers=TEST_HEADERS ) assert response.status_code == 200 - assert prepare_expected_result(response.text) == prepare_category_response(data) + assert prepare_expected_result(response.text) == prepare_category_response( + data + ) @mark.integration @@ -730,11 +799,15 @@ def test_update_other_tenant_parent(prepare_db_categories_different_names): "category_parent", ("2", "4"), # parent from commons and this tenant other category as parent ) -def test_update_allowed_parent(category_parent, prepare_db_categories_different_names): +def test_update_allowed_parent( + category_parent, prepare_db_categories_different_names 
+): cat_id = "1" data_add = prepare_category_body(name="Footer") data_add["id"] = category_parent - prepare_db_categories_different_names.merge(Category(**clean_data_for_db(data_add))) + prepare_db_categories_different_names.merge( + Category(**clean_data_for_db(data_add)) + ) prepare_db_categories_different_names.commit() data_update = prepare_category_body(parent=category_parent) response = client.put( @@ -748,11 +821,14 @@ def test_update_allowed_parent(category_parent, prepare_db_categories_different_ @mark.integration @patch( - "annotation.categories.resources.delete_category_db", side_effect=SQLAlchemyError + "annotation.categories.resources.delete_category_db", + side_effect=SQLAlchemyError, ) def test_delete_db_connection_error(prepare_db_categories_same_names): cat_id = "1" - response = client.delete(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) + response = client.delete( + f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS + ) assert response.status_code == 500 assert "Error: connection error" in response.text @@ -767,7 +843,9 @@ def test_delete_wrong_category( prepare_db_categories_same_names, ): cat_id = "100" - response = client.delete(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) + response = client.delete( + f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS + ) assert response.status_code == 404 assert "Cannot delete category that doesn't exist" in response.text @@ -775,7 +853,9 @@ def test_delete_wrong_category( @mark.integration def test_delete_common_category(prepare_db_categories_same_names): cat_id = "2" - response = client.delete(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) + response = client.delete( + f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS + ) assert response.status_code == 400 assert "Cannot delete default category" in response.text @@ -783,10 +863,14 @@ def test_delete_common_category(prepare_db_categories_same_names): @mark.integration def test_delete_tenant_category(prepare_db_categories_same_names): cat_id = "1" - response = client.delete(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) + response = client.delete( + f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS + ) assert response.status_code == 204 assert ( - client.get(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS).status_code + client.get( + f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS + ).status_code == 404 ) @@ -796,18 +880,26 @@ def test_delete_tenant_category(prepare_db_categories_same_names): def test_cascade_delete_tenant_parent(add_for_cascade_delete): cat_id = "1" child_1, child_2 = add_for_cascade_delete - response = client.delete(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS) + response = client.delete( + f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS + ) assert response.status_code == 204 assert ( - client.get(f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS).status_code + client.get( + f"{CATEGORIES_PATH}/{cat_id}", headers=TEST_HEADERS + ).status_code == 404 ) assert ( - client.get(f"{CATEGORIES_PATH}/{child_1}", headers=TEST_HEADERS).status_code + client.get( + f"{CATEGORIES_PATH}/{child_1}", headers=TEST_HEADERS + ).status_code == 404 ) assert ( - client.get(f"{CATEGORIES_PATH}/{child_2}", headers=TEST_HEADERS).status_code + client.get( + f"{CATEGORIES_PATH}/{child_2}", headers=TEST_HEADERS + ).status_code == 404 ) @@ -818,14 +910,20 @@ def test_cascade_delete_common_parent(add_for_cascade_delete): common_id = "2" child_1, child_2 = add_for_cascade_delete assert ( - client.get(f"{CATEGORIES_PATH}/{common_id}", 
headers=TEST_HEADERS).status_code + client.get( + f"{CATEGORIES_PATH}/{common_id}", headers=TEST_HEADERS + ).status_code == 404 ) assert ( - client.get(f"{CATEGORIES_PATH}/{child_1}", headers=TEST_HEADERS).status_code + client.get( + f"{CATEGORIES_PATH}/{child_1}", headers=TEST_HEADERS + ).status_code == 404 ) assert ( - client.get(f"{CATEGORIES_PATH}/{child_2}", headers=TEST_HEADERS).status_code + client.get( + f"{CATEGORIES_PATH}/{child_2}", headers=TEST_HEADERS + ).status_code == 404 ) diff --git a/annotation/tests/test_cross_validation.py b/annotation/tests/test_cross_validation.py index 270b3d5c0..e52b85cce 100644 --- a/annotation/tests/test_cross_validation.py +++ b/annotation/tests/test_cross_validation.py @@ -381,7 +381,9 @@ def test_cross_distribution_small_files( ), ], ) -def test_cross_partial_files(annotated_files_pages, files, annotators, expected_tasks): +def test_cross_partial_files( + annotated_files_pages, files, annotators, expected_tasks +): assert ( distribute_validation_partial_files( annotated_files_pages, diff --git a/annotation/tests/test_delete_batch_tasks.py b/annotation/tests/test_delete_batch_tasks.py index 9ee017379..3a321ab10 100644 --- a/annotation/tests/test_delete_batch_tasks.py +++ b/annotation/tests/test_delete_batch_tasks.py @@ -18,7 +18,9 @@ client = TestClient(app) -DELETE_BATCH_TASKS_ANNOTATOR = User(user_id="18d3d189-e73a-4680-bfa7-7ba3fe6ebee5") +DELETE_BATCH_TASKS_ANNOTATOR = User( + user_id="18d3d189-e73a-4680-bfa7-7ba3fe6ebee5" +) CATEGORIES = [ Category( id="18d3d189e73a4680bfa77ba3fe6ebee5", @@ -123,7 +125,9 @@ def test_delete_batch_tasks_status_codes( prepare_db_for_batch_delete_tasks, tasks_id, job_id, expected_code ): - response = client.delete(CRUD_TASKS_PATH, json=tasks_id, headers=TEST_HEADERS) + response = client.delete( + CRUD_TASKS_PATH, json=tasks_id, headers=TEST_HEADERS + ) assert response.status_code == expected_code check_files_distributed_pages(prepare_db_for_batch_delete_tasks, job_id) diff --git a/annotation/tests/test_distribution.py b/annotation/tests/test_distribution.py index 87d4eb6e5..1043f6d87 100644 --- a/annotation/tests/test_distribution.py +++ b/annotation/tests/test_distribution.py @@ -944,7 +944,9 @@ def test_find_files_for_task(task_pages, expected_files): ), ], ) -def test_distribute_annotation_limit_50_pages(files, annotators, expected_tasks): +def test_distribute_annotation_limit_50_pages( + files, annotators, expected_tasks +): assert ( distribute_tasks( {}, @@ -1001,7 +1003,9 @@ def test_distribute_annotation_limit_50_pages(files, annotators, expected_tasks) ], ) @pytest.mark.unittest -def test_distribution_with_extensive_coverage(files, annotators, extensive_coverage): +def test_distribution_with_extensive_coverage( + files, annotators, extensive_coverage +): tasks = distribute_tasks_extensively( files=files, users=annotators, @@ -1071,7 +1075,9 @@ def test_add_unassigned_file( unassigned_pages, expected_result, ): - add_unassigned_file(files_to_distribute, file_id, pages_number, unassigned_pages) + add_unassigned_file( + files_to_distribute, file_id, pages_number, unassigned_pages + ) assert files_to_distribute == expected_result diff --git a/annotation/tests/test_finish_task.py b/annotation/tests/test_finish_task.py index 7f53ee320..e3074af82 100644 --- a/annotation/tests/test_finish_task.py +++ b/annotation/tests/test_finish_task.py @@ -336,14 +336,18 @@ ] -def check_files_finished_pages(test_session: Session, job_id: int, tenant: str): +def check_files_finished_pages( + test_session: Session, job_id: 
int, tenant: str +): finished_tasks = test_session.query(ManualAnnotationTask).filter( ManualAnnotationTask.job_id == job_id, ManualAnnotationTask.status == TaskStatusEnumSchema.finished, ) files = test_session.query(File).filter(File.job_id == job_id).all() validation_type = ( - test_session.query(Job.validation_type).filter_by(job_id=job_id).first() + test_session.query(Job.validation_type) + .filter_by(job_id=job_id) + .first() ) for task_file in files: @@ -546,7 +550,9 @@ def test_finish_not_all_tasks_db_contain( status=500, headers=TEST_HEADERS, ) - client.post(FINISH_TASK_PATH.format(task_id=FINISH_TASK_ID), headers=TEST_HEADERS) + client.post( + FINISH_TASK_PATH.format(task_id=FINISH_TASK_ID), headers=TEST_HEADERS + ) task = prepare_db_for_finish_task_status_two_tasks_same_job.query( ManualAnnotationTask ).get(FINISH_TASK_ID) @@ -825,7 +831,9 @@ def test_finish_task_pending_validation_unblocking( headers=TEST_HEADERS, ) session = prepare_db_for_finish_task_change_validation_status - annotation_finish_task = ManualAnnotationTask(**ANNOTATION_TASKS_TO_FINISH[0]) + annotation_finish_task = ManualAnnotationTask( + **ANNOTATION_TASKS_TO_FINISH[0] + ) session.add(annotation_finish_task) session.commit() client.post( @@ -871,7 +879,9 @@ def test_finish_tasks_failed_validation_statuses( status=200, headers=TEST_HEADERS, ) - validation_finish_task = ManualAnnotationTask(**VALIDATION_TASKS_TO_FINISH[0]) + validation_finish_task = ManualAnnotationTask( + **VALIDATION_TASKS_TO_FINISH[0] + ) session.add(validation_finish_task) session.commit() client.post( @@ -916,7 +926,9 @@ def test_finish_tasks_reannotation_statuses( status=200, headers=TEST_HEADERS, ) - validation_finish_task = ManualAnnotationTask(**VALIDATION_TASKS_TO_FINISH[1]) + validation_finish_task = ManualAnnotationTask( + **VALIDATION_TASKS_TO_FINISH[1] + ) session.add(validation_finish_task) session.commit() client.post( @@ -938,17 +950,22 @@ def test_finish_task_initial_annotator_deleted( ): session = prepare_db_for_finish_task_check_deleted_annotators session.query(ManualAnnotationTask).filter( - ManualAnnotationTask.id == FINISH_TASK_CHECK_DELETE_USER_ANNOTATOR_2["id"] + ManualAnnotationTask.id + == FINISH_TASK_CHECK_DELETE_USER_ANNOTATOR_2["id"] ).delete() session.commit() - session.query(User).filter(User.user_id == FINISH_TASK_USER_2.user_id).delete() + session.query(User).filter( + User.user_id == FINISH_TASK_USER_2.user_id + ).delete() session.commit() end_task_schema = { "annotation_user_for_failed_pages": "initial", } response = client.post( - FINISH_TASK_PATH.format(task_id=FINISH_TASK_CHECK_DELETE_USER_VALIDATOR["id"]), + FINISH_TASK_PATH.format( + task_id=FINISH_TASK_CHECK_DELETE_USER_VALIDATOR["id"] + ), headers=TEST_HEADERS, json=end_task_schema, ) @@ -1102,7 +1119,9 @@ def test_finish_task_with_agreement_score_enabled_score_matched( "annotation.tasks.services.get_file_path_and_bucket", return_value=("", ""), ) as mock2: - with patch("annotation.tasks.resources.update_job_status") as mock4: + with patch( + "annotation.tasks.resources.update_job_status" + ) as mock4: response = client.post( FINISH_TASK_PATH.format(task_id=annotation_tasks[2]["id"]), headers=TEST_HEADERS, @@ -1152,7 +1171,9 @@ def test_finish_task_with_agreement_score_enabled_score_not_matched( "annotation.tasks.services.get_file_path_and_bucket", return_value=("", ""), ) as mock2: - with patch("annotation.tasks.resources.update_job_status") as mock4: + with patch( + "annotation.tasks.resources.update_job_status" + ) as mock4: response = client.post( 
FINISH_TASK_PATH.format(task_id=annotation_tasks[2]["id"]), headers=TEST_HEADERS, @@ -1201,7 +1222,9 @@ def test_finish_task_with_agreement_score_enabled_annotation_not_finished( "annotation.tasks.services.get_file_path_and_bucket", return_value=("", ""), ) as mock2: - with patch("annotation.tasks.resources.update_job_status") as mock4: + with patch( + "annotation.tasks.resources.update_job_status" + ) as mock4: response = client.post( FINISH_TASK_PATH.format(task_id=annotation_tasks[2]["id"]), headers=TEST_HEADERS, diff --git a/annotation/tests/test_get_accumulated_revisions.py b/annotation/tests/test_get_accumulated_revisions.py index fbf5c26ae..840593605 100644 --- a/annotation/tests/test_get_accumulated_revisions.py +++ b/annotation/tests/test_get_accumulated_revisions.py @@ -373,7 +373,9 @@ def test_get_annotation_for_latest_revision_status_codes( ], indirect=["db_errors"], ) -def test_get_annotation_for_latest_revision_db_exceptions(monkeypatch, db_errors): +def test_get_annotation_for_latest_revision_db_exceptions( + monkeypatch, db_errors +): response = client.get( construct_accumulated_revs_path( DOCS[0].job_id, diff --git a/annotation/tests/test_get_annotation_for_particular_revision.py b/annotation/tests/test_get_annotation_for_particular_revision.py index d1989076e..f05b2defe 100644 --- a/annotation/tests/test_get_annotation_for_particular_revision.py +++ b/annotation/tests/test_get_annotation_for_particular_revision.py @@ -132,7 +132,9 @@ def test_get_annotation_for_particular_revision_status_codes( Mock(return_value=minio_particular_revision), ) response = client.get( - construct_part_rev_path(PART_REV_DOC.job_id, file_id, PART_REV_DOC.revision), + construct_part_rev_path( + PART_REV_DOC.job_id, file_id, PART_REV_DOC.revision + ), headers={ HEADER_TENANT: tenant, AUTHORIZATION: f"{BEARER} {TEST_TOKEN}", @@ -151,7 +153,9 @@ def test_get_annotation_for_particular_revision_status_codes( ], indirect=["db_errors"], ) -def test_get_annotation_for_particular_revision_db_exceptions(monkeypatch, db_errors): +def test_get_annotation_for_particular_revision_db_exceptions( + monkeypatch, db_errors +): response = client.get( construct_part_rev_path( PART_REV_DOC.job_id, diff --git a/annotation/tests/test_get_child_categories.py b/annotation/tests/test_get_child_categories.py index 029f068d8..f8da46ccc 100644 --- a/annotation/tests/test_get_child_categories.py +++ b/annotation/tests/test_get_child_categories.py @@ -39,7 +39,9 @@ Category(id="4", name="4", parent="2", tenant=TEST_TENANT, type=TEST_TYPE), ) -OTHER_TENANT_CHILD_CATEGORY = Category(id="5", name="5", tenant="other", type=TEST_TYPE) +OTHER_TENANT_CHILD_CATEGORY = Category( + id="5", name="5", tenant="other", type=TEST_TYPE +) NOT_EXIST_ID = "100" # Common categories have tree hierarchical structure of ids: @@ -118,7 +120,9 @@ def test_get_child_categories( ["category_id", "tenant"], [("1", "other"), ("5", TEST_TENANT), (NOT_EXIST_ID, TEST_TENANT)], ) -def test_get_wrong_categories(prepare_db_child_categories, category_id, tenant): +def test_get_wrong_categories( + prepare_db_child_categories, category_id, tenant +): response = client.get( f"{CATEGORIES_PATH}/{category_id}/child", headers={ diff --git a/annotation/tests/test_get_job.py b/annotation/tests/test_get_job.py index f984563cf..785ab0c80 100644 --- a/annotation/tests/test_get_job.py +++ b/annotation/tests/test_get_job.py @@ -185,7 +185,9 @@ def test_get_jobs_by_file_id_sql_connection_error( (FILE_TEST_IDS[0], JOB_TEST_TENANTS[1]), ], ) -def 
test_get_jobs_by_file_id_404_error(prepare_db_for_get_job, tenant, file_id): +def test_get_jobs_by_file_id_404_error( + prepare_db_for_get_job, tenant, file_id +): response = client.get( f"{ANNOTATION_PATH}/{file_id}", headers={ @@ -231,7 +233,9 @@ def test_get_jobs_by_file_id_404_error(prepare_db_for_get_job, tenant, file_id): ), ], ) -def test_get_jobs_by_file(prepare_db_for_get_job, file_id, tenant, expected_response): +def test_get_jobs_by_file( + prepare_db_for_get_job, file_id, tenant, expected_response +): response = client.get( f"{ANNOTATION_PATH}/{file_id}", headers={ @@ -256,7 +260,9 @@ def test_get_jobs_name(monkeypatch, prepare_db_for_get_job): 2: "Job2name", 3: "JobNameFromJobsMicroservice", } - result = collect_job_names(session, job_ids, JOB_TEST_TENANTS[0], TEST_TOKEN) + result = collect_job_names( + session, job_ids, JOB_TEST_TENANTS[0], TEST_TOKEN + ) job_name_from_db = session.query(Job.name).filter(Job.job_id == 3).scalar() assert job_name_from_db == "JobNameFromJobsMicroservice" assert result == expected_result diff --git a/annotation/tests/test_get_job_files.py b/annotation/tests/test_get_job_files.py index b2de86494..ceb4788bc 100644 --- a/annotation/tests/test_get_job_files.py +++ b/annotation/tests/test_get_job_files.py @@ -200,7 +200,9 @@ def test_get_job_files_404_error( ), ], ) -def test_get_job_files(prepare_db_for_get_job_files, job_id, tenant, expected_files): +def test_get_job_files( + prepare_db_for_get_job_files, job_id, tenant, expected_files +): response = client.get( GET_JOB_FILES_PATH.format(job_id=job_id), headers={ @@ -235,7 +237,8 @@ def test_get_job_files(prepare_db_for_get_job_files, job_id, tenant, expected_fi "current_page": 1, "page_size": 50, "files": [ - {"id": f.file_id, "status": f.status} for f in GET_JOB_FILES[:3] + {"id": f.file_id, "status": f.status} + for f in GET_JOB_FILES[:3] ], }, ), @@ -268,7 +271,8 @@ def test_get_job_files(prepare_db_for_get_job_files, job_id, tenant, expected_fi "current_page": 1, "page_size": 2, "files": [ - {"id": f.file_id, "status": f.status} for f in GET_JOB_FILES[:2] + {"id": f.file_id, "status": f.status} + for f in GET_JOB_FILES[:2] ], }, ), diff --git a/annotation/tests/test_get_jobs_info_by_files.py b/annotation/tests/test_get_jobs_info_by_files.py index 4e7b7f4bc..791f1dd97 100644 --- a/annotation/tests/test_get_jobs_info_by_files.py +++ b/annotation/tests/test_get_jobs_info_by_files.py @@ -30,7 +30,9 @@ File(file_id=3, tenant=TEST_TENANT, job_id=2, pages_number=5), File(file_id=4, tenant=TEST_TENANT, job_id=2, pages_number=5), ] -FILES_THIRD_JOB = [File(file_id=5, tenant=TEST_TENANT, job_id=3, pages_number=5)] +FILES_THIRD_JOB = [ + File(file_id=5, tenant=TEST_TENANT, job_id=3, pages_number=5) +] JOBS = [ # files with ids [1, 2, 3, 6] belong to this job diff --git a/annotation/tests/test_get_pages_info.py b/annotation/tests/test_get_pages_info.py index 6bff4e0d5..ea5e0b91d 100644 --- a/annotation/tests/test_get_pages_info.py +++ b/annotation/tests/test_get_pages_info.py @@ -11,7 +11,13 @@ BEARER, HEADER_TENANT, ) -from annotation.models import AnnotatedDoc, File, Job, ManualAnnotationTask, User +from annotation.models import ( + AnnotatedDoc, + File, + Job, + ManualAnnotationTask, + User, +) from annotation.schemas import TaskStatusEnumSchema, ValidationSchema from annotation.tasks import get_task_revisions from tests.consts import CRUD_TASKS_PATH diff --git a/annotation/tests/test_get_revisions.py b/annotation/tests/test_get_revisions.py index aac8663d1..8834f8b82 100644 --- 
a/annotation/tests/test_get_revisions.py +++ b/annotation/tests/test_get_revisions.py @@ -363,7 +363,9 @@ def test_get_latest_revision_by_user_s3_connection_error( @pytest.mark.integration @patch.object(Session, "query") -def test_get_all_revisions_sql_connection_error(Session, prepare_db_for_get_revisions): +def test_get_all_revisions_sql_connection_error( + Session, prepare_db_for_get_revisions +): Session.side_effect = Mock(side_effect=SQLAlchemyError()) response = client.get( f"{ANNOTATION_PATH}/{JOBS_IDS[0]}/{FILES_IDS[0]}", diff --git a/annotation/tests/test_get_revisions_without_annotation.py b/annotation/tests/test_get_revisions_without_annotation.py index cc3a6b78c..206b0fdce 100644 --- a/annotation/tests/test_get_revisions_without_annotation.py +++ b/annotation/tests/test_get_revisions_without_annotation.py @@ -204,7 +204,9 @@ def test_get_revisions_without_annotation_status_codes( ], indirect=["db_errors"], ) -def test_get_revisions_without_annotation_db_exceptions(monkeypatch, db_errors): +def test_get_revisions_without_annotation_db_exceptions( + monkeypatch, db_errors +): response = client.get( construct_rev_without_annotation_path(JOB_ID, FILE_ID_1), headers={"X-Current-Tenant": TEST_TENANT}, diff --git a/annotation/tests/test_job_categories.py b/annotation/tests/test_job_categories.py index ee3a7795a..d7522ccda 100644 --- a/annotation/tests/test_job_categories.py +++ b/annotation/tests/test_job_categories.py @@ -425,7 +425,9 @@ def test_get_job_categories_pagination( @mark.integration @mark.parametrize("page_size", (20, 1, 10)) -def test_get_job_wrong_pagination(page_size, prepare_db_job_with_filter_categories): +def test_get_job_wrong_pagination( + page_size, prepare_db_job_with_filter_categories +): pagination_params = {"page_size": page_size, "page_num": 1} response = client.get( f"{JOBS_PATH}/{MOCK_ID}/categories", @@ -526,7 +528,9 @@ def test_search_allowed_categories( ) category = response.json()["data"][0] assert response.status_code == 200 - assert prepare_expected_result(category) == prepare_category_response(expected) + assert prepare_expected_result(category) == prepare_category_response( + expected + ) @mark.integration @@ -563,7 +567,9 @@ def test_search_filter_name_like( prepare_db_categories_for_filtration, prepare_db_job_with_filter_categories, ): - data = prepare_filtration_body(field="name", operator=operator, value=value) + data = prepare_filtration_body( + field="name", operator=operator, value=value + ) response = client.post( f"{POST_JOBS_PATH}/{MOCK_ID}/categories/search", json=data, @@ -582,7 +588,9 @@ def test_search_filter_ordering( prepare_db_categories_for_filtration, prepare_db_job_with_filter_categories, ): - data = prepare_filtration_body(operator="lt", value="5", direction=direction) + data = prepare_filtration_body( + operator="lt", value="5", direction=direction + ) response = client.post( f"{POST_JOBS_PATH}/{MOCK_ID}/categories/search", json=data, @@ -598,7 +606,9 @@ def test_search_filter_distinct_id( prepare_db_categories_for_filtration, prepare_db_job_with_filter_categories, ): - data = prepare_filtration_body(page_size=30, field="id", operator="distinct") + data = prepare_filtration_body( + page_size=30, field="id", operator="distinct" + ) response = client.post( f"{POST_JOBS_PATH}/{MOCK_ID}/categories/search", json=data, @@ -664,7 +674,8 @@ def test_search_categories_400_error( headers=TEST_HEADERS, ) error_message = ( - "SELECT DISTINCT ON expressions must " "match initial ORDER BY expressions" + "SELECT DISTINCT ON expressions must 
" + "match initial ORDER BY expressions" ) assert response.status_code == 400 assert error_message in response.text diff --git a/annotation/tests/test_microservices_search.py b/annotation/tests/test_microservices_search.py index 27d34be9a..e00795510 100644 --- a/annotation/tests/test_microservices_search.py +++ b/annotation/tests/test_microservices_search.py @@ -3,7 +3,9 @@ from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ) -from annotation.microservice_communication.jobs_communication import JOBS_SEARCH_URL +from annotation.microservice_communication.jobs_communication import ( + JOBS_SEARCH_URL, +) from annotation.microservice_communication.search import ( PAGE_SIZE, calculate_amount_of_pagination_pages, @@ -235,7 +237,9 @@ @pytest.mark.parametrize( ["elem_amount", "expected_amount_of_pages"], [(50, 1), (100, 1), (101, 2)] ) -def test_calculate_amount_of_pagination_pages(elem_amount, expected_amount_of_pages): +def test_calculate_amount_of_pagination_pages( + elem_amount, expected_amount_of_pages +): actual_result = calculate_amount_of_pagination_pages(elem_amount) assert actual_result == expected_amount_of_pages @@ -296,7 +300,9 @@ def test_expand_response(): ], ) @responses.activate -def test_get_response(ids, url, is_assets, microservice_response, expected_response): +def test_get_response( + ids, url, is_assets, microservice_response, expected_response +): responses.add( responses.POST, url, diff --git a/annotation/tests/test_post.py b/annotation/tests/test_post.py index b9a50b7ba..3f2a719f6 100644 --- a/annotation/tests/test_post.py +++ b/annotation/tests/test_post.py @@ -7,7 +7,9 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session -from annotation.microservice_communication.assets_communication import ASSETS_URL +from annotation.microservice_communication.assets_communication import ( + ASSETS_URL, +) from annotation.models import Category, File, Job, ManualAnnotationTask, User from annotation.schemas import CategoryTypeSchema, ValidationSchema from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app @@ -392,7 +394,9 @@ def check_files_distributed_pages(test_session: Session, job_id: int): ) files = test_session.query(File).filter(File.job_id == job_id).all() validation_type = ( - test_session.query(Job.validation_type).filter_by(job_id=job_id).first() + test_session.query(Job.validation_type) + .filter_by(job_id=job_id) + .first() ) test_session.add_all(files) test_session.commit() @@ -407,7 +411,8 @@ def check_files_distributed_pages(test_session: Session, job_id: int): distributed_annotating_pages = sorted(distributed_annotating_pages) if validation_type[0] != ValidationSchema.validation_only: assert ( - task_file.distributed_annotating_pages == distributed_annotating_pages + task_file.distributed_annotating_pages + == distributed_annotating_pages ) validating_tasks = tasks.filter( @@ -418,7 +423,10 @@ def check_files_distributed_pages(test_session: Session, job_id: int): for validating_task in validating_tasks: distributed_validating_pages.update(validating_task.pages) distributed_validating_pages = sorted(distributed_validating_pages) - assert task_file.distributed_validating_pages == distributed_validating_pages + assert ( + task_file.distributed_validating_pages + == distributed_validating_pages + ) @pytest.mark.integration @@ -506,8 +514,12 @@ def test_post_tasks_only_datasets( @pytest.mark.integration def test_post_tasks_new_user(monkeypatch, prepare_db_for_post): - assert not 
prepare_db_for_post.query(User).get(TASK_INFO_NEW_USER["user_ids"][0]) - assert not prepare_db_for_post.query(User).get(TASK_INFO_NEW_USER["user_ids"][1]) + assert not prepare_db_for_post.query(User).get( + TASK_INFO_NEW_USER["user_ids"][0] + ) + assert not prepare_db_for_post.query(User).get( + TASK_INFO_NEW_USER["user_ids"][1] + ) monkeypatch.setattr( "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=FILES_FROM_ASSETS_FOR_TASK_INFO_NEW_USER), @@ -525,9 +537,15 @@ def test_post_tasks_new_user(monkeypatch, prepare_db_for_post): for user in TASK_INFO_NEW_USER["user_ids"]: assert user in response.text assert expected_message in response.text - assert not prepare_db_for_post.query(User).get(TASK_INFO_NEW_USER["user_ids"][0]) - assert not prepare_db_for_post.query(User).get(TASK_INFO_NEW_USER["user_ids"][1]) - check_files_distributed_pages(prepare_db_for_post, TASK_INFO_NEW_USER["job_id"]) + assert not prepare_db_for_post.query(User).get( + TASK_INFO_NEW_USER["user_ids"][0] + ) + assert not prepare_db_for_post.query(User).get( + TASK_INFO_NEW_USER["user_ids"][1] + ) + check_files_distributed_pages( + prepare_db_for_post, TASK_INFO_NEW_USER["job_id"] + ) @pytest.mark.integration @@ -568,7 +586,9 @@ def test_post_tasks_deadline( "annotation.microservice_communication.assets_communication.get_response", Mock(return_value=assets_files), ) - response = client.post(f"{POST_TASKS_PATH}", json=task_info, headers=TEST_HEADERS) + response = client.post( + f"{POST_TASKS_PATH}", json=task_info, headers=TEST_HEADERS + ) assert response.status_code == 201 for task in response.json(): assert task["deadline"] == expected_deadline @@ -587,7 +607,9 @@ def test_post_tasks_validation_only(monkeypatch, prepare_db_for_post): "datasets": [], "job_id": JOBS_ID[3], } - response = client.post(f"{POST_TASKS_PATH}", json=tasks_info, headers=TEST_HEADERS) + response = client.post( + f"{POST_TASKS_PATH}", json=tasks_info, headers=TEST_HEADERS + ) assert response.status_code == 201 for task in response.json(): assert task["is_validation"] diff --git a/annotation/tests/test_post_annotation.py b/annotation/tests/test_post_annotation.py index e09220063..a9714671d 100644 --- a/annotation/tests/test_post_annotation.py +++ b/annotation/tests/test_post_annotation.py @@ -68,8 +68,12 @@ type=CategoryTypeSchema.box, ), ] -POST_ANNOTATION_ANNOTATOR = User(user_id="6ffab2dd-3605-46d4-98a1-2d20011e132d") -POST_ANNOTATION_VALIDATOR = User(user_id="6ffab2dd-3605-46d4-98a1-2d20011e132e") +POST_ANNOTATION_ANNOTATOR = User( + user_id="6ffab2dd-3605-46d4-98a1-2d20011e132d" +) +POST_ANNOTATION_VALIDATOR = User( + user_id="6ffab2dd-3605-46d4-98a1-2d20011e132e" +) FIRST_DATE = "2021-12-01T12:19:54.188831" @@ -336,7 +340,9 @@ DIFF_FIRST_PAGE = copy.deepcopy(PAGES[1]) DIFF_FIRST_PAGE["page_num"] = 1 -HASH_OF_DIFF_FIRST_PAGE = sha1(json.dumps(DIFF_FIRST_PAGE).encode()).hexdigest() +HASH_OF_DIFF_FIRST_PAGE = sha1( + json.dumps(DIFF_FIRST_PAGE).encode() +).hexdigest() DOC_FOR_FIRST_SAVE_BY_USER = { "user": POST_ANNOTATION_ANNOTATOR.user_id, @@ -513,7 +519,9 @@ "revision": sha1( json.dumps(DOC_FOR_FIRST_SAVE_BY_USER["pages"][0]).encode() + json.dumps(DOC_FOR_FIRST_SAVE_BY_USER["validated"]).encode() - + json.dumps(DOC_FOR_FIRST_SAVE_BY_USER["failed_validation_pages"]).encode() + + json.dumps( + DOC_FOR_FIRST_SAVE_BY_USER["failed_validation_pages"] + ).encode() ).hexdigest(), "user": POST_ANNOTATION_ANNOTATOR.user_id, "pipeline": None, @@ -534,7 +542,9 @@ ANNOTATED_DOC_PIPELINE_FIRST = { "revision": sha1( 
json.dumps(DOC_FOR_FIRST_SAVE_BY_PIPELINE["pages"][0]).encode() - + json.dumps(DOC_FOR_FIRST_SAVE_BY_PIPELINE.get("validated", [])).encode() + + json.dumps( + DOC_FOR_FIRST_SAVE_BY_PIPELINE.get("validated", []) + ).encode() + json.dumps( DOC_FOR_FIRST_SAVE_BY_PIPELINE.get("failed_validation_pages", []) ).encode() @@ -554,9 +564,15 @@ } ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE = copy.deepcopy(ANNOTATED_DOC_FIRST) -ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["file_id"] = POST_ANNOTATION_TASK_2["file_id"] -ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["job_id"] = POST_ANNOTATION_TASK_2["job_id"] -ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["task_id"] = POST_ANNOTATION_TASK_2["id"] +ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["file_id"] = POST_ANNOTATION_TASK_2[ + "file_id" +] +ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["job_id"] = POST_ANNOTATION_TASK_2[ + "job_id" +] +ANNOTATED_DOC_WITH_DIFFERENT_JOB_AND_FILE["task_id"] = POST_ANNOTATION_TASK_2[ + "id" +] PAGES_SHA = {} B_PAGES = b"" @@ -603,7 +619,9 @@ DOC_FOR_SECOND_SAVE_BY_USER["base_revision"].encode() + json.dumps(DOC_FOR_SECOND_SAVE_BY_USER["pages"][0]).encode() + json.dumps(DOC_FOR_SECOND_SAVE_BY_USER["validated"]).encode() - + json.dumps(DOC_FOR_SECOND_SAVE_BY_USER["failed_validation_pages"]).encode() + + json.dumps( + DOC_FOR_SECOND_SAVE_BY_USER["failed_validation_pages"] + ).encode() ).hexdigest(), "user": POST_ANNOTATION_ANNOTATOR.user_id, "pipeline": None, @@ -624,7 +642,9 @@ ANNOTATED_DOC_WITH_BOTH_TOKENS_AND_BBOX = { "revision": sha1( json.dumps(DOC_WITH_BBOX_AND_TOKENS_FIELDS["pages"][0]).encode() - + json.dumps(DOC_WITH_BBOX_AND_TOKENS_FIELDS.get("validated", [])).encode() + + json.dumps( + DOC_WITH_BBOX_AND_TOKENS_FIELDS.get("validated", []) + ).encode() + json.dumps( DOC_WITH_BBOX_AND_TOKENS_FIELDS.get("failed_validation_pages", []) ).encode() @@ -851,7 +871,9 @@ POST_ANNOTATION_PG_DOC.revision.encode() + json.dumps(DOC_FOR_CHECK_MERGE_CONFLICT["pages"][0]).encode() + json.dumps(DOC_FOR_CHECK_MERGE_CONFLICT["validated"]).encode() - + json.dumps(DOC_FOR_CHECK_MERGE_CONFLICT["failed_validation_pages"]).encode() + + json.dumps( + DOC_FOR_CHECK_MERGE_CONFLICT["failed_validation_pages"] + ).encode() ).hexdigest(), "user": POST_ANNOTATION_ANNOTATOR.user_id, "pipeline": None, @@ -1182,7 +1204,8 @@ def test_post_annotation_by_pipeline_status_codes( response = client.post( construct_path( ANNOTATION_PATH, - f"{POST_ANNOTATION_PG_DOC.job_id}/" f"{POST_ANNOTATION_PG_DOC.file_id}", + f"{POST_ANNOTATION_PG_DOC.job_id}/" + f"{POST_ANNOTATION_PG_DOC.file_id}", ), headers={ HEADER_TENANT: POST_ANNOTATION_PG_DOC.tenant, @@ -1433,7 +1456,9 @@ def test_upload_json_to_minio(mock_minio_empty_bucket): def test_upload_pages_to_minio(mock_minio_empty_bucket): s3_resource = mock_minio_empty_bucket - upload_pages_to_minio(PAGES_SCHEMA, PAGES_SHA, S3_PATH, TEST_TENANT, s3_resource) + upload_pages_to_minio( + PAGES_SCHEMA, PAGES_SHA, S3_PATH, TEST_TENANT, s3_resource + ) for page_obj in s3_resource.Bucket(TEST_TENANT).objects.filter( Delimiter="/", Prefix=S3_PATH + "/" @@ -1614,7 +1639,9 @@ def test_check_docs_identity(latest_doc, new_doc, expected_result): # validated: empty # failed_validation_pages: empty ( - ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("docs_with_categories")[0], + ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("docs_with_categories")[ + 0 + ], "path/to/file", "bucket-of-phys-file", { @@ -1661,7 +1688,9 @@ def test_create_manifest_json_first_upload( prepare_db_for_manifest_creation_with_one_record, s3_resource, ) - man_obj = 
s3_resource.Object(POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}") + man_obj = s3_resource.Object( + POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}" + ) actual_manifest = json.loads(man_obj.get()["Body"].read().decode("utf-8")) del actual_manifest["date"] assert actual_manifest == expected_manifest @@ -1795,8 +1824,12 @@ def test_create_manifest_json_first_upload( # validated : from latest revision # failed_validation_pages: empty ( - ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("same_pages_not_validated"), - ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("same_pages_not_validated")[1], + ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get( + "same_pages_not_validated" + ), + ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get( + "same_pages_not_validated" + )[1], "path/to/another/file", "another-bucket", { @@ -1819,7 +1852,9 @@ def test_create_manifest_json_first_upload( # failed_validation_pages: empty ( ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("docs_with_categories"), - ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("docs_with_categories")[1], + ANNOTATED_DOCS_FOR_MANIFEST_CREATION.get("docs_with_categories")[ + 1 + ], "path/to/file", "bucket-of-phys-file", { @@ -1868,7 +1903,9 @@ def test_create_manifest_json_with_annotated_docs_and_manifest_in_minio( prepare_db_for_manifest_creation_with_several_records, s3_resource, ) - man_obj = s3_resource.Object(POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}") + man_obj = s3_resource.Object( + POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}" + ) actual_manifest = json.loads(man_obj.get()["Body"].read().decode("utf-8")) delete_date_fields([actual_manifest]) assert actual_manifest == expected_manifest @@ -1897,7 +1934,9 @@ def test_create_manifest_json_date_field( ) ) prepare_db_for_manifest_creation_with_one_record.commit() - man_obj = s3_resource.Object(POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}") + man_obj = s3_resource.Object( + POST_ANNOTATION_PG_DOC.tenant, f"{S3_PATH}/{MANIFEST}" + ) actual_manifest = json.loads(man_obj.get()["Body"].read().decode("utf-8")) assert annotated_doc["date"] @@ -2020,7 +2059,9 @@ def test_construct_annotated_doc( ) amount_of_docs_after_commit = db.query(AnnotatedDoc).count() - delete_date_fields([actual_doc, doc_in_db_after_commit, formatted_actual_doc]) + delete_date_fields( + [actual_doc, doc_in_db_after_commit, formatted_actual_doc] + ) assert doc_in_session_after_commit == [] assert doc_in_db_after_commit == expected_result @@ -2042,7 +2083,9 @@ def test_construct_annotated_doc_different_jobs_and_files( s3_resource = mock_minio_empty_bucket expected_result_1 = { - k: v for k, v in ANNOTATED_DOC_FIRST.items() if k not in ("similar_revisions",) + k: v + for k, v in ANNOTATED_DOC_FIRST.items() + if k not in ("similar_revisions",) } expected_result_2 = { k: v @@ -2203,7 +2246,8 @@ def test_post_annotation_by_pipeline( actual_result = client.post( construct_path( ANNOTATION_PATH, - f"{POST_ANNOTATION_PG_DOC.job_id}/" f"{POST_ANNOTATION_PG_DOC.file_id}", + f"{POST_ANNOTATION_PG_DOC.job_id}/" + f"{POST_ANNOTATION_PG_DOC.file_id}", ), headers={ HEADER_TENANT: POST_ANNOTATION_PG_DOC.tenant, diff --git a/annotation/tests/test_post_job.py b/annotation/tests/test_post_job.py index f436f79c5..812824d2c 100644 --- a/annotation/tests/test_post_job.py +++ b/annotation/tests/test_post_job.py @@ -430,7 +430,10 @@ def test_post_job_connection_exception(Session, prepare_db_for_post_job): ( POST_JOB_NEW_JOBS[11], 422, - ("Fields files and datasets should not be empty " "at the same time."), + ( + "Fields files and datasets should 
not be empty " + "at the same time." + ), ), # even in ExtractionJob must be either files or datasets ( POST_JOB_NEW_JOBS[12], @@ -722,7 +725,9 @@ def test_get_job_attributes_for_post( [(POST_JOB_NEW_JOBS[10], "ExtractionJob1"), (POST_JOB_NEW_JOBS[13], None)], ) @responses.activate -def test_post_extraction_job_saved(prepare_db_for_post_job, job_info, expected_name): +def test_post_extraction_job_saved( + prepare_db_for_post_job, job_info, expected_name +): """Tests that new ExtractionJob with valid user type fields will be created in db in default 'pending' status and that values for not-provided optional fields 'validation_type', 'deadline', 'name' are @@ -766,7 +771,9 @@ def test_post_import_job_saved(prepare_db_for_post_job): ) assert response.status_code == expected_response_code saved_job = row_to_dict(session.query(Job).get(job_info["job_id"])) - assert not session.query(File).filter(File.job_id == job_info["job_id"]).all() + assert ( + not session.query(File).filter(File.job_id == job_info["job_id"]).all() + ) assert not saved_job.get("categories") assert not saved_job.get("deadline") assert saved_job.get("validation_type") == ValidationSchema.cross diff --git a/annotation/tests/test_post_unassgined_files.py b/annotation/tests/test_post_unassgined_files.py index 84d797e01..01c15e343 100644 --- a/annotation/tests/test_post_unassgined_files.py +++ b/annotation/tests/test_post_unassgined_files.py @@ -328,7 +328,9 @@ def test_post_tasks_for_unassigned_files( db_post_unassigned_files.query(ManualAnnotationTask) .filter( ManualAnnotationTask.job_id == job_id, - not_(ManualAnnotationTask.status == TaskStatusEnumSchema.in_progress), + not_( + ManualAnnotationTask.status == TaskStatusEnumSchema.in_progress + ), ) .all() ) @@ -341,7 +343,9 @@ def test_post_tasks_for_unassigned_files( del task["id"] files_in_db = ( - db_post_unassigned_files.query(File).filter(File.job_id == job_id).all() + db_post_unassigned_files.query(File) + .filter(File.job_id == job_id) + .all() ) files_in_db = [row_to_dict(f) for f in files_in_db] diff --git a/annotation/tests/test_search_kafka.py b/annotation/tests/test_search_kafka.py index 6d7c56f89..e2af4b537 100644 --- a/annotation/tests/test_search_kafka.py +++ b/annotation/tests/test_search_kafka.py @@ -130,8 +130,12 @@ def __init__(self, bootstrap_servers, client_id, value_serializer): @mark.unittest -@mock.patch(target="annotation.annotations.main.KAFKA_BOOTSTRAP_SERVER", new="url_1") -@mock.patch(target="annotation.annotations.main.KafkaProducer", new=MockProducer) +@mock.patch( + target="annotation.annotations.main.KAFKA_BOOTSTRAP_SERVER", new="url_1" +) +@mock.patch( + target="annotation.annotations.main.KafkaProducer", new=MockProducer +) def test_add_search_annotation_producer(monkeypatch): """Checks that "add_search_annotation_producer" function calls "_init_search_annotation_producer" which creates KafkaProducer with @@ -150,7 +154,8 @@ def test_producer_startup_creation(monkeypatch): """Checks that producer creation automatically called on app startup.""" mock_startup = mock.Mock() monkeypatch.setattr( - "annotation.annotations.main._init_search_annotation_producer", mock_startup + "annotation.annotations.main._init_search_annotation_producer", + mock_startup, ) with TestClient(app): mock_startup.assert_called_once() @@ -168,8 +173,12 @@ def test_producer_startup_creation(monkeypatch): (f"{ANNOTATION_KAFKA_TASK_ID}", DOC_FOR_SAVE_BY_USER), ], ) -@mock.patch(target="annotation.annotations.main.KAFKA_SEARCH_TOPIC", new="test") 
-@mock.patch(target="annotation.annotations.main.KafkaProducer", new=mock.Mock()) +@mock.patch( + target="annotation.annotations.main.KAFKA_SEARCH_TOPIC", new="test" +) +@mock.patch( + target="annotation.annotations.main.KafkaProducer", new=mock.Mock() +) def test_post_annotation_send_message( monkeypatch, empty_bucket, diff --git a/annotation/tests/test_start_job.py b/annotation/tests/test_start_job.py index 08b87561a..3da8691bb 100644 --- a/annotation/tests/test_start_job.py +++ b/annotation/tests/test_start_job.py @@ -180,7 +180,9 @@ (CHANGE_STATUSES_JOBS[0].job_id, JobStatusEnumSchema.failed), ], ) -def test_update_inner_job_status(job_id, status, prepare_db_for_update_job_status): +def test_update_inner_job_status( + job_id, status, prepare_db_for_update_job_status +): update_inner_job_status(prepare_db_for_update_job_status, job_id, status) prepare_db_for_update_job_status.commit() db_job = prepare_db_for_update_job_status.query(Job).get(job_id) @@ -195,7 +197,9 @@ def test_post_start_job_500_response( job_id=CHANGE_STATUSES_TASKS[0].job_id, ): Session.side_effect = Mock(side_effect=SQLAlchemyError()) - response = client.post(START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS) + response = client.post( + START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS + ) assert response.status_code == 500 assert "Error: connection error" in response.text @@ -242,7 +246,9 @@ def test_post_start_job_bad_job_response( status=job_response_status, headers=TEST_HEADERS, ) - response = client.post(START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS) + response = client.post( + START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS + ) assert response.status_code == 500 assert expected_response in response.text saved_tasks = ( @@ -289,7 +295,9 @@ def test_post_start_job_tasks_statuses( status=200, headers=TEST_HEADERS, ) - response = client.post(START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS) + response = client.post( + START_JOB_PATH.format(job_id=job_id), headers=TEST_HEADERS + ) prepare_db_for_change_statuses.commit() assert response.status_code == 200 assert response.json() == expected_response diff --git a/annotation/tests/test_tasks_crud_cr.py b/annotation/tests/test_tasks_crud_cr.py index b86227ee0..0dc3e9d11 100644 --- a/annotation/tests/test_tasks_crud_cr.py +++ b/annotation/tests/test_tasks_crud_cr.py @@ -13,7 +13,9 @@ from annotation.microservice_communication.assets_communication import ( ASSETS_FILES_URL, ) -from annotation.microservice_communication.jobs_communication import JOBS_SEARCH_URL +from annotation.microservice_communication.jobs_communication import ( + JOBS_SEARCH_URL, +) from annotation.microservice_communication.user import USERS_SEARCH_URL from annotation.models import Category, File, Job, ManualAnnotationTask, User from annotation.schemas import CategoryTypeSchema, ValidationSchema @@ -654,14 +656,18 @@ def prepare_stats_export_body( @patch.object(Session, "query") def test_post_task_500_response(Session, prepare_db_for_cr_task): Session.side_effect = Mock(side_effect=SQLAlchemyError()) - response = client.post(CRUD_TASKS_PATH, json=NEW_TASKS[0], headers=TEST_HEADERS) + response = client.post( + CRUD_TASKS_PATH, json=NEW_TASKS[0], headers=TEST_HEADERS + ) assert response.status_code == 500 assert "Error: " in response.text @pytest.mark.integration def test_post_task_wrong_job(prepare_db_for_cr_task): - response = client.post(CRUD_TASKS_PATH, json=TASK_WRONG_JOB, headers=TEST_HEADERS) + response = client.post( + CRUD_TASKS_PATH, 
json=TASK_WRONG_JOB, headers=TEST_HEADERS + ) assert response.status_code == 400 assert "Error: wrong job_id" in response.text @@ -697,8 +703,12 @@ def test_post_task_wrong_job(prepare_db_for_cr_task): ), # ExtractionJob ], ) -def test_post_task_wrong_users_errors(prepare_db_for_cr_task, task_info, error_message): - response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) +def test_post_task_wrong_users_errors( + prepare_db_for_cr_task, task_info, error_message +): + response = client.post( + CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS + ) assert response.status_code == 400 assert error_message in response.text @@ -712,7 +722,9 @@ def test_post_task_wrong_users_errors(prepare_db_for_cr_task, task_info, error_m ], ) def test_post_task_422_pages_response(prepare_db_for_cr_task, task_info): - response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) + response = client.post( + CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS + ) assert response.status_code == 422 @@ -743,10 +755,18 @@ def test_post_task_422_pages_response(prepare_db_for_cr_task, task_info): ], ) def test_post_task(prepare_db_for_cr_task, task_info, expected_response): - response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) + response = client.post( + CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS + ) assert response.status_code == 201 - assert [value for key, value in response.json().items() if key == "id" and value] - response = {key: value for key, value in response.json().items() if key != "id"} + assert [ + value + for key, value in response.json().items() + if key == "id" and value + ] + response = { + key: value for key, value in response.json().items() if key != "id" + } assert response == expected_response check_files_distributed_pages(prepare_db_for_cr_task, task_info["job_id"]) @@ -869,7 +889,9 @@ def test_update_task_already_updated_change_event( @pytest.mark.integration def test_create_export_data_not_found(prepare_db_update_stats): - body = prepare_stats_export_body(user_ids=[f"{uuid4()}" for _ in range(10)]) + body = prepare_stats_export_body( + user_ids=[f"{uuid4()}" for _ in range(10)] + ) response = client.post( f"{CRUD_TASKS_PATH}/export", @@ -894,7 +916,9 @@ def test_create_export_data_not_found(prepare_db_update_stats): def test_create_export_invalid_datetime_format( prepare_db_for_cr_task, date_from, date_to ): - body = prepare_stats_export_body(user_ids=[f"{uuid4()}" for _ in range(10)]) + body = prepare_stats_export_body( + user_ids=[f"{uuid4()}" for _ in range(10)] + ) body["date_from"] = date_from body["date_to"] = date_to @@ -910,7 +934,9 @@ def test_create_export_invalid_datetime_format( @pytest.mark.integration def test_create_export_return_csv(prepare_db_update_stats_already_updated): - body = prepare_stats_export_body(user_ids=[str(ann.user_id) for ann in ANNOTATORS]) + body = prepare_stats_export_body( + user_ids=[str(ann.user_id) for ann in ANNOTATORS] + ) response = client.post( f"{CRUD_TASKS_PATH}/export", @@ -920,7 +946,10 @@ def test_create_export_return_csv(prepare_db_update_stats_already_updated): assert response.status_code == 200 assert "text/csv" in response.headers["content-type"] - assert "filename=annotator_stats_export" in response.headers["content-disposition"] + assert ( + "filename=annotator_stats_export" + in response.headers["content-disposition"] + ) assert len(response.content) > 0 @@ -1056,7 +1085,9 @@ def test_get_tasks( status=200, headers=TEST_HEADERS, ) - response = 
client.get(CRUD_TASKS_PATH, params=url_params, headers=TEST_HEADERS) + response = client.get( + CRUD_TASKS_PATH, params=url_params, headers=TEST_HEADERS + ) assert response.status_code == 200 response = [ {key: value for key, value in x.items() if key != "id"} @@ -1158,7 +1189,9 @@ def test_get_tasks_pagination( status=200, headers=TEST_HEADERS, ) - response = client.get(CRUD_TASKS_PATH, params=url_params, headers=TEST_HEADERS) + response = client.get( + CRUD_TASKS_PATH, params=url_params, headers=TEST_HEADERS + ) assert response.status_code == 200 response = { key: value @@ -1177,8 +1210,12 @@ def test_get_tasks_pagination( (NEW_TASKS[3], CRUD_CR_JOBS[3].deadline), ], ) -def test_post_task_deadline(prepare_db_for_cr_task, task_info, expected_deadline): - response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) +def test_post_task_deadline( + prepare_db_for_cr_task, task_info, expected_deadline +): + response = client.post( + CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS + ) assert response.status_code == 201 assert response.json()["deadline"] == expected_deadline check_files_distributed_pages(prepare_db_for_cr_task, task_info["job_id"]) @@ -1260,7 +1297,9 @@ def prepare_filtration_body_double_filter( @pytest.mark.integration -@patch("annotation.tasks.resources.filter_tasks_db", side_effect=SQLAlchemyError) +@patch( + "annotation.tasks.resources.filter_tasks_db", side_effect=SQLAlchemyError +) def test_search_tasks_500_error(prepare_db_for_cr_task): data = prepare_filtration_body() response = client.post(SEARCH_TASKS_PATH, json=data, headers=TEST_HEADERS) @@ -1270,10 +1309,13 @@ def test_search_tasks_500_error(prepare_db_for_cr_task): @pytest.mark.integration def test_search_tasks_400_error(prepare_db_for_cr_task): - data = prepare_filtration_body(ordering_field="status", operator="distinct") + data = prepare_filtration_body( + ordering_field="status", operator="distinct" + ) response = client.post(SEARCH_TASKS_PATH, json=data, headers=TEST_HEADERS) error_message = ( - "SELECT DISTINCT ON expressions must " "match initial ORDER BY expressions" + "SELECT DISTINCT ON expressions must " + "match initial ORDER BY expressions" ) assert response.status_code == 400 assert error_message in response.text @@ -1460,7 +1502,9 @@ def tests_search_tasks_ordering( ], ) @responses.activate -def test_search_tasks_wrong_parameters(wrong_parameter, value, prepare_db_for_cr_task): +def test_search_tasks_wrong_parameters( + wrong_parameter, value, prepare_db_for_cr_task +): responses.add( responses.POST, ASSETS_FILES_URL, @@ -1497,7 +1541,9 @@ def test_search_tasks_wrong_parameters(wrong_parameter, value, prepare_db_for_cr def test_post_task_validation_only( prepare_db_for_cr_task, task_info, expected_status_code, expected_response ): - response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) + response = client.post( + CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS + ) assert response.status_code == expected_status_code response = ( {key: value for key, value in response.json().items() if key != "id"} @@ -1511,7 +1557,9 @@ def test_post_task_validation_only( @pytest.mark.integration @pytest.mark.parametrize("task_info", (NEW_TASKS[9], NEW_TASKS[10])) def test_post_task_wrong_file_error(prepare_db_for_cr_task, task_info): - response = client.post(CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS) + response = client.post( + CRUD_TASKS_PATH, json=task_info, headers=TEST_HEADERS + ) error_message = ( f"{task_info['file_id']} is not assigned for job 
{task_info['job_id']}" ) @@ -1521,7 +1569,11 @@ def test_post_task_wrong_file_error(prepare_db_for_cr_task, task_info): @pytest.mark.integration def test_post_task_wrong_file_pages(prepare_db_for_cr_task): - response = client.post(CRUD_TASKS_PATH, json=NEW_TASKS[11], headers=TEST_HEADERS) - error_message = "({101, 102}) do not belong to file %s" % (NEW_TASKS[11]["file_id"]) + response = client.post( + CRUD_TASKS_PATH, json=NEW_TASKS[11], headers=TEST_HEADERS + ) + error_message = "({101, 102}) do not belong to file %s" % ( + NEW_TASKS[11]["file_id"] + ) assert response.status_code == 400 assert error_message in response.text diff --git a/annotation/tests/test_tasks_crud_ud.py b/annotation/tests/test_tasks_crud_ud.py index 201e9a864..e94c40465 100644 --- a/annotation/tests/test_tasks_crud_ud.py +++ b/annotation/tests/test_tasks_crud_ud.py @@ -549,12 +549,14 @@ def test_delete_task( ( CRUD_UD_CONSTRAINTS_TASKS[6], {"is_validation": False}, - f"{CRUD_UD_CONSTRAINTS_TASKS[6].user_id} is not assigned " f"as annotator", + f"{CRUD_UD_CONSTRAINTS_TASKS[6].user_id} is not assigned " + f"as annotator", ), # same job validator but not annotator ( CRUD_UD_CONSTRAINTS_TASKS[6], {"user_id": CRUD_UD_CONSTRAINTS_USERS[0].user_id}, - f"{CRUD_UD_CONSTRAINTS_USERS[0].user_id} is not assigned " f"as validator", + f"{CRUD_UD_CONSTRAINTS_USERS[0].user_id} is not assigned " + f"as validator", ), # same job annotator but not validator ( CRUD_UD_CONSTRAINTS_TASKS[0], @@ -562,7 +564,8 @@ def test_delete_task( "job_id": CRUD_UD_CONSTRAINTS_JOBS[2].job_id, "is_validation": True, }, - f"{CRUD_UD_CONSTRAINTS_USERS[0].user_id} is not assigned " f"as validator", + f"{CRUD_UD_CONSTRAINTS_USERS[0].user_id} is not assigned " + f"as validator", ), # same user not from new job ], ) @@ -733,7 +736,9 @@ def test_update_task_empty_request(prepare_db_for_ud_task_constrains, task): @pytest.mark.integration @pytest.mark.parametrize("task", CRUD_UD_CONSTRAINTS_TASKS[0:2]) -def test_update_task_deadline_with_none_value(prepare_db_for_ud_task_constrains, task): +def test_update_task_deadline_with_none_value( + prepare_db_for_ud_task_constrains, task +): """Checks if task deadline can be updated with None value""" response = client.patch( construct_path(CRUD_TASKS_PATH, task.id), diff --git a/annotation/tests/test_update_job.py b/annotation/tests/test_update_job.py index 6dd1961b9..2761f3284 100644 --- a/annotation/tests/test_update_job.py +++ b/annotation/tests/test_update_job.py @@ -352,7 +352,9 @@ def test_update_categories(category_ids, prepare_db_for_update_job, job_id): .order_by(asc("category_id")) .all() ) - expected_categories = [(category_id, job_id) for category_id in category_ids] + expected_categories = [ + (category_id, job_id) for category_id in category_ids + ] assert jobs_categories == expected_categories all_categories_after = session.query(Category).all() assert all_categories_before == all_categories_after @@ -391,7 +393,9 @@ def test_update_wrong_categories(category_ids, prepare_db_for_update_job): ("files", UPDATE_JOB_IDS[7], [UPDATE_JOB_FILES_FROM_ASSETS[2]]), ], ) -def test_update_files(prepare_db_for_update_job, monkeypatch, field, job_id, new_files): +def test_update_files( + prepare_db_for_update_job, monkeypatch, field, job_id, new_files +): """Checks that files for job successfully update with 204 response both from 'files' and 'dataset' fields and that old job's files delete from 'files' table. 
Also checks that files with same id as deleted/added for @@ -411,7 +415,10 @@ def test_update_files(prepare_db_for_update_job, monkeypatch, field, job_id, new ) assert response.status_code == 204 job_files_db = ( - session.query(File).filter_by(job_id=job_id).order_by(asc(File.file_id)).all() + session.query(File) + .filter_by(job_id=job_id) + .order_by(asc(File.file_id)) + .all() ) job_files = [ {"file_id": job_file.file_id, "pages_number": job_file.pages_number} @@ -456,7 +463,9 @@ def test_update_job_new_user( assert existing_users_count == 4 association_table = ASSOCIATION_TABLES[user_type] old_association = ( - session.query(association_table).filter_by(job_id=UPDATE_JOB_IDS[1]).first() + session.query(association_table) + .filter_by(job_id=UPDATE_JOB_IDS[1]) + .first() ) assert str(old_association.user_id) == old_user_id response = client.patch( @@ -535,7 +544,8 @@ def test_update_job_new_user( [USER_IDS[1]], UPDATE_JOB_IDS[7], 400, - "There should be no annotators or validators provided " "for ImportJob", + "There should be no annotators or validators provided " + "for ImportJob", ), ], ) @@ -574,7 +584,9 @@ def test_update_files_and_datasets_for_already_started_job( """Tests that update of job which in progress status with files or datasets is restricted""" expected_code = 422 - error_message = "files and datasets can't be updated for already started job" + error_message = ( + "files and datasets can't be updated for already started job" + ) monkeypatch.setattr( "annotation.jobs.services.get_job_names", Mock(return_value={UPDATE_JOB_IDS[5]: "JobName"}), @@ -622,7 +634,8 @@ def test_update_extraction_job_new_user( session = prepare_db_for_update_job job_id = UPDATE_JOB_IDS[6] existing_users_count = sum( - session.query(table).filter_by(job_id=job_id).count() for table in tables + session.query(table).filter_by(job_id=job_id).count() + for table in tables ) assert existing_users_count == 1 monkeypatch.setattr( @@ -636,7 +649,8 @@ def test_update_extraction_job_new_user( ) assert response.status_code == expected_code new_users_count = sum( - session.query(table).filter_by(job_id=job_id).count() for table in tables + session.query(table).filter_by(job_id=job_id).count() + for table in tables ) assert new_users_count == expected_users_count @@ -658,7 +672,9 @@ def test_delete_redundant_users(prepare_db_for_update_job): ) prepare_db_for_update_job.commit() redundant_user = ( - prepare_db_for_update_job.query(User).filter(User.user_id == USER_IDS[3]).all() + prepare_db_for_update_job.query(User) + .filter(User.user_id == USER_IDS[3]) + .all() ) assert not redundant_user assert response.status_code == 204 @@ -677,7 +693,9 @@ def test_not_delete_redundant_user_as_owner_of_another_job( ) prepare_db_for_update_job.commit() redundant_user_owner = ( - prepare_db_for_update_job.query(User).filter(User.user_id == USER_IDS[2]).all() + prepare_db_for_update_job.query(User) + .filter(User.user_id == USER_IDS[2]) + .all() ) assert redundant_user_owner assert response.status_code == 204 diff --git a/annotation/tests/test_validation.py b/annotation/tests/test_validation.py index 615790455..4ae607abd 100644 --- a/annotation/tests/test_validation.py +++ b/annotation/tests/test_validation.py @@ -7,7 +7,13 @@ from sqlalchemy import or_ from annotation.annotations import row_to_dict -from annotation.models import AnnotatedDoc, File, Job, ManualAnnotationTask, User +from annotation.models import ( + AnnotatedDoc, + File, + Job, + ManualAnnotationTask, + User, +) from annotation.schemas import ( 
AnnotationAndValidationActionsSchema, FileStatusEnumSchema, @@ -1417,7 +1423,9 @@ def test_create_validation_tasks_exceptions(db_validation_end, user): TASKS[8].id, { "annotation_user_for_failed_pages": AnnotationAndValidationActionsSchema.auto.value, # noqa E501 - "validation_user_for_reannotated_pages": str(ANNOTATORS[3].user_id), + "validation_user_for_reannotated_pages": str( + ANNOTATORS[3].user_id + ), }, 400, "does not belong", @@ -1475,7 +1483,11 @@ def test_create_validation_tasks_exceptions(db_validation_end, user): ), ( TASKS[4].id, - {"validation_user_for_reannotated_pages": str(ANNOTATORS[1].user_id)}, + { + "validation_user_for_reannotated_pages": str( + ANNOTATORS[1].user_id + ) + }, 400, "Missing `annotation_user", ), @@ -1659,7 +1671,9 @@ def test_check_delete_user_from_annotated_doc(db_validation_end): ManualAnnotationTask.id.in_([100, 102, 103, 107, 111]) ).delete(synchronize_session=False) db_validation_end.commit() - db_validation_end.query(User).filter(User.user_id == ANNOTATORS[0].user_id).delete() + db_validation_end.query(User).filter( + User.user_id == ANNOTATORS[0].user_id + ).delete() db_validation_end.commit() deleted_user = db_validation_end.query( diff --git a/assets/alembic/env.py b/assets/alembic/env.py index 0bf7a9c97..46caae68b 100644 --- a/assets/alembic/env.py +++ b/assets/alembic/env.py @@ -14,7 +14,9 @@ if not os.getenv("USE_TEST_DB"): config.set_main_option("sqlalchemy.url", settings.database_url) else: - config.set_main_option("sqlalchemy.url", get_test_db_url(settings.database_url)) + config.set_main_option( + "sqlalchemy.url", get_test_db_url(settings.database_url) + ) # Interpret the config file for Python logging. # This line sets up loggers basically. @@ -73,7 +75,9 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure( + connection=connection, target_metadata=target_metadata + ) with context.begin_transaction(): context.run_migrations() diff --git a/assets/alembic/versions/afa33cc83d57_new_fields.py b/assets/alembic/versions/afa33cc83d57_new_fields.py index f15531afb..6d7aa33a6 100644 --- a/assets/alembic/versions/afa33cc83d57_new_fields.py +++ b/assets/alembic/versions/afa33cc83d57_new_fields.py @@ -55,7 +55,9 @@ def upgrade() -> None: sa.Column( "ts_vector", TSVector(), - sa.Computed("to_tsvector('english', original_name)", persisted=True), + sa.Computed( + "to_tsvector('english', original_name)", persisted=True + ), nullable=True, ), sa.PrimaryKeyConstraint("id"), @@ -68,7 +70,9 @@ def upgrade() -> None: sa.Column("dataset_id", sa.Integer(), nullable=False), sa.Column("file_id", sa.Integer(), nullable=False), sa.Column("created", sa.DateTime(), nullable=False), - sa.ForeignKeyConstraint(["dataset_id"], ["datasets.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint( + ["dataset_id"], ["datasets.id"], ondelete="CASCADE" + ), sa.ForeignKeyConstraint(["file_id"], ["files.id"], ondelete="CASCADE"), sa.PrimaryKeyConstraint("dataset_id", "file_id"), ) diff --git a/assets/assets/db/models.py b/assets/assets/db/models.py index d5b1ab06f..119cb17e3 100644 --- a/assets/assets/db/models.py +++ b/assets/assets/db/models.py @@ -49,7 +49,9 @@ class Association(Base): # type: ignore sa.ForeignKey("files.id", ondelete="CASCADE"), primary_key=True, ) - created = sa.Column(sa.DateTime, nullable=False, default=datetime.datetime.utcnow()) + created = sa.Column( + sa.DateTime, nullable=False, default=datetime.datetime.utcnow() + ) 
@property def as_dict(self) -> Dict[str, Any]: @@ -72,7 +74,9 @@ class Datasets(Base): # type: ignore ) name = sa.Column(sa.String(150), nullable=False, unique=True) count = sa.Column(sa.Integer, default=0) - created = sa.Column(sa.DateTime, nullable=False, default=datetime.datetime.utcnow()) + created = sa.Column( + sa.DateTime, nullable=False, default=datetime.datetime.utcnow() + ) ts_vector = sa.Column( TSVector(), sa.Computed( @@ -81,7 +85,9 @@ class Datasets(Base): # type: ignore ), ) - __table_args__ = (sa.Index("ix_ds_name", ts_vector, postgresql_using="gin"),) + __table_args__ = ( + sa.Index("ix_ds_name", ts_vector, postgresql_using="gin"), + ) @property def as_dict(self) -> Dict[str, Any]: @@ -122,7 +128,9 @@ class FileObject(Base): # type: ignore persisted=True, ), ) - datasets = relationship("Datasets", secondary="association", backref="files") + datasets = relationship( + "Datasets", secondary="association", backref="files" + ) __table_args__ = (sa.Index("ix_name", ts_vector, postgresql_using="gin"),) diff --git a/assets/assets/db/service.py b/assets/assets/db/service.py index 0c89aa7ee..4492c48d7 100644 --- a/assets/assets/db/service.py +++ b/assets/assets/db/service.py @@ -63,7 +63,9 @@ def update_file( file_status: str, ) -> Optional[FileObject]: file: Optional[FileObject] = ( - session.query(FileObject).filter(FileObject.id == file_id).with_for_update() + session.query(FileObject) + .filter(FileObject.id == file_id) + .with_for_update() ).first() file.original_name = file_to_update file.bucket = (bucket_name,) @@ -89,7 +91,11 @@ def insert_dataset(session: Session, dataset_name: str) -> None: def delete_file_from_db(session: Session, row_id: int) -> Any: - q = session.query(FileObject).filter(FileObject.id == row_id).with_for_update() + q = ( + session.query(FileObject) + .filter(FileObject.id == row_id) + .with_for_update() + ) decrease_count_in_bounded_datasets(session, row_id) res = q.delete() session.commit() @@ -106,7 +112,9 @@ def update_file_status( file_id: int, file_status: FileProcessingStatusForUpdate, session: Session ) -> Optional[FileObject]: file: Optional[FileObject] = ( - session.query(FileObject).filter(FileObject.id == file_id).with_for_update() + session.query(FileObject) + .filter(FileObject.id == file_id) + .with_for_update() ).first() file.status = file_status try: @@ -130,7 +138,9 @@ def get_all_files_query( session: Session, request: Dict[str, Any] ) -> Tuple[Query, PaginationParams]: filter_args = map_request_to_filter(request, "FileObject") - query = session.query(FileObject).options(selectinload(FileObject.datasets)) + query = session.query(FileObject).options( + selectinload(FileObject.datasets) + ) query, pag = form_query(filter_args, query) return query, pag @@ -170,7 +180,9 @@ def get_all_bonds_query( return query, pag -def is_bounded(session: Session, file_id: int, ds_name: str) -> Optional[FileObject]: +def is_bounded( + session: Session, file_id: int, ds_name: str +) -> Optional[FileObject]: bond = ( session.query(FileObject) .join(Association, Datasets) @@ -181,8 +193,12 @@ def is_bounded(session: Session, file_id: int, ds_name: str) -> Optional[FileObj return bond -def add_dataset_to_file(session: Session, file: FileObject, ds: Datasets) -> None: - ds_query = session.query(Datasets).filter(Datasets.id == ds.id).with_for_update() +def add_dataset_to_file( + session: Session, file: FileObject, ds: Datasets +) -> None: + ds_query = ( + session.query(Datasets).filter(Datasets.id == ds.id).with_for_update() + ) file_obj = ( 
session.query(FileObject) .filter(FileObject.id == file.id) @@ -195,8 +211,12 @@ def add_dataset_to_file(session: Session, file: FileObject, ds: Datasets) -> Non session.commit() -def remove_dataset_from_file(session: Session, file: FileObject, ds: Datasets) -> None: - ds_query = session.query(Datasets).filter(Datasets.id == ds.id).with_for_update() +def remove_dataset_from_file( + session: Session, file: FileObject, ds: Datasets +) -> None: + ds_query = ( + session.query(Datasets).filter(Datasets.id == ds.id).with_for_update() + ) file_obj = ( session.query(FileObject) .filter(FileObject.id == file.id) @@ -216,7 +236,9 @@ def decrease_count_in_bounded_datasets(session: Session, file_id: int) -> None: .filter(FileObject.id == file_id) ) ds_ids = [row.id for row in query] - session.query(Datasets).filter(Datasets.id.in_(ds_ids)).with_for_update().update( + session.query(Datasets).filter( + Datasets.id.in_(ds_ids) + ).with_for_update().update( {Datasets.count: Datasets.count - 1}, synchronize_session="fetch" ) session.commit() diff --git a/assets/assets/routers/bonds_router.py b/assets/assets/routers/bonds_router.py index a45075356..b8f3e6cad 100644 --- a/assets/assets/routers/bonds_router.py +++ b/assets/assets/routers/bonds_router.py @@ -19,7 +19,9 @@ async def search_bonds( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> filter_lib.Page[Dict[str, Any]]: """ Takes every bounded pair dataset-file and returns them. @@ -49,7 +51,9 @@ async def bound_files_to_dataset( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> List[schemas.ActionResponse]: """ Bounds file objects to a given dataset. If dataset does not exist HTTPException @@ -125,7 +129,9 @@ async def unbound_files_from_dataset( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> List[schemas.ActionResponse]: """ Unbound file objects with a given dataset. If dataset does not exist HTTPException diff --git a/assets/assets/routers/datasets_router.py b/assets/assets/routers/datasets_router.py index bd4113100..2094cdf45 100644 --- a/assets/assets/routers/datasets_router.py +++ b/assets/assets/routers/datasets_router.py @@ -21,7 +21,9 @@ async def search_datasets( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> filter_lib.Page[schemas.DatasetResponse]: """ Allows getting datasets data with filters, sorts and pagination. 
@@ -52,7 +54,9 @@ async def create_dataset( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> Dict[str, Any]: """ Creates a new dataset object in database. If dataset with given name is already exists @@ -95,7 +99,9 @@ async def delete_dataset( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> Dict[str, Any]: """ Deletes a dataset with a given name from a database. If that dataset does not exist @@ -131,7 +137,9 @@ async def get_files_by_dataset( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> filter_lib.Page[schemas.FileResponse]: """ Takes a dataset name and returns all files metadata with this dataset. @@ -177,7 +185,9 @@ def get_all_files_by_dataset_id( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> Optional[List[schemas.FileResponse]]: if not db.service.get_all_files_by_ds_id(session, dataset_id): raise fastapi.HTTPException( diff --git a/assets/assets/routers/files_router.py b/assets/assets/routers/files_router.py index f357976e9..1a8a70894 100644 --- a/assets/assets/routers/files_router.py +++ b/assets/assets/routers/files_router.py @@ -21,7 +21,9 @@ async def search_files( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> filter_lib.Page[schemas.FileResponse]: """ Allows getting files metadata with filters, sorts and pagination. @@ -91,7 +93,10 @@ async def upload_files( bucket_name, files, session, storage_ ) - return [schemas.ActionResponse.parse_obj(response) for response in upload_results] + return [ + schemas.ActionResponse.parse_obj(response) + for response in upload_results + ] @router.delete( @@ -106,7 +111,9 @@ async def delete_files( db.service.session_scope_for_dependency ), storage: minio.Minio = fastapi.Depends(utils.minio_utils.get_storage), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> List[schemas.ActionResponse]: """ Deletes objects from minio storage and then their metadata from database. 
@@ -185,7 +192,9 @@ async def update_file( session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> schemas.FileResponse: file_obj = db.service.get_file_by_id(session, request.file) if not file_obj: diff --git a/assets/assets/routers/minio_router.py b/assets/assets/routers/minio_router.py index d4132b2a3..38e36f086 100644 --- a/assets/assets/routers/minio_router.py +++ b/assets/assets/routers/minio_router.py @@ -10,11 +10,15 @@ router = fastapi.APIRouter(tags=["minio"]) -@router.get("/download", name="gets file from minio with original content-type") +@router.get( + "/download", name="gets file from minio with original content-type" +) async def get_from_minio( file_id: int, background_tasks: fastapi.BackgroundTasks, - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), original: bool = False, session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency @@ -44,14 +48,18 @@ async def get_from_minio( utils.minio_utils.check_bucket(f.bucket, storage) response = utils.minio_utils.stream_minio(f.path, f.bucket, storage) if original: - response = utils.minio_utils.stream_minio(f.origin_path, f.bucket, storage) + response = utils.minio_utils.stream_minio( + f.origin_path, f.bucket, storage + ) background_tasks.add_task(utils.minio_utils.close_conn, response) return fastapi.responses.StreamingResponse( response.stream(), media_type=response.headers["Content-Type"] ) -@router.get("/download/thumbnail", name="get thumbnail of original file in jpg format") +@router.get( + "/download/thumbnail", name="get thumbnail of original file in jpg format" +) async def get_preview_from_minio( file_id: int, background_tasks: fastapi.BackgroundTasks, @@ -59,7 +67,9 @@ async def get_preview_from_minio( db.service.session_scope_for_dependency ), storage: minio.Minio = fastapi.Depends(utils.minio_utils.get_storage), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> fastapi.responses.StreamingResponse: f = db.service.get_file_by_id(session, file_id) if not f: @@ -85,7 +95,9 @@ async def get_image_piece( ..., example=(100, 100, 200, 200) ), page_number: int = fastapi.Query(..., ge=1, example=1), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), storage: minio.Minio = fastapi.Depends(utils.minio_utils.get_storage), session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency @@ -104,7 +116,9 @@ async def get_image_piece( ) piece_path = f"files/bbox/{f.id}/page{page_number}_bbox{bbox}_ext{settings.bbox_ext}.jpg" # noqa if not utils.minio_utils.check_file_exist(piece_path, f.bucket, storage): - utils.minio_utils.make_pdf_piece(f, page_number, bbox, piece_path, storage) + utils.minio_utils.make_pdf_piece( + f, page_number, bbox, piece_path, storage + ) response = utils.minio_utils.stream_minio(piece_path, f.bucket, storage) background_tasks.add_task(utils.minio_utils.close_conn, response) @@ -121,7 +135,9 @@ async def get_image_piece( async def create_bucket( bucket: schemas.Bucket, 
storage: minio.Minio = fastapi.Depends(utils.minio_utils.get_storage), - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), ) -> Dict[str, str]: """ Creates bucket into Minio. If bucket exists HTTPException will be diff --git a/assets/assets/routers/s3_router.py b/assets/assets/routers/s3_router.py index ed346fde9..704c1433b 100644 --- a/assets/assets/routers/s3_router.py +++ b/assets/assets/routers/s3_router.py @@ -18,7 +18,9 @@ async def download_s3_files( s3_data: schemas.S3Data, storage_url: Optional[str] = None, - x_current_tenant: Optional[str] = fastapi.Header(None, alias="X-Current-Tenant"), + x_current_tenant: Optional[str] = fastapi.Header( + None, alias="X-Current-Tenant" + ), session: sqlalchemy.orm.Session = fastapi.Depends( db.service.session_scope_for_dependency ), @@ -68,5 +70,6 @@ async def download_s3_files( ) return [ - schemas.ActionResponse.parse_obj(response) for response in upload_results + schemas.ActionResponse.parse_obj(response) + for response in upload_results ] # noqa diff --git a/assets/assets/utils/common_utils.py b/assets/assets/utils/common_utils.py index 8a1192925..f69734ceb 100644 --- a/assets/assets/utils/common_utils.py +++ b/assets/assets/utils/common_utils.py @@ -193,7 +193,9 @@ def convert_to_pdf(self) -> bytes: # In case of some error, the content of Gotenberg response is plain text. # noqa self.conversion_status = "conversion error" logger_.error( - logger_.error("%s with %s", self.conversion_status, self.file_name) + logger_.error( + "%s with %s", self.conversion_status, self.file_name + ) ) raise exceptions.FileConversionError self.converted_ext = ".pdf" @@ -221,8 +223,12 @@ def convert_to_jpg(self) -> bytes: return byte_im def convert_txt(self): - input_text_path = f"files/{self.new_file.id}/" f"{self.new_file.id}.txt" - output_pdf_path = f"files/{self.new_file.id}/" f"{self.new_file.id}.pdf" + input_text_path = ( + f"files/{self.new_file.id}/" f"{self.new_file.id}.txt" + ) + output_pdf_path = ( + f"files/{self.new_file.id}/" f"{self.new_file.id}.pdf" + ) output_tokens_path = f"files/{self.new_file.id}/ocr/1.json" post_to_convert( self.bucket_storage, diff --git a/assets/assets/utils/convert_service_utils.py b/assets/assets/utils/convert_service_utils.py index f55180a82..757109fc8 100644 --- a/assets/assets/utils/convert_service_utils.py +++ b/assets/assets/utils/convert_service_utils.py @@ -5,7 +5,9 @@ logger_ = logger.get_logger(__name__) -def post_to_convert(bucket: str, input_text, output_pdf, output_tokens) -> bool: +def post_to_convert( + bucket: str, input_text, output_pdf, output_tokens +) -> bool: """ Puts file into convert service """ @@ -19,7 +21,9 @@ def post_to_convert(bucket: str, input_text, output_pdf, output_tokens) -> bool: }, ) if response.status_code != 201: - logger_.info(f"File {input_text} failed to convert: " f"{response.text}") + logger_.info( + f"File {input_text} failed to convert: " f"{response.text}" + ) return False except requests.exceptions.ConnectionError as e: logger_.error(f"Connection error - detail: {e}") diff --git a/assets/assets/utils/minio_utils.py b/assets/assets/utils/minio_utils.py index 4fd7b2ecf..856461911 100644 --- a/assets/assets/utils/minio_utils.py +++ b/assets/assets/utils/minio_utils.py @@ -39,14 +39,20 @@ def create_minio_config(): elif settings.s3_credentials_provider == "aws_config": # environmental variable AWS_PROFILE_NAME should be set minio_config.update( - 
{"credentials": AWSConfigProvider(profile=settings.aws_profile_name)} + { + "credentials": AWSConfigProvider( + profile=settings.aws_profile_name + ) + } ) else: raise NotConfiguredException( "s3 connection is not properly configured - " "s3_credentials_provider is not set" ) - logger_.debug(f"S3_Credentials provider - {settings.s3_credentials_provider}") + logger_.debug( + f"S3_Credentials provider - {settings.s3_credentials_provider}" + ) return minio_config @@ -68,25 +74,37 @@ def upload_in_minio( """ pdf_bytes = make_thumbnail_pdf(file) if pdf_bytes and isinstance(pdf_bytes, bytes): - upload_thumbnail(file_obj.bucket, pdf_bytes, client, file_obj.thumb_path) + upload_thumbnail( + file_obj.bucket, pdf_bytes, client, file_obj.thumb_path + ) image_bytes = make_thumbnail_images(file) if image_bytes and isinstance(image_bytes, bytes): - upload_thumbnail(file_obj.bucket, image_bytes, client, file_obj.thumb_path) - return put_file_to_minio(client, file, file_obj, file_obj.content_type, "converted") + upload_thumbnail( + file_obj.bucket, image_bytes, client, file_obj.thumb_path + ) + return put_file_to_minio( + client, file, file_obj, file_obj.content_type, "converted" + ) -def remake_thumbnail(file_obj: db.models.FileObject, storage: minio.Minio) -> bool: +def remake_thumbnail( + file_obj: db.models.FileObject, storage: minio.Minio +) -> bool: obj: urllib3.response.HTTPResponse = storage.get_object( file_obj.bucket, file_obj.path ) pdf_bytes = make_thumbnail_pdf(obj.data) if pdf_bytes and isinstance(pdf_bytes, bytes): - upload_thumbnail(file_obj.bucket, pdf_bytes, storage, file_obj.thumb_path) + upload_thumbnail( + file_obj.bucket, pdf_bytes, storage, file_obj.thumb_path + ) image_bytes = make_thumbnail_images(obj.data) if image_bytes and isinstance(image_bytes, bytes): - upload_thumbnail(file_obj.bucket, image_bytes, storage, file_obj.thumb_path) + upload_thumbnail( + file_obj.bucket, image_bytes, storage, file_obj.thumb_path + ) obj.close() if not pdf_bytes and not image_bytes: logger_.error("File is not an image") @@ -327,7 +345,9 @@ def get_size_ratio(width: int, height: int) -> float: try: r = width / height if r <= 0: - logger_.error("Current size raio <= 0! w = %s , h = %s", width, height) + logger_.error( + "Current size raio <= 0! 
w = %s , h = %s", width, height + ) r = 1.0 return r except ZeroDivisionError: diff --git a/assets/assets/utils/s3_utils.py b/assets/assets/utils/s3_utils.py index 9982dfdae..dbe1d7f05 100644 --- a/assets/assets/utils/s3_utils.py +++ b/assets/assets/utils/s3_utils.py @@ -36,7 +36,9 @@ def __init__( region_name=region_name, ) - def get_files(self, bucket_s3: str, files_keys: List[str]) -> Dict[str, BytesIO]: + def get_files( + self, bucket_s3: str, files_keys: List[str] + ) -> Dict[str, BytesIO]: """ Downloads files from S3 storage """ @@ -52,7 +54,9 @@ def _check_bucket_exist(self, bucket_s3: str) -> Any: """ Checks if required bucket exists in S3 """ - all_s3_buckets = [bucket.name for bucket in self.resource.buckets.all()] + all_s3_buckets = [ + bucket.name for bucket in self.resource.buckets.all() + ] if bucket_s3 not in all_s3_buckets: raise exceptions.BucketError(f"bucket {bucket_s3} does not exist!") @@ -62,11 +66,15 @@ def _check_files_exist(self, bucket_s3: str, files_keys: List[str]) -> Any: """ all_files_in_bucket = [ content["Key"] - for content in self.client.list_objects(Bucket=bucket_s3)["Contents"] + for content in self.client.list_objects(Bucket=bucket_s3)[ + "Contents" + ] ] for file_key in files_keys: if file_key not in all_files_in_bucket: - raise exceptions.FileKeyError(f"file key {file_key} does not exist!") + raise exceptions.FileKeyError( + f"file key {file_key} does not exist!" + ) def check_s3(self, bucket_s3: str, files_keys: List[str]) -> Any: """ diff --git a/assets/tests/conftest.py b/assets/tests/conftest.py index 726955cfb..8e82fd48d 100644 --- a/assets/tests/conftest.py +++ b/assets/tests/conftest.py @@ -53,7 +53,9 @@ def setup_database(use_temp_env_var): except SQLAlchemyError as e: raise SQLAlchemyError(f"Got an Exception during migrations - {e}") - session_local = sessionmaker(autocommit=False, autoflush=False, bind=engine) + session_local = sessionmaker( + autocommit=False, autoflush=False, bind=engine + ) session = session_local() yield session session.close() @@ -87,11 +89,21 @@ def setup_tenant(): @pytest.fixture -def client_app_main(setup_database, minio_mock_exists_bucket_true, setup_tenant): - minio_mock_exists_bucket_true.get_object.return_value = urllib3.HTTPResponse() - minio_mock_exists_bucket_true.get_object.return_value.headers["Content-Type"] = "" - app.dependency_overrides[get_storage] = lambda: minio_mock_exists_bucket_true - app.dependency_overrides[session_scope_for_dependency] = lambda: setup_database +def client_app_main( + setup_database, minio_mock_exists_bucket_true, setup_tenant +): + minio_mock_exists_bucket_true.get_object.return_value = ( + urllib3.HTTPResponse() + ) + minio_mock_exists_bucket_true.get_object.return_value.headers[ + "Content-Type" + ] = "" + app.dependency_overrides[ + get_storage + ] = lambda: minio_mock_exists_bucket_true + app.dependency_overrides[ + session_scope_for_dependency + ] = lambda: setup_database app.dependency_overrides[tenant] = lambda: setup_tenant with patch.object(minio_utils, "delete_one_from_minio", return_value=True): @@ -103,8 +115,12 @@ def client_app_main(setup_database, minio_mock_exists_bucket_true, setup_tenant) def client_app_main_bucket_false( setup_database, minio_mock_exists_bucket_false, setup_tenant ): - app.dependency_overrides[get_storage] = lambda: minio_mock_exists_bucket_false - app.dependency_overrides[session_scope_for_dependency] = lambda: setup_database + app.dependency_overrides[ + get_storage + ] = lambda: minio_mock_exists_bucket_false + app.dependency_overrides[ + 
session_scope_for_dependency + ] = lambda: setup_database app.dependency_overrides[tenant] = lambda: setup_tenant client = TestClient(app) yield client diff --git a/assets/tests/test_helpers.py b/assets/tests/test_helpers.py index 641cf55bc..e4a14473b 100644 --- a/assets/tests/test_helpers.py +++ b/assets/tests/test_helpers.py @@ -5,7 +5,11 @@ from fastapi import HTTPException from assets.db.models import FileObject -from assets.db.service import delete_file_from_db, insert_file, update_file_status +from assets.db.service import ( + delete_file_from_db, + insert_file, + update_file_status, +) from assets.schemas import FileProcessingStatus from assets.utils.minio_utils import check_bucket, delete_one_from_minio @@ -60,7 +64,9 @@ def test_delete_one_from_minio(minio_mock_exists_bucket_true): with patch("tests.test_helpers.delete_one_from_minio") as mock_: mock_.side_effect = [True, False] random_name = uuid.uuid4().hex - minio_mock_exists_bucket_true.fput_object(random_name, "testfile", Mock()) + minio_mock_exists_bucket_true.fput_object( + random_name, "testfile", Mock() + ) x = delete_one_from_minio( random_name, "testfile", minio_mock_exists_bucket_true ) @@ -91,7 +97,11 @@ def test_put_to_db(setup_database): def test_update_file_status(file_): session = file_ - f = session.query(FileObject).filter(FileObject.original_name == "testname").first() + f = ( + session.query(FileObject) + .filter(FileObject.original_name == "testname") + .first() + ) assert f fi = update_file_status(f.id, FileProcessingStatus.UPLOADED, file_) assert fi.status == "uploaded" diff --git a/assets/tests/test_main.py b/assets/tests/test_main.py index 955cd9bb0..d4847b410 100644 --- a/assets/tests/test_main.py +++ b/assets/tests/test_main.py @@ -12,7 +12,9 @@ def test_create_bucket(client_app_main_bucket_false): random_name = "tests" + uuid.uuid4().hex bucket = {"name": random_name} - tests_bucket = client_app_main_bucket_false.post("/bucket", data=json.dumps(bucket)) + tests_bucket = client_app_main_bucket_false.post( + "/bucket", data=json.dumps(bucket) + ) assert tests_bucket.status_code == 201 @@ -27,7 +29,9 @@ def test_bucket_name_on_create_bucket_with_prefix( random_name = "tests" + uuid.uuid4().hex bucket = {"name": random_name} - response = client_app_main_bucket_false.post("/bucket", data=json.dumps(bucket)) + response = client_app_main_bucket_false.post( + "/bucket", data=json.dumps(bucket) + ) assert response.status_code == 201 assert ( response.json()["detail"] @@ -46,9 +50,14 @@ def test_bucket_name_on_create_bucket_without_prefix( random_name = "tests" + uuid.uuid4().hex bucket = {"name": random_name} - response = client_app_main_bucket_false.post("/bucket", data=json.dumps(bucket)) + response = client_app_main_bucket_false.post( + "/bucket", data=json.dumps(bucket) + ) assert response.status_code == 201 - assert response.json()["detail"] == f"Bucket {random_name} successfully created!" + assert ( + response.json()["detail"] + == f"Bucket {random_name} successfully created!" 
+ ) def test_upload_and_delete_file_without_conversion(client_app_main): @@ -136,8 +145,12 @@ def test_get_file_by_id(client_app_main): ) file_id = response.json()[0]["id"] - search_body = {"filters": [{"field": "id", "operator": "eq", "value": file_id}]} - res_get_one = client_app_main.post("/files/search", data=json.dumps(search_body)) + search_body = { + "filters": [{"field": "id", "operator": "eq", "value": file_id}] + } + res_get_one = client_app_main.post( + "/files/search", data=json.dumps(search_body) + ) assert res_get_one.status_code == 200 assert res_get_one.json()["data"][0]["id"] == file_id @@ -174,7 +187,9 @@ def test_put_and_delete_dataset(client_app_main): body = {"name": random_name} res = client_app_main.post("/datasets", data=json.dumps(body)) assert res.status_code == 201 - assert res.json() == {"detail": f"Dataset {random_name} successfully created!"} + assert res.json() == { + "detail": f"Dataset {random_name} successfully created!" + } res_delete = client_app_main.delete("/datasets", data=json.dumps(body)) assert res_delete.status_code == 201 @@ -207,15 +222,21 @@ def test_bound_and_unbound(client_app_main): count_body = { "filters": [{"field": "name", "operator": "eq", "value": dataset_name}] } - res_count = client_app_main.post("/datasets/search", data=json.dumps(count_body)) + res_count = client_app_main.post( + "/datasets/search", data=json.dumps(count_body) + ) assert res_count.json()["data"][0]["count"] == 1 - res_unbound = client_app_main.delete("/datasets/bonds", data=json.dumps(data)) + res_unbound = client_app_main.delete( + "/datasets/bonds", data=json.dumps(data) + ) assert res_unbound.status_code == 201 assert file_id == res_unbound.json()[0]["id"] assert res_unbound.json()[0]["status"] - res_delete_dataset = client_app_main.delete("/datasets", data=json.dumps(body)) + res_delete_dataset = client_app_main.delete( + "/datasets", data=json.dumps(body) + ) assert res_delete_dataset.status_code == 201 assert res_delete_dataset.json() == { "detail": f"Dataset {dataset_name} successfully deleted!" @@ -246,7 +267,9 @@ def test_get_files_by_dataset(client_app_main): assert res_put.status_code == 201 bound_data = {"name": dataset_name, "objects": [file_id]} - res_bound = client_app_main.post("/datasets/bonds", data=json.dumps(bound_data)) + res_bound = client_app_main.post( + "/datasets/bonds", data=json.dumps(bound_data) + ) assert res_bound.status_code == 201 assert file_id == res_bound.json()[0]["id"] assert res_bound.json()[0]["status"] @@ -257,14 +280,18 @@ def test_get_files_by_dataset(client_app_main): assert res_get_by_dataset.status_code == 200 assert res_get_by_dataset.json()["data"][0]["id"] == file_id - res_delete_dataset = client_app_main.delete("/datasets", data=json.dumps(body)) + res_delete_dataset = client_app_main.delete( + "/datasets", data=json.dumps(body) + ) assert res_delete_dataset.status_code == 201 assert res_delete_dataset.json() == { "detail": f"Dataset {dataset_name} successfully deleted!" 
} file_body = {"objects": [file_id]} - res_delete_file = client_app_main.delete("/files", data=json.dumps(file_body)) + res_delete_file = client_app_main.delete( + "/files", data=json.dumps(file_body) + ) assert res_delete_file.status_code == 201 assert file_id == res_delete_file.json()[0]["id"] assert res_delete_file.json()[0]["status"] @@ -311,12 +338,16 @@ def test_get_dataset_by_name(client_app_main): body = {"name": random_name} res = client_app_main.post("/datasets", data=json.dumps(body)) assert res.status_code == 201 - assert res.json() == {"detail": f"Dataset {random_name} successfully created!"} + assert res.json() == { + "detail": f"Dataset {random_name} successfully created!" + } search_body = { "filters": [{"field": "name", "operator": "eq", "value": random_name}] } - res_id = client_app_main.post("/datasets/search", data=json.dumps(search_body)) + res_id = client_app_main.post( + "/datasets/search", data=json.dumps(search_body) + ) assert res_id.status_code == 200 assert res_id.json()["data"][0]["id"] == 1 assert res_id.json()["data"][0]["name"] == random_name @@ -359,9 +390,13 @@ def test_get_files_by_filename_positive(client_app_main): file_name = res_upload_1.json()[0]["file_name"] search_body = { - "filters": [{"field": "original_name", "operator": "eq", "value": file_name}] + "filters": [ + {"field": "original_name", "operator": "eq", "value": file_name} + ] } - get_by_name = client_app_main.post("/files/search", data=json.dumps(search_body)) + get_by_name = client_app_main.post( + "/files/search", data=json.dumps(search_body) + ) assert get_by_name.status_code == 200 all_names = [el["original_name"] for el in get_by_name.json()["data"]] @@ -383,8 +418,12 @@ def test_get_files_by_filename_empty_array(client_app_main): assert res_upload.status_code == 201 assert res_upload.json()[0]["status"] - search_body = {"filters": [{"field": "id", "operator": "eq", "value": id_ + 10111}]} - get_by_name = client_app_main.post("/files/search", data=json.dumps(search_body)) + search_body = { + "filters": [{"field": "id", "operator": "eq", "value": id_ + 10111}] + } + get_by_name = client_app_main.post( + "/files/search", data=json.dumps(search_body) + ) assert get_by_name.status_code == 200 assert get_by_name.json()["data"] == [] @@ -400,7 +439,9 @@ def test_download_negative(client_app_main): def test_download_positive(client_app_main): - with patch("assets.routers.minio_router.fastapi.responses.StreamingResponse"): + with patch( + "assets.routers.minio_router.fastapi.responses.StreamingResponse" + ): with NamedTemporaryFile(suffix=".jpg") as file: data = {"files": file} res_upload = client_app_main.post( @@ -417,11 +458,15 @@ def test_download_positive(client_app_main): @patch("assets.utils.common_utils.requests.post") -def test_download_positive_originals(gotenberg, pdf_file_bytes, client_app_main): +def test_download_positive_originals( + gotenberg, pdf_file_bytes, client_app_main +): response = Response() response._content = pdf_file_bytes gotenberg.return_value = response - with patch("assets.routers.minio_router.fastapi.responses.StreamingResponse"): + with patch( + "assets.routers.minio_router.fastapi.responses.StreamingResponse" + ): with NamedTemporaryFile(suffix=".doc", prefix="some_file") as file: data = {"files": file} res_upload = client_app_main.post( @@ -433,7 +478,9 @@ def test_download_positive_originals(gotenberg, pdf_file_bytes, client_app_main) assert res_upload.status_code == 201 assert res_upload.json()[0]["status"] - res_download = 
client_app_main.get(f"/download?file_id={id_}&original=true") + res_download = client_app_main.get( + f"/download?file_id={id_}&original=true" + ) assert res_download.status_code == 200 @@ -461,7 +508,9 @@ def test_count_changing(client_app_main): count_body = { "filters": [{"field": "name", "operator": "eq", "value": dataset_name}] } - res_count = client_app_main.post("/datasets/search", data=json.dumps(count_body)) + res_count = client_app_main.post( + "/datasets/search", data=json.dumps(count_body) + ) assert res_count.json()["data"][0]["count"] == 1 id_ = res_upload.json()[0]["id"] diff --git a/assets/tests/test_utils.py b/assets/tests/test_utils.py index d33478a82..19cf374c0 100644 --- a/assets/tests/test_utils.py +++ b/assets/tests/test_utils.py @@ -186,7 +186,9 @@ def test_file_processor_is_file_updated_status_not_updated(update_file_status): @patch("assets.utils.common_utils.FileProcessor.is_file_updated") @patch("assets.utils.common_utils.FileProcessor.is_blank_is_created") -@patch("assets.utils.common_utils.FileProcessor.is_original_file_uploaded_to_storage") +@patch( + "assets.utils.common_utils.FileProcessor.is_original_file_uploaded_to_storage" +) @patch("assets.utils.common_utils.FileProcessor.is_uploaded_to_storage") @patch("assets.utils.common_utils.FileProcessor.is_inserted_to_database") @patch("assets.utils.common_utils.FileProcessor.is_converted_file") @@ -428,7 +430,9 @@ def test_s3_manager_get_files(): @patch("assets.utils.s3_utils.S3Manager._check_bucket_exist") @patch("assets.utils.s3_utils.S3Manager._check_files_exist") -def test_s3_manager_check_s3_buckets_and_files_exist(check_buckets, check_files): +def test_s3_manager_check_s3_buckets_and_files_exist( + check_buckets, check_files +): s3 = S3Manager("a", "b", endpoint_url=None) check_buckets.return_value = None check_files.return_value = None @@ -474,7 +478,9 @@ def test_check_uploading_limit_not_exceed(): @patch("assets.utils.common_utils.get_mimetype") @patch("assets.utils.common_utils.requests.post") -def test_file_processor_conversion_error(gotenberg, get_mimetype, pdf_file_bytes): +def test_file_processor_conversion_error( + gotenberg, get_mimetype, pdf_file_bytes +): response = Response() response._content = pdf_file_bytes gotenberg.return_value = response @@ -524,7 +530,9 @@ def test_file_converted_converted_to_pdf_side_effect( def test_file_converted_converted_to_jpg(png_bytes): new_db_file = FileObject() - converter = FileConverter(png_bytes, "some_file.png", ".png", "test", new_db_file) + converter = FileConverter( + png_bytes, "some_file.png", ".png", "test", new_db_file + ) assert converter.convert() is True @@ -621,6 +629,8 @@ def test_get_pixel_bbox_size( current_pixel_size, original_pts_size, bbox, expected_result ): assert ( - minio_utils.get_pixel_bbox_size(current_pixel_size, original_pts_size, bbox) + minio_utils.get_pixel_bbox_size( + current_pixel_size, original_pts_size, bbox + ) == expected_result ) diff --git a/common/minio_service/minio_service/minio_api.py b/common/minio_service/minio_service/minio_api.py index fb75b3997..82fbe08a6 100644 --- a/common/minio_service/minio_service/minio_api.py +++ b/common/minio_service/minio_service/minio_api.py @@ -7,7 +7,9 @@ from . 
import logger # type: ignore -LOGGER = logger.get_logger(__name__, "MINIO_COMMUNICATION_PATH", "minio_communication") +LOGGER = logger.get_logger( + __name__, "MINIO_COMMUNICATION_PATH", "minio_communication" +) class BucketExistsError(Exception): @@ -74,10 +76,14 @@ def __init__( self, minio_server: str, minio_root_user: str, minio_root_password: str ) -> None: if not MinioCommunicator.client: - self.create_client(minio_server, minio_root_user, minio_root_password) + self.create_client( + minio_server, minio_root_user, minio_root_password + ) @classmethod - def create_client(cls, minio_server, minio_root_user, minio_root_password) -> None: + def create_client( + cls, minio_server, minio_root_user, minio_root_password + ) -> None: """ Create connection with minio service. Returns: @@ -130,7 +136,9 @@ def download_file(self, bucket: str, path: str, local_path: Path) -> None: ) self.client.fget_object(bucket, path, str(local_path)) - def download_directory(self, bucket: str, path: str, local_dir: Path) -> None: + def download_directory( + self, bucket: str, path: str, local_dir: Path + ) -> None: """ Download directory from minio to indicated local directory. Args: @@ -164,10 +172,16 @@ def upload_file(self, bucket: str, path: str, local_path: Path) -> None: if not local_path.exists() or not local_path.suffix: LOGGER.error("file %s doesn't exist", local_path) raise FileExistsError(f"file {local_path} doesn't exist") - LOGGER.info("Uploading from file %s to %s/%s", local_path, bucket, path) - self.client.fput_object(bucket, os.path.join(path, local_path), local_path) + LOGGER.info( + "Uploading from file %s to %s/%s", local_path, bucket, path + ) + self.client.fput_object( + bucket, os.path.join(path, local_path), local_path + ) - def upload_directory(self, bucket: str, path: str, local_dir: Path) -> None: + def upload_directory( + self, bucket: str, path: str, local_dir: Path + ) -> None: """ Upload directory to minio. 
Args: diff --git a/common/model_api/model_api/common/models.py b/common/model_api/model_api/common/models.py index 8cb24b170..a86658e84 100644 --- a/common/model_api/model_api/common/models.py +++ b/common/model_api/model_api/common/models.py @@ -47,7 +47,9 @@ class Args(BaseModel): class ClassifierRequest(BaseModel): """Request to classify DOD's bboxes.""" - input_path: Path = Field(example=Path("ternary_out/molecule_annotation.json")) + input_path: Path = Field( + example=Path("ternary_out/molecule_annotation.json") + ) input_field: Dict[str, Dict[str, List[str]]] = Field( alias="input", example={ diff --git a/common/model_api/model_api/preprocessing.py b/common/model_api/model_api/preprocessing.py index 4f30d4d82..2cbe03701 100644 --- a/common/model_api/model_api/preprocessing.py +++ b/common/model_api/model_api/preprocessing.py @@ -32,7 +32,9 @@ def crop_page_images( figure_image: Image = pdf_page.to_image( resolution=calculate_dpi(figure_bbox) ).original.crop(figure_bbox) - image_path = output_path / f"{obj.idx}.{settings.training_image_format}" + image_path = ( + output_path / f"{obj.idx}.{settings.training_image_format}" + ) figure_image.save(str(image_path)) yield image_path @@ -45,8 +47,12 @@ def convert_figure_bbox_in_points( page_width_inch = page_pdf_bbox[3] - page_pdf_bbox[1] page_height_inch = page_pdf_bbox[2] - page_pdf_bbox[0] try: - figure_to_page_w_points = page_width_inch / Decimal(page_dod_size.width) - figure_to_page_h_points = page_height_inch / Decimal(page_dod_size.height) + figure_to_page_w_points = page_width_inch / Decimal( + page_dod_size.width + ) + figure_to_page_h_points = page_height_inch / Decimal( + page_dod_size.height + ) except ZeroDivisionError as err: logger.error("Page size from DOD is wrong! %s", page_dod_size) raise err diff --git a/common/model_api/model_api/storage_exchange.py b/common/model_api/model_api/storage_exchange.py index 034afd6dc..edfb33961 100644 --- a/common/model_api/model_api/storage_exchange.py +++ b/common/model_api/model_api/storage_exchange.py @@ -15,7 +15,9 @@ def get_document( """Get a document from s3-storage.""" logger.info("Get a document from minio") document_path = work_dir / request.file.name - loader.client.fget_object(request.bucket, str(request.file), str(document_path)) + loader.client.fget_object( + request.bucket, str(request.file), str(document_path) + ) return document_path @@ -43,7 +45,9 @@ def put_annotation( """Put an annotation to s3-storage.""" logger.info("Put an annotation to minio") updated_annotation_path = Path(work_dir) / f"out_{request.input_path.name}" - output_annotation = m.AnnotationFromS3(pages=annotation).json(by_alias=True) + output_annotation = m.AnnotationFromS3(pages=annotation).json( + by_alias=True + ) updated_annotation_path.write_text(output_annotation) loader.client.fput_object( request.output_bucket, diff --git a/common/model_api/model_api/utils.py b/common/model_api/model_api/utils.py index 33b824fdd..1daeaa1fd 100644 --- a/common/model_api/model_api/utils.py +++ b/common/model_api/model_api/utils.py @@ -84,7 +84,9 @@ def update_annotation_categories( required_obj_ids: Optional[Tuple[str, ...]] = None, ) -> None: if page.page_num > len(pdf.pages): - logger.error("page %s in annotations doesn't exit in pdf", page.page_num) + logger.error( + "page %s in annotations doesn't exit in pdf", page.page_num + ) return bboxes_inference_result = { (page.page_num, Path(image).stem): inference_result @@ -112,7 +114,9 @@ def update_annotation_categories( obj.data = {} inference_key = 
(page.page_num, obj.idx) - if (data_field := "data") in bboxes_inference_result[inference_key].keys(): + if (data_field := "data") in bboxes_inference_result[ + inference_key + ].keys(): obj.data = { **obj.data, **bboxes_inference_result[inference_key][data_field], @@ -120,7 +124,9 @@ def update_annotation_categories( if (category_field := "category") in bboxes_inference_result[ inference_key ].keys(): - obj.category = bboxes_inference_result[inference_key][category_field] + obj.category = bboxes_inference_result[inference_key][ + category_field + ] logger.info( "An annotation of a page %s with %s updated", diff --git a/common/model_api/tests/test_api.py b/common/model_api/tests/test_api.py index bb04baf83..212a7c529 100644 --- a/common/model_api/tests/test_api.py +++ b/common/model_api/tests/test_api.py @@ -14,7 +14,9 @@ # from model_api.inference import inference -@pytest.mark.skip(reason="this is a test from a different, but similar service") +@pytest.mark.skip( + reason="this is a test from a different, but similar service" +) def test_inference(monkeypatch): model_mock = MagicMock() monkeypatch.setattr( @@ -60,7 +62,9 @@ def test_update_annotation_categories_updating(monkeypatch): ) pdf = MagicMock() setattr(pdf, "pages", [1]) - update_annotation_categories(inference, None, page_dod, pdf, ["1", "3"], ...) + update_annotation_categories( + inference, None, page_dod, pdf, ["1", "3"], ... + ) assert page_dod == m.PageDOD( page_num=1, @@ -102,7 +106,9 @@ def test_update_annotation_categories_without_updating(monkeypatch): inference = MagicMock(return_value=[]) pdf = MagicMock() setattr(pdf, "pages", [1]) - update_annotation_categories(inference, None, page_dod, pdf, ["1", "3"], ...) + update_annotation_categories( + inference, None, page_dod, pdf, ["1", "3"], ... 
+ ) assert page_dod == m.PageDOD( page_num=1, diff --git a/common/model_api/tests/test_preprocessing.py b/common/model_api/tests/test_preprocessing.py index f3d1174d7..86514aa6d 100644 --- a/common/model_api/tests/test_preprocessing.py +++ b/common/model_api/tests/test_preprocessing.py @@ -40,9 +40,15 @@ def test_calculate_dpi(): def test_preprocessing(tmpdir, monkeypatch): - obj1 = GeometryObject(category="1", bbox=(300, 300, 800, 800), id="object_id") - obj2 = GeometryObject(category="100500", bbox=(0, 0, 0, 0), id="does not matter") - page_dod = PageDOD(page_num=1, size=Size(width=595, height=841), objs=[obj1, obj2]) + obj1 = GeometryObject( + category="1", bbox=(300, 300, 800, 800), id="object_id" + ) + obj2 = GeometryObject( + category="100500", bbox=(0, 0, 0, 0), id="does not matter" + ) + page_dod = PageDOD( + page_num=1, size=Size(width=595, height=841), objs=[obj1, obj2] + ) page_mock = MagicMock() image_mock = MagicMock() diff --git a/common/model_api/tests/test_smoke.py b/common/model_api/tests/test_smoke.py index ed5386d3f..a2cffa4d7 100644 --- a/common/model_api/tests/test_smoke.py +++ b/common/model_api/tests/test_smoke.py @@ -103,11 +103,15 @@ def inference_return(model, images): for image in images: if image == Path("aab83828-cd8b-41f7-a3c3-943f13e67c2c.png"): print("inference yield 1") - yield "aab83828-cd8b-41f7-a3c3-943f13e67c2c", {"chemical_formula": "31"} + yield "aab83828-cd8b-41f7-a3c3-943f13e67c2c", { + "chemical_formula": "31" + } if image == Path("732f2735-3369-4305-9d29-fa3be99d72dd.png"): print("inference yield 2") - yield "732f2735-3369-4305-9d29-fa3be99d72dd", {"chemical_formula": "31"} + yield "732f2735-3369-4305-9d29-fa3be99d72dd", { + "chemical_formula": "31" + } def crop_page_return(pdf_page, dod_page: m.PageDOD, categories, output_path): diff --git a/common/page_rendering/page_rendering/page_rendering.py b/common/page_rendering/page_rendering/page_rendering.py index e3f055a8e..25596aa46 100644 --- a/common/page_rendering/page_rendering/page_rendering.py +++ b/common/page_rendering/page_rendering/page_rendering.py @@ -44,7 +44,9 @@ def render( page = pdf.pages[page_number - 1] img = page.to_image(resolution=self.dpi) file_name = full_file_name.name.split(".")[0] - filename = dir_with_images / self.name_image(file_name, page_number) + filename = dir_with_images / self.name_image( + file_name, page_number + ) logger.info("Render page %s", page_number) img.save(filename, format=self.image_format) diff --git a/convert/convert/coco_export/convert.py b/convert/convert/coco_export/convert.py index d0cf1c384..2420e7f05 100644 --- a/convert/convert/coco_export/convert.py +++ b/convert/convert/coco_export/convert.py @@ -52,7 +52,9 @@ def download_image( image_folder = f"{Path(file_path).parent.parent}/" if not os.path.exists(image_folder): os.makedirs(image_folder, exist_ok=True) - image_local_path = f"{image_folder}/{self.job_id}_{Path(file_path).name}" + image_local_path = ( + f"{image_folder}/{self.job_id}_{Path(file_path).name}" + ) minio_resource.meta.client.download_file( self.bucket_name, file_path, image_local_path ) @@ -107,7 +109,9 @@ def download_annotation( f"{work_dir}/{page_name}.json", f"{local_path}/{page_name}.json", ) - add_to_zip_and_local_remove(f"{local_path}/{page_name}.json", zip_file) + add_to_zip_and_local_remove( + f"{local_path}/{page_name}.json", zip_file + ) def get_annotation_body( self, @@ -144,7 +148,9 @@ def fetch( """ work_dir = Path(manifest).parent manifest_content = json.loads( - minio_client.get_object(Bucket=self.bucket_name, 
Key=manifest)["Body"] + minio_client.get_object(Bucket=self.bucket_name, Key=manifest)[ + "Body" + ] .read() .decode("utf-8") ) @@ -276,7 +282,8 @@ def convert(self) -> ZipFile: annotation_num = 1 categories = loader.get_categories(self.token) category_names = { - category.lower(): number for number, category in enumerate(categories) + category.lower(): number + for number, category in enumerate(categories) } for page in file_id: files = minio_client.list_objects( @@ -309,7 +316,9 @@ def convert(self) -> ZipFile: key=lambda x: x["id"], # type: ignore ) export_save_to_json("coco", coco_annotation.dict()) - LOGGER.info("Converting of the job %s to coco has been finished", self.job_id) + LOGGER.info( + "Converting of the job %s to coco has been finished", self.job_id + ) self.zip_file.close() return self.zip_file @@ -404,7 +413,9 @@ def convert(self) -> ZipFile: minio_client.download_file( self.bucket_name, manifest_path, annotation_local_path ) - LOGGER.info("manifest.json was downloaded for the job %s", self.job_id) + LOGGER.info( + "manifest.json was downloaded for the job %s", self.job_id + ) add_to_zip_and_local_remove(annotation_local_path, self.zip_file) loader.fetch( manifest_path, diff --git a/convert/convert/coco_import/convert.py b/convert/convert/coco_import/convert.py index f46c35fed..706eaf541 100644 --- a/convert/convert/coco_import/convert.py +++ b/convert/convert/coco_import/convert.py @@ -101,11 +101,17 @@ def check_category(self) -> Set[str]: "metadata": {"color": category["color"]}, "is_link": False, } - get_category_details_url = f"{categories_services_url}{category['id']}" - response = SESSION.get(url=get_category_details_url, headers=headers) + get_category_details_url = ( + f"{categories_services_url}{category['id']}" + ) + response = SESSION.get( + url=get_category_details_url, headers=headers + ) category_id = response.json().get("id", None) if not category_id: - SESSION.post(url=categories_services_url, json=body, headers=headers) + SESSION.post( + url=categories_services_url, json=body, headers=headers + ) LOGGER.info("Created category %s", category["name"]) LOGGER.info(response.json()) LOGGER.info("Checking categories has been finished") @@ -128,7 +134,9 @@ def convert(self) -> None: annotation["page_num"] = 1 pages["pages"].append(annotation) import_save_to_json( - os.path.join(Path(self.s3_data.bucket_s3).stem, str(image_id)), + os.path.join( + Path(self.s3_data.bucket_s3).stem, str(image_id) + ), str(obj["id"]), annotation, file_id=image_id, diff --git a/convert/convert/coco_import/import_service.py b/convert/convert/coco_import/import_service.py index dab9783e8..4292fbda7 100644 --- a/convert/convert/coco_import/import_service.py +++ b/convert/convert/coco_import/import_service.py @@ -23,7 +23,9 @@ def download_coco_from_aws(s3_data: coco.DataS3) -> S3Manager: try: check_uploading_limit(s3_data.files_keys) except UploadLimitExceedError as e: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail=str(e) + ) s3 = S3Manager(s3_data.aws_access_key_id, s3_data.aws_secret_access_key) s3_download_files(s3, s3_data.bucket_s3, s3_data.files_keys) return s3 @@ -66,7 +68,9 @@ def import_run( json=body, headers={"X-Current-Tenant": current_tenant, "Authorization": token}, ) - converter.upload_annotations(job_id, s3_data.bucket_s3, annotation_by_image) + converter.upload_annotations( + job_id, s3_data.bucket_s3, annotation_by_image + ) return { "msg": f"Dataset was converted to 
{import_format} " f"format and upload to bucket {current_tenant}" diff --git a/convert/convert/config.py b/convert/convert/config.py index 6f9b19f55..1a3a070e7 100644 --- a/convert/convert/config.py +++ b/convert/convert/config.py @@ -114,7 +114,9 @@ def create_boto3_config(): "s3 connection is not properly configured - " "s3_credentials_provider is not set" ) - logger_.info(f"S3_Credentials provider - {settings.s3_credentials_provider}") + logger_.info( + f"S3_Credentials provider - {settings.s3_credentials_provider}" + ) return boto3_config diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py index ea74a7c77..dd0d1148c 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py @@ -56,7 +56,9 @@ def convert( badgerdoc_tokens, ) for labelstudio_item in annotation.result: - self.process_relations(badgerdoc_annotations, labelstudio_item) + self.process_relations( + badgerdoc_annotations, labelstudio_item + ) badgerdoc_annotations_practic = AnnotationConverterPractic( badgerdoc_annotations, badgerdoc_tokens ).convert() @@ -144,9 +146,14 @@ def get_token_indexes_and_form_bbox( offset_end: int, badgerdoc_tokens: BadgerdocTokensPage, ) -> Tuple[List[int], List[float]]: - badgerdoc_annotation_token_indexes = list(range(offset_begin, offset_end)) + badgerdoc_annotation_token_indexes = list( + range(offset_begin, offset_end) + ) bbox = self.form_common_bbox( - [badgerdoc_tokens.objs[t].bbox for t in badgerdoc_annotation_token_indexes] + [ + badgerdoc_tokens.objs[t].bbox + for t in badgerdoc_annotation_token_indexes + ] ) return badgerdoc_annotation_token_indexes, bbox diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py index e2e4911be..0178a8dd3 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py @@ -47,7 +47,9 @@ def convert_objs( text=text, data=bd_annotation_model_practic.AnnotationTokens( tokens=tokens, - dataAttributes=obj_theoretic.data.get("dataAttributes", []), + dataAttributes=obj_theoretic.data.get( + "dataAttributes", [] + ), ), links=links, ) @@ -76,7 +78,9 @@ def convert_links( ) -> List[bd_annotation_model_practic.AnnotationLink]: links = [] for link_theoretic in theoretic_links: - link = bd_annotation_model_practic.AnnotationLink(**link_theoretic.dict()) + link = bd_annotation_model_practic.AnnotationLink( + **link_theoretic.dict() + ) links.append(link) return links diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py index a575d1bd4..17f72c9d4 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/pdf_renderer.py @@ -24,10 +24,18 @@ def __init__( self.font_name = font_name self.font_size = font_size - def render_tokens(self, tokens: List[BadgerdocToken], save_path: Path) -> None: + def render_tokens( + self, tokens: List[BadgerdocToken], save_path: Path + ) -> None: with fitz.open() as doc: - width = max(token.bbox[2] for token in tokens) 
+ self.page_border_offset - height = max(token.bbox[3] for token in tokens) + self.page_border_offset + width = ( + max(token.bbox[2] for token in tokens) + + self.page_border_offset + ) + height = ( + max(token.bbox[3] for token in tokens) + + self.page_border_offset + ) page = doc.new_page(height=height, width=width) for token in tokens: diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py index 345613b91..14c72b9c7 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py @@ -13,7 +13,9 @@ def generate_chunks(obj_to_split: List[str], size: int) -> List[List[str]]: - return [obj_to_split[i : i + size] for i in range(0, len(obj_to_split), size)] + return [ + obj_to_split[i : i + size] for i in range(0, len(obj_to_split), size) + ] class TextWrapper: diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py index 3ec866065..c956f2162 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py @@ -79,7 +79,9 @@ def download_badgerdoc_from_s3( input_annotations = self.download_file_from_s3( s3_input_annotations, tmp_dir ) - input_manifest = self.download_file_from_s3(s3_input_manifest, tmp_dir) + input_manifest = self.download_file_from_s3( + s3_input_manifest, tmp_dir + ) LOGGER.debug("input_manifest: %s", input_manifest.read_text()) page = Page.parse_file(input_tokens) @@ -89,7 +91,9 @@ def download_badgerdoc_from_s3( ) ).convert() manifest = Manifest.parse_file(input_manifest) - return BadgerdocData(page=page, annotation=annotation, manifest=manifest) + return BadgerdocData( + page=page, annotation=annotation, manifest=manifest + ) def download_file_from_s3(self, s3_path: S3Path, tmp_dir: Path) -> Path: local_file_path = tmp_dir / Path(s3_path.path).name @@ -107,7 +111,9 @@ def upload_labelstudio_to_s3( with tempfile.TemporaryDirectory() as tmp_dirname: tmp_dir = Path(tmp_dirname) - badgerdoc_annotations_path = tmp_dir / Path("labelstudio_format.json") + badgerdoc_annotations_path = tmp_dir / Path( + "labelstudio_format.json" + ) self.labelstudio_format.export_json(badgerdoc_annotations_path) self.s3_client.upload_file( str(badgerdoc_annotations_path), diff --git a/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py b/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py index 4ee40a507..11e90cb5b 100644 --- a/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py +++ b/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py @@ -100,7 +100,9 @@ def parse_document_links_from_labelstudio_format( self, label_studio_format: LabelStudioModel ) -> List[DocumentLink]: return [ - DocumentLink(to=relation.to, category=relation.category, type=relation.type) + DocumentLink( + to=relation.to, category=relation.category, type=relation.type + ) for relation in label_studio_format.__root__[0].meta.relations ] @@ -126,17 +128,19 @@ def execute(self) -> None: self.badgerdoc_format.convert_from_labelstudio(label_studio_format) LOGGER.debug("Tokens and annotations are converted") file_id_in_assets = 
self.upload_output_pdf_to_s3() - annotation_job_id_created = self.import_annotations_to_annotation_microservice( - file_id_in_assets=file_id_in_assets, - owner=self.token_data.user_id, - validation_type=self.validation_type, - deadline=self.deadline, - extensive_coverage=self.extensive_coverage, - annotators=self.annotators, - validators=self.validators, - document_labels=document_labels, - categories_to_taxonomy_mapping=categories_to_taxonomy_mapping, - document_links=document_links, + annotation_job_id_created = ( + self.import_annotations_to_annotation_microservice( + file_id_in_assets=file_id_in_assets, + owner=self.token_data.user_id, + validation_type=self.validation_type, + deadline=self.deadline, + extensive_coverage=self.extensive_coverage, + annotators=self.annotators, + validators=self.validators, + document_labels=document_labels, + categories_to_taxonomy_mapping=categories_to_taxonomy_mapping, + document_links=document_links, + ) ) self.upload_badgerdoc_annotations_and_tokens_to_s3( annotation_job_id_created, file_id_in_assets @@ -148,7 +152,9 @@ def download_label_studio_from_s3( s3_input_annotation: S3Path, ) -> LabelStudioModel: with tempfile.TemporaryDirectory() as tmp_dirname: - input_file = Path(tmp_dirname) / Path(s3_input_annotation.path).name + input_file = ( + Path(tmp_dirname) / Path(s3_input_annotation.path).name + ) try: self.s3_client.download_file( @@ -170,7 +176,9 @@ def download_label_studio_from_s3( return LabelStudioModel.parse_file(input_file) def get_output_tokens_path(self, file_id_in_assets: int) -> str: - return f"files/{file_id_in_assets}/ocr/{self.converted_tokens_filename}" + return ( + f"files/{file_id_in_assets}/ocr/{self.converted_tokens_filename}" + ) def get_output_pdf_path(self, file_id_in_assets: int) -> str: return f"files/{file_id_in_assets}/{file_id_in_assets}.pdf" @@ -196,7 +204,9 @@ def make_upload_file_request_to_assets(self, pdf_path: Path) -> int: ) request_to_post_assets.raise_for_status() except requests.exceptions.RequestException as e: - LOGGER.exception("Failed request to 'assets' to post converted pdf-file") + LOGGER.exception( + "Failed request to 'assets' to post converted pdf-file" + ) raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail="Failed request to 'assets' to post converted pdf-file", @@ -208,7 +218,9 @@ def upload_output_pdf_to_s3(self) -> int: pdf_path = tmp_dirname / Path(self.output_pdf_filename) self.badgerdoc_format.export_pdf(pdf_path) - file_id_in_assets = self.make_upload_file_request_to_assets(pdf_path) + file_id_in_assets = self.make_upload_file_request_to_assets( + pdf_path + ) return file_id_in_assets def upload_badgerdoc_annotations_and_tokens_to_s3( @@ -217,9 +229,13 @@ def upload_badgerdoc_annotations_and_tokens_to_s3( with tempfile.TemporaryDirectory() as tmp_dirname: tmp_dirname = Path(tmp_dirname) - s3_output_tokens_path = self.get_output_tokens_path(file_id_in_assets) + s3_output_tokens_path = self.get_output_tokens_path( + file_id_in_assets + ) - badgerdoc_tokens_path = tmp_dirname / Path(self.badgerdoc_tokens_filename) + badgerdoc_tokens_path = tmp_dirname / Path( + self.badgerdoc_tokens_filename + ) self.badgerdoc_format.export_tokens(badgerdoc_tokens_path) self.s3_client.upload_file( str(badgerdoc_tokens_path), @@ -230,7 +246,9 @@ def upload_badgerdoc_annotations_and_tokens_to_s3( badgerdoc_annotations_path = tmp_dirname / Path( self.badgerdoc_annotations_filename ) - self.badgerdoc_format.export_annotations(badgerdoc_annotations_path) + 
self.badgerdoc_format.export_annotations( + badgerdoc_annotations_path + ) s3_output_annotations_path = self.get_output_annotations_path( importjob_id_created, file_id_in_assets ) @@ -295,9 +313,13 @@ def request_jobs_to_create_annotation_job( "validators": validators, } if deadline: - post_annotation_job_body.update({"deadline": jsonable_encoder(deadline)}) + post_annotation_job_body.update( + {"deadline": jsonable_encoder(deadline)} + ) if extensive_coverage is not None: - post_annotation_job_body.update({"extensive_coverage": extensive_coverage}) + post_annotation_job_body.update( + {"extensive_coverage": extensive_coverage} + ) LOGGER.debug( "Making a request to create an Annotation Job in 'jobs' to url: %s with request body: %s", post_annotation_job_url, @@ -324,7 +346,9 @@ def request_jobs_to_create_annotation_job( ) return request_to_post_annotation_job.json()["id"] - def get_categories_of_links(self, pages_objs: List[BadgerdocToken]) -> List[str]: + def get_categories_of_links( + self, pages_objs: List[BadgerdocToken] + ) -> List[str]: result = [] for pages_obj in pages_objs: for link in pages_obj.links: @@ -334,7 +358,9 @@ def get_categories_of_links(self, pages_objs: List[BadgerdocToken]) -> List[str] def get_box_and_link_categories(self) -> List[str]: pages_objs = self.badgerdoc_format.badgerdoc_annotation.objs - categories_of_type_box = {pages_obj.category for pages_obj in pages_objs} + categories_of_type_box = { + pages_obj.category for pages_obj in pages_objs + } categories_of_type_link = self.get_categories_of_links(pages_objs) return [*categories_of_type_box, *categories_of_type_link] @@ -356,7 +382,9 @@ def request_annotation_to_post_annotations( "failed_validation_pages": [], "similar_revisions": [], # TODO: 'simial_revisions' will be replaced with 'links' with unknown format "categories": list(document_labels), - "links_json": [document_link.dict() for document_link in document_links], + "links_json": [ + document_link.dict() for document_link in document_links + ], } LOGGER.debug( "Making request to annotation to post annotations to url: %s with request body: %s", diff --git a/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py b/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py index f6f1f319d..a7987b42f 100644 --- a/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py +++ b/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py @@ -58,8 +58,8 @@ def from_badgerdoc( ] job_id = badgerdoc_manifest.job_id - categories_linked_with_taxonomies = self.get_categories_linked_with_taxonomies( - job_id, request_headers + categories_linked_with_taxonomies = ( + self.get_categories_linked_with_taxonomies(job_id, request_headers) ) LOGGER.debug( "Got there categories linked to taxonomies: %s", @@ -221,7 +221,9 @@ def get_corresponding_taxonomy_obj( detail="Failed request to 'taxonomy' to get corresponding taxonomy", ) from e response_content = request_to_get_taxonomy.json() - LOGGER.debug("Got this response from taxonomy service: %s", response_content) + LOGGER.debug( + "Got this response from taxonomy service: %s", response_content + ) return [ {"taxonomy_id": element["id"], "version": element["version"]} @@ -277,7 +279,9 @@ def get_taxonomy_to_taxons_mapping( detail="Failed request to 'taxonomy' to get taxons_used", ) from e response_content = request_to_get_taxons_used.json() - LOGGER.debug("Got this response from taxonomy service: %s", response_content) + 
LOGGER.debug( + "Got this response from taxonomy service: %s", response_content + ) result = {taxonomy_id: [] for taxonomy_id in all_taxonomies_ids_used} for taxon_obj in response_content["data"]: diff --git a/convert/convert/models/coco.py b/convert/convert/models/coco.py index a220cfbfa..729cc13eb 100644 --- a/convert/convert/models/coco.py +++ b/convert/convert/models/coco.py @@ -35,7 +35,9 @@ def __init__(self, iterable: Iterable[Any]) -> None: "Bounding box should contains only numeric values" ) from err if len(tmp) != 4: - raise ValueError("Bounding box must contains x, y, width and height") + raise ValueError( + "Bounding box must contains x, y, width and height" + ) super().__init__() diff --git a/convert/convert/routers/coco.py b/convert/convert/routers/coco.py index 37bbdb9b8..200ae48df 100644 --- a/convert/convert/routers/coco.py +++ b/convert/convert/routers/coco.py @@ -20,7 +20,9 @@ router = APIRouter(prefix="/coco", tags=["coco"]) LOGGER = get_logger(__file__) -tenant = get_tenant_info(url=settings.keycloak_url, algorithm="RS256", debug=True) +tenant = get_tenant_info( + url=settings.keycloak_url, algorithm="RS256", debug=True +) @router.post( @@ -136,7 +138,9 @@ def download_dataset( parsed = urlparse(url) minio_path = parsed.path[1:].split("/") bucket, key = minio_path[0], str.join("/", minio_path[1:-1]) - zip_file = minio_client.get_object(Bucket=bucket, Key=str.join("/", minio_path[1:])) + zip_file = minio_client.get_object( + Bucket=bucket, Key=str.join("/", minio_path[1:]) + ) background.add_task( minio_client.delete_object, Bucket=bucket, diff --git a/convert/convert/routers/label_studio.py b/convert/convert/routers/label_studio.py index c484acfae..57628ceba 100644 --- a/convert/convert/routers/label_studio.py +++ b/convert/convert/routers/label_studio.py @@ -15,7 +15,9 @@ from tenant_dependency import TenantData, get_tenant_info router = APIRouter(prefix="/label_studio", tags=["label_studio"]) -tenant = get_tenant_info(url=settings.keycloak_url, algorithm="RS256", debug=True) +tenant = get_tenant_info( + url=settings.keycloak_url, algorithm="RS256", debug=True +) @router.post( diff --git a/convert/convert/utils/json_utils.py b/convert/convert/utils/json_utils.py index 0c3a49f35..b03c98f90 100644 --- a/convert/convert/utils/json_utils.py +++ b/convert/convert/utils/json_utils.py @@ -24,7 +24,9 @@ def load_from_json(file_name: str) -> Any: return json.load(f_o) except FileNotFoundError: LOGGER.error(f"[Errno 2] No such file or directory: {file_name}") - raise FileNotFoundError(f"[Errno 2] No such file or directory: {file_name}") + raise FileNotFoundError( + f"[Errno 2] No such file or directory: {file_name}" + ) def annotation_category_change( @@ -70,7 +72,9 @@ def merge_jobs_annotation( last_annotation_id = file_annotation["annotations"][-1]["id"] last_image_id = file_annotation["images"][-1]["id"] last_category_id = file_annotation["categories"][-1]["id"] - file_categories = [category["name"] for category in file_annotation["categories"]] + file_categories = [ + category["name"] for category in file_annotation["categories"] + ] for category_merge in merge_annotation["categories"]: if category_merge["name"] in file_categories: continue @@ -112,7 +116,9 @@ def export_save_to_json( with open(file_name) as f_obr: annotations_in_file = json.load(f_obr) with open(file_name, "w") as f_obw: - annotation = merge_jobs_annotation(annotations_in_file, annotations) + annotation = merge_jobs_annotation( + annotations_in_file, annotations + ) json.dump(annotation, f_obw, 
default=str) diff --git a/convert/convert/utils/render_pdf_page.py b/convert/convert/utils/render_pdf_page.py index e43802abf..4e653cb89 100644 --- a/convert/convert/utils/render_pdf_page.py +++ b/convert/convert/utils/render_pdf_page.py @@ -28,9 +28,15 @@ def pdf_page_to_jpg( if validated_pages and num not in validated_pages: continue image = page.to_image(resolution=settings.dpi).original - image_path = output_path / f"{job_id}_{num}.{settings.coco_image_format}" + image_path = ( + output_path / f"{job_id}_{num}.{settings.coco_image_format}" + ) image.save(image_path) - LOGGER.info("Page %s was rendered and saved to %s", num, image_path) - LOGGER.info("Page %s was written to archive %s", num, zip_file.filename) + LOGGER.info( + "Page %s was rendered and saved to %s", num, image_path + ) + LOGGER.info( + "Page %s was written to archive %s", num, zip_file.filename + ) LOGGER.info("Page %s was removed", num) add_to_zip_and_local_remove(str(image_path), zip_file) diff --git a/convert/convert/utils/s3_utils.py b/convert/convert/utils/s3_utils.py index d17dca8ff..12a119b05 100644 --- a/convert/convert/utils/s3_utils.py +++ b/convert/convert/utils/s3_utils.py @@ -59,7 +59,9 @@ def _check_bucket_exist(self, bucket_s3: str) -> Any: """ Checks if required bucket exists in S3 """ - all_s3_buckets = [bucket.name for bucket in self.resource.buckets.all()] + all_s3_buckets = [ + bucket.name for bucket in self.resource.buckets.all() + ] if bucket_s3 not in all_s3_buckets: raise BucketError(f"bucket {bucket_s3} does not exist!") @@ -69,7 +71,9 @@ def _check_files_exist(self, bucket_s3: str, files_keys: List[str]) -> Any: """ all_files_in_bucket = [ content["Key"] - for content in self.client.list_objects(Bucket=bucket_s3)["Contents"] + for content in self.client.list_objects(Bucket=bucket_s3)[ + "Contents" + ] ] for file_key in files_keys: if file_key not in all_files_in_bucket: @@ -93,7 +97,9 @@ def check_s3(self, bucket_s3: str, files_keys: List[str]) -> Any: raise urllib3.exceptions.MaxRetryError -def s3_download_files(s3: S3Manager, bucket_s3: str, files_keys: List[str]) -> None: +def s3_download_files( + s3: S3Manager, bucket_s3: str, files_keys: List[str] +) -> None: """ Tue function downloads list of the files from s3 storage Args: @@ -105,7 +111,9 @@ def s3_download_files(s3: S3Manager, bucket_s3: str, files_keys: List[str]) -> N s3.check_s3(bucket_s3, files_keys) except (FileKeyError, BucketError) as e: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, detail=str(e) + ) except urllib3.exceptions.MaxRetryError as e: raise HTTPException( @@ -121,7 +129,9 @@ def download_file_from_aws(s3_data: coco.DataS3) -> S3Manager: try: check_uploading_limit(s3_data.files_keys) except UploadLimitExceedError as e: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail=str(e) + ) s3 = S3Manager(s3_data.aws_access_key_id, s3_data.aws_secret_access_key) s3_download_files(s3, s3_data.bucket_s3, s3_data.files_keys) return s3 diff --git a/convert/tests/test_label_studio/test_export.py b/convert/tests/test_label_studio/test_export.py index cbe3db6f7..e3a30b379 100644 --- a/convert/tests/test_label_studio/test_export.py +++ b/convert/tests/test_label_studio/test_export.py @@ -28,11 +28,13 @@ def test_annotation_converter(): TEST_FILES_DIR / "badgerdoc_etalon" / "manifest.json" ) page_annotation_file_name = 
f"{manifest_test.pages['1']}.json" - annotations_test = annotation_converter_practic.AnnotationConverterToTheory( - bd_annotation_model_practic.BadgerdocAnnotation.parse_file( - TEST_FILES_DIR / "badgerdoc_etalon" / page_annotation_file_name - ) - ).convert() + annotations_test = ( + annotation_converter_practic.AnnotationConverterToTheory( + bd_annotation_model_practic.BadgerdocAnnotation.parse_file( + TEST_FILES_DIR / "badgerdoc_etalon" / page_annotation_file_name + ) + ).convert() + ) labelstudio_format_test = LabelStudioFormat() labelstudio_format_test.from_badgerdoc( @@ -61,5 +63,6 @@ def test_annotation_converter(): str(relation) for relation in labelstudio_model_etalon.__root__[0].meta.relations ) == set( - str(relation) for relation in labelstudio_model_test.__root__[0].meta.relations + str(relation) + for relation in labelstudio_model_test.__root__[0].meta.relations ) diff --git a/convert/tests/test_label_studio/test_import.py b/convert/tests/test_label_studio/test_import.py index 2300512f1..048bbe6e0 100644 --- a/convert/tests/test_label_studio/test_import.py +++ b/convert/tests/test_label_studio/test_import.py @@ -73,7 +73,9 @@ def test_annotation_converter(): tokens_test = json.loads(tokens_test_path.read_text()) annotations_test = json.loads(annotations_test_path.read_text()) - tokens_etalon_path = TEST_FILES_DIR / "badgerdoc_etalon" / "tokens_test.json" + tokens_etalon_path = ( + TEST_FILES_DIR / "badgerdoc_etalon" / "tokens_test.json" + ) annotations_etalon_path = ( TEST_FILES_DIR / "badgerdoc_etalon" / "annotations_test.json" ) @@ -103,7 +105,9 @@ def test_import_document_links(): tokens_test = json.loads(tokens_test_path.read_text()) annotations_test = json.loads(annotations_test_path.read_text()) - tokens_etalon_path = TEST_FILES_DIR / "badgerdoc_etalon" / "tokens_test.json" + tokens_etalon_path = ( + TEST_FILES_DIR / "badgerdoc_etalon" / "tokens_test.json" + ) annotations_etalon_path = ( TEST_FILES_DIR / "badgerdoc_etalon" / "annotations_test.json" ) diff --git a/convert/tests/test_label_studio/test_text_wrapper.py b/convert/tests/test_label_studio/test_text_wrapper.py index a43ef0f40..aef00398f 100644 --- a/convert/tests/test_label_studio/test_text_wrapper.py +++ b/convert/tests/test_label_studio/test_text_wrapper.py @@ -117,7 +117,10 @@ def test_wrap_single_paragraph_text(): def test_wrap_text_with_several_paragraphs(): tw = TextWrapper(line_length=20) - text = "Text which contains more then one paragraph\n" "It is the second paragraph" + text = ( + "Text which contains more then one paragraph\n" + "It is the second paragraph" + ) result = tw.wrap(text) assert result == [ diff --git a/dev_runner/dev_runner/runners/base_runner.py b/dev_runner/dev_runner/runners/base_runner.py index 449080a57..e93730562 100644 --- a/dev_runner/dev_runner/runners/base_runner.py +++ b/dev_runner/dev_runner/runners/base_runner.py @@ -38,7 +38,8 @@ async def run(mcs, services: tuple[str]): service.__name__ = runner.PACKAGE_NAME runners.append(service) done, pending = await asyncio.wait( - [service for service in runners], return_when=asyncio.FIRST_COMPLETED + [service for service in runners], + return_when=asyncio.FIRST_COMPLETED, ) for task in pending: task.cancel() diff --git a/dev_runner/start.py b/dev_runner/start.py index 068f9fc6e..f4e9d0ab8 100644 --- a/dev_runner/start.py +++ b/dev_runner/start.py @@ -26,7 +26,9 @@ def _info(message): @click.command() @click.argument( - "services", nargs=-1, type=click.Choice(RunnerRegistry.get_runners().keys()) + "services", + nargs=-1, + 
    type=click.Choice(RunnerRegistry.get_runners().keys()),
 )
 def cli(services):
     _info(
diff --git a/jobs/alembic/env.py b/jobs/alembic/env.py
index 846f0eb98..adc3a929f 100644
--- a/jobs/alembic/env.py
+++ b/jobs/alembic/env.py
@@ -2,10 +2,10 @@
 import os
 from logging.config import fileConfig
+from jobs.utils import get_test_db_url
 from sqlalchemy import engine_from_config, pool
 from alembic import context
-from jobs.utils import get_test_db_url
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
@@ -32,7 +32,9 @@
 if not os.getenv("USE_TEST_DB"):
     config.set_main_option("sqlalchemy.url", main_database_url)
 else:
-    config.set_main_option("sqlalchemy.url", get_test_db_url(main_database_url))
+    config.set_main_option(
+        "sqlalchemy.url", get_test_db_url(main_database_url)
+    )
 def run_migrations_offline():
@@ -74,7 +76,9 @@ def run_migrations_online():
     )
     with connectable.connect() as connection:
-        context.configure(connection=connection, target_metadata=target_metadata)
+        context.configure(
+            connection=connection, target_metadata=target_metadata
+        )
         with context.begin_transaction():
             context.run_migrations()
diff --git a/jobs/alembic/versions/3f5b2d199d38_.py b/jobs/alembic/versions/3f5b2d199d38_.py
index bc4027b3e..8b1872cd8 100644
--- a/jobs/alembic/versions/3f5b2d199d38_.py
+++ b/jobs/alembic/versions/3f5b2d199d38_.py
@@ -19,7 +19,9 @@ def upgrade():
     # ### commands auto generated by Alembic - please adjust! ###
-    op.add_column("job", sa.Column("mode", sa.String(length=30), nullable=True))
+    op.add_column(
+        "job", sa.Column("mode", sa.String(length=30), nullable=True)
+    )
     # ### end Alembic commands ###
diff --git a/jobs/alembic/versions/7511c6790067_.py b/jobs/alembic/versions/7511c6790067_.py
index fc8b8bc05..2857a099a 100644
--- a/jobs/alembic/versions/7511c6790067_.py
+++ b/jobs/alembic/versions/7511c6790067_.py
@@ -20,7 +20,9 @@ def upgrade():
     # ### commands auto generated by Alembic - please adjust! ###
-    op.alter_column("job", "users", nullable=True, new_column_name="annotators")
+    op.alter_column(
+        "job", "users", nullable=True, new_column_name="annotators"
+    )
     op.add_column(
         "job",
         sa.Column(
@@ -31,14 +33,18 @@ def upgrade():
     )
     op.add_column(
         "job",
-        sa.Column("owners", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column(
+            "owners", postgresql.JSONB(astext_type=sa.Text()), nullable=True
+        ),
     )
     # ### end Alembic commands ###
 def downgrade():
     # ### commands auto generated by Alembic - please adjust! ###
-    op.alter_column("job", "annotators", nullable=True, new_column_name="users")
+    op.alter_column(
+        "job", "annotators", nullable=True, new_column_name="users"
+    )
     op.drop_column("job", "owners")
     op.drop_column("job", "validators")
     # ### end Alembic commands ###
diff --git a/jobs/alembic/versions/9229e70d2791_.py b/jobs/alembic/versions/9229e70d2791_.py
index ed11eeaa9..19b0c9c87 100644
--- a/jobs/alembic/versions/9229e70d2791_.py
+++ b/jobs/alembic/versions/9229e70d2791_.py
@@ -25,11 +25,17 @@ def upgrade():
         sa.Column("id", sa.Integer(), nullable=False),
         sa.Column("name", sa.String(length=250), nullable=True),
         sa.Column("status", sa.String(length=250), nullable=True),
-        sa.Column("files", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
-        sa.Column("datasets", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column(
+            "files", postgresql.JSONB(astext_type=sa.Text()), nullable=True
+        ),
+        sa.Column(
+            "datasets", postgresql.JSONB(astext_type=sa.Text()), nullable=True
+        ),
         sa.Column("creation_datetime", sa.DateTime(), nullable=True),
         sa.Column("type", sa.String(length=20), nullable=True),
-        sa.Column("users", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column(
+            "users", postgresql.JSONB(astext_type=sa.Text()), nullable=True
+        ),
         sa.Column(
             "categories",
             postgresql.JSONB(astext_type=sa.Text()),
diff --git a/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py b/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py
index eeef4ce61..fd8323931 100644
--- a/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py
+++ b/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py
@@ -19,7 +19,10 @@ def upgrade():
     # ### commands auto generated by Alembic - please adjust! ###
     op.add_column(
-        "job", sa.Column("start_manual_job_automatically", sa.Boolean(), nullable=True)
+        "job",
+        sa.Column(
+            "start_manual_job_automatically", sa.Boolean(), nullable=True
+        ),
     )
     # ### end Alembic commands ###
diff --git a/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py b/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py
index 46bb7e25a..2ae6058a5 100644
--- a/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py
+++ b/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py
@@ -18,7 +18,9 @@ def upgrade():
    # ### commands auto generated by Alembic - please adjust!
### - op.add_column("job", sa.Column("extensive_coverage", sa.Integer(), nullable=True)) + op.add_column( + "job", sa.Column("extensive_coverage", sa.Integer(), nullable=True) + ) # ### end Alembic commands ### diff --git a/jobs/jobs/create_job_funcs.py b/jobs/jobs/create_job_funcs.py index e1f8a1536..25550c1a4 100644 --- a/jobs/jobs/create_job_funcs.py +++ b/jobs/jobs/create_job_funcs.py @@ -1,14 +1,13 @@ import itertools from typing import Any, Dict, List, Tuple -from fastapi import Depends, HTTPException, status -from sqlalchemy.orm import Session - import jobs.db_service as db_service import jobs.models as dbm import jobs.schemas as schemas import jobs.utils as utils +from fastapi import Depends, HTTPException, status from jobs.schemas import ExtractionJobParams +from sqlalchemy.orm import Session async def get_all_datasets_and_files_data( @@ -57,7 +56,9 @@ async def create_extraction_job( ) pipeline_id = pipeline_instance.get("id") - pipeline_categories = pipeline_instance.get("meta", {}).get("categories", []) + pipeline_categories = pipeline_instance.get("meta", {}).get( + "categories", [] + ) ( files_data, @@ -149,7 +150,9 @@ async def create_extraction_annotation_job( detail="No valid data (files, datasets) provided", ) - pipeline_categories = pipeline_instance.get("meta", {}).get("categories", []) + pipeline_categories = pipeline_instance.get("meta", {}).get( + "categories", [] + ) manual_categories = extraction_annotation_job_input.categories categories = list( set( diff --git a/jobs/jobs/db_service.py b/jobs/jobs/db_service.py index ff43ce008..7955eac2e 100644 --- a/jobs/jobs/db_service.py +++ b/jobs/jobs/db_service.py @@ -1,12 +1,11 @@ from datetime import datetime from typing import Any, Dict, Generator, List, Union -from sqlalchemy import create_engine -from sqlalchemy.orm import Session, sessionmaker - import jobs.config as config import jobs.models as dbm import jobs.schemas as schemas +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker engine = create_engine(config.POSTGRESQL_JOBMANAGER_DATABASE_URI) LocalSession = sessionmaker(autocommit=False, autoflush=False, bind=engine) @@ -136,7 +135,9 @@ def get_all_jobs(db: Session) -> List[Dict[str, Any]]: return [job.as_dict for job in db.query(dbm.CombinedJob)] -def get_job_in_db_by_id(db: Session, job_id: int) -> Union[dbm.CombinedJob, Any]: +def get_job_in_db_by_id( + db: Session, job_id: int +) -> Union[dbm.CombinedJob, Any]: """Getting hold on a job in the database by its id""" job_needed = db.query(dbm.CombinedJob).get(job_id) return job_needed diff --git a/jobs/jobs/main.py b/jobs/jobs/main.py index 4faaab45b..1840355f0 100644 --- a/jobs/jobs/main.py +++ b/jobs/jobs/main.py @@ -1,19 +1,18 @@ import asyncio from typing import Any, Dict, List, Optional, Union -from fastapi import Depends, FastAPI, Header, HTTPException, status -from filter_lib import Page, form_query, map_request_to_filter, paginate -from sqlalchemy.orm import Session -from sqlalchemy_filters.exceptions import BadFilterFormat -from tenant_dependency import TenantData, get_tenant_info - import jobs.create_job_funcs as create_job_funcs import jobs.db_service as db_service import jobs.models as dbm import jobs.run_job_funcs as run_job_funcs import jobs.schemas as schemas import jobs.utils as utils +from fastapi import Depends, FastAPI, Header, HTTPException, status +from filter_lib import Page, form_query, map_request_to_filter, paginate from jobs.config import KEYCLOAK_HOST, ROOT_PATH, API_current_version +from 
sqlalchemy.orm import Session +from sqlalchemy_filters.exceptions import BadFilterFormat +from tenant_dependency import TenantData, get_tenant_info tenant = get_tenant_info(url=KEYCLOAK_HOST, algorithm="RS256", debug=True) app = FastAPI( @@ -218,7 +217,8 @@ async def change_job( if (owners := job_to_change.owners) and user_id not in owners: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, - detail="Access denied. This user is not " "allowed to change the job", + detail="Access denied. This user is not " + "allowed to change the job", ) if ( @@ -248,7 +248,9 @@ async def change_job( schemas.JobType.AnnotationJob, schemas.JobType.ExtractionWithAnnotationJob, ]: - new_job_params_for_annotation = utils.pick_params_for_annotation(new_job_params) + new_job_params_for_annotation = utils.pick_params_for_annotation( + new_job_params + ) if new_job_params_for_annotation.dict(exclude_defaults=True): await utils.update_job_in_annotation( job_id=job_id, diff --git a/jobs/jobs/schemas.py b/jobs/jobs/schemas.py index 893a7cc5a..90cbb386c 100644 --- a/jobs/jobs/schemas.py +++ b/jobs/jobs/schemas.py @@ -62,7 +62,9 @@ class AnnotationJobParams(BaseModel): extensive_coverage: int = 1 -class ExtractionWithAnnotationJobParams(ExtractionJobParams, AnnotationJobParams): +class ExtractionWithAnnotationJobParams( + ExtractionJobParams, AnnotationJobParams +): start_manual_job_automatically: Optional[bool] = True @@ -115,7 +117,9 @@ def check_files_and_datasets_are_not_empty( # pylint: disable=no-self-argument ) -> List[int]: if not values.get("type") == JobType.ImportJob: if not v and not values.get("files"): - raise ValueError("files and datasets cannot be empty at the same time") + raise ValueError( + "files and datasets cannot be empty at the same time" + ) return v # ---- AnnotationJob and ExtractionWithAnnotationJob attributes ---- # @@ -124,7 +128,9 @@ def check_is_auto_distribution( # pylint: disable=no-self-argument cls, v: bool, values: Dict[str, Any] ) -> bool: if values.get("type") == JobType.ExtractionJob and v: - raise ValueError("is_auto_distribution cannot be assigned to ExtractionJob") + raise ValueError( + "is_auto_distribution cannot be assigned to ExtractionJob" + ) return v @validator( @@ -142,7 +148,9 @@ def check_annotationjob_attributes( job_type = values.get("type") if v: if job_type == JobType.ExtractionJob: - raise ValueError(f"{field.name} cannot be assigned to ExtractionJob") + raise ValueError( + f"{field.name} cannot be assigned to ExtractionJob" + ) elif job_type == JobType.AnnotationJob: raise ValueError(f"{field.name} cannot be empty for {job_type}") @@ -155,17 +163,23 @@ def check_annotators( # pylint: disable=no-self-argument job_type = values.get("type") validation_type = values.get("validation_type") if job_type == JobType.ExtractionJob: - raise ValueError(f"{field.name} cannot be assigned to ExtractionJob") + raise ValueError( + f"{field.name} cannot be assigned to ExtractionJob" + ) require_annotators = { ValidationType.hierarchical, ValidationType.cross, } if v and validation_type == ValidationType.validation_only: - raise ValueError(f"{field.name} should be empty with {validation_type=}") + raise ValueError( + f"{field.name} should be empty with {validation_type=}" + ) elif not v and validation_type in require_annotators: - raise ValueError(f"{field.name} cannot be empty with {validation_type=}") + raise ValueError( + f"{field.name} cannot be empty with {validation_type=}" + ) elif len(v) < 2 and validation_type == ValidationType.cross: raise ValueError( @@ 
-183,17 +197,23 @@ def check_validators( # pylint: disable=no-self-argument validation_type = values.get("validation_type") if job_type == JobType.ExtractionJob: - raise ValueError(f"{field.name} cannot be assigned to ExtractionJob") + raise ValueError( + f"{field.name} cannot be assigned to ExtractionJob" + ) if ( validation_type in [ValidationType.hierarchical, ValidationType.validation_only] and not v ): - raise ValueError(f"{field.name} cannot be empty with {validation_type=}") + raise ValueError( + f"{field.name} cannot be empty with {validation_type=}" + ) if validation_type == ValidationType.cross and v: - raise ValueError(f"{field.name} should be empty with {validation_type=}") + raise ValueError( + f"{field.name} should be empty with {validation_type=}" + ) return v @@ -205,7 +225,9 @@ def check_import_job_attributes( # pylint: disable=no-self-argument if job_type != JobType.ImportJob and v: raise ValueError(f"{field.name} cannot be assigned to {job_type}") if job_type == JobType.ImportJob and not v: - raise ValueError(f"{field.name} cannot be empty in {JobType.ImportJob}") + raise ValueError( + f"{field.name} cannot be empty in {JobType.ImportJob}" + ) return v @validator("extensive_coverage") @@ -214,12 +236,18 @@ def check_extensive_coverage( ): validation_type = values.get("validation_type") if validation_type != ValidationType.extensive_coverage and v: - raise ValueError(f"{field.name} cannot be assigned to {validation_type}.") + raise ValueError( + f"{field.name} cannot be assigned to {validation_type}." + ) if validation_type != ValidationType.extensive_coverage and not v: - raise ValueError(f"{field.name} cannot be empty with {validation_type=}.") + raise ValueError( + f"{field.name} cannot be empty with {validation_type=}." + ) annotators = values.get("annotators") if v > len(annotators): - raise ValueError(f"{field.name} cannot be less then number of annotators.") + raise ValueError( + f"{field.name} cannot be less then number of annotators." 
+ ) return v # ---- ExtractionJob and ExtractionWithAnnotationJob attributes ---- # @@ -228,12 +256,16 @@ def check_pipeline_name( # pylint: disable=no-self-argument cls, v: str, values: Dict[str, Any] ) -> str: if values.get("type") == JobType.AnnotationJob and v: - raise ValueError("pipeline_name cannot be assigned to AnnotationJob") + raise ValueError( + "pipeline_name cannot be assigned to AnnotationJob" + ) if ( values.get("type") == JobType.ExtractionJob or values.get("type") == JobType.ExtractionWithAnnotationJob ) and not v: - raise ValueError(f'pipeline cannot be empty for {values.get("type")}') + raise ValueError( + f'pipeline cannot be empty for {values.get("type")}' + ) return v diff --git a/jobs/jobs/utils.py b/jobs/jobs/utils.py index a9e78e006..c00b56b9d 100644 --- a/jobs/jobs/utils.py +++ b/jobs/jobs/utils.py @@ -2,8 +2,6 @@ import aiohttp.client_exceptions import fastapi.encoders -from sqlalchemy.orm import Session - from jobs import db_service from jobs.config import ( HOST_ANNOTATION, @@ -23,6 +21,7 @@ JobMode, JobParamsToChange, ) +from sqlalchemy.orm import Session async def get_files_data_from_datasets( @@ -49,7 +48,9 @@ async def get_files_data_from_datasets( raise_for_status=True, ) if status == 404: - logger.error(f"Failed request to the Dataset Manager: {response}") + logger.error( + f"Failed request to the Dataset Manager: {response}" + ) continue except aiohttp.client_exceptions.ClientError as err: logger.error(f"Failed request to the Dataset Manager: {err}") @@ -85,7 +86,8 @@ async def get_files_data_from_separate_files( "filters": [{"field": "id", "operator": "in", "value": batch}], } logger.info( - "Sending request to the dataset manager " "to get info about files" + "Sending request to the dataset manager " + "to get info about files" ) _, response = await fetch( method="POST", @@ -106,7 +108,9 @@ async def get_files_data_from_separate_files( all_files_data.extend(response["data"]) - valid_separate_files_uuids = [file_data["id"] for file_data in all_files_data] + valid_separate_files_uuids = [ + file_data["id"] for file_data in all_files_data + ] return all_files_data, valid_separate_files_uuids @@ -173,7 +177,9 @@ def convert_files_data_for_inference( ) ) else: - for batch_id, pages_list_chunk in enumerate(divided_pages_list, start=1): + for batch_id, pages_list_chunk in enumerate( + divided_pages_list, start=1 + ): converted_data.append( generate_file_data( file_data, @@ -339,7 +345,9 @@ async def execute_in_annotation_microservice( return None -def delete_duplicates(files_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]: +def delete_duplicates( + files_data: List[Dict[str, Any]] +) -> List[Dict[str, Any]]: """Delete duplicates""" used_file_ids = set() @@ -355,7 +363,9 @@ def delete_duplicates(files_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]: def pick_params_for_annotation( new_job_params: JobParamsToChange, ) -> AnnotationJobUpdateParamsInAnnotation: - picked_params = AnnotationJobUpdateParamsInAnnotation.parse_obj(new_job_params) + picked_params = AnnotationJobUpdateParamsInAnnotation.parse_obj( + new_job_params + ) return picked_params @@ -382,7 +392,9 @@ async def start_job_in_annotation( raise_for_status=True, ) except aiohttp.client_exceptions.ClientError as err: - logger.error("Failed request to the Annotation Manager: {}".format(err)) + logger.error( + "Failed request to the Annotation Manager: {}".format(err) + ) raise fastapi.HTTPException( status_code=fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY, detail="Failed request to the 
Annotation Manager: {}".format(err), @@ -541,7 +553,9 @@ def get_categories_ids( categories: List[Union[str, CategoryLinkInput]] ) -> Tuple[List[str], List[CategoryLinkInput]]: categories_ids = [ - category_id for category_id in categories if isinstance(category_id, str) + category_id + for category_id in categories + if isinstance(category_id, str) ] categories_links = [ category_link diff --git a/jobs/tests/conftest.py b/jobs/tests/conftest.py index 532d454e6..667f04a4d 100644 --- a/jobs/tests/conftest.py +++ b/jobs/tests/conftest.py @@ -101,7 +101,9 @@ def setup_tenant(): def testing_app(testing_engine, testing_session, setup_tenant): with patch("jobs.db_service.LocalSession", testing_session): main.app.dependency_overrides[main.tenant] = lambda: setup_tenant - main.app.dependency_overrides[service.get_session] = lambda: testing_session + main.app.dependency_overrides[ + service.get_session + ] = lambda: testing_session client = TestClient(main.app) yield client @@ -221,7 +223,9 @@ def mock_data_dataset22(): def request_body_for_invalid_file(): request_body = { "pagination": {"page_num": 1, "page_size": 15}, - "filters": [{"field": "id", "operator": "eq", "value": "some invalid file id"}], + "filters": [ + {"field": "id", "operator": "eq", "value": "some invalid file id"} + ], "sorting": [{"field": "id", "direction": "asc"}], } return request_body @@ -258,7 +262,8 @@ def pipeline_info_from_pipeline_manager(): { "id": "7571f17b-d9f1-4d31-af42-7f29fbfd0fb9", "model": "ternary", - "model_url": "http://ternary.dev1/v1/models/" "ternary:predict", + "model_url": "http://ternary.dev1/v1/models/" + "ternary:predict", "categories": ["mrt"], "steps": [], } diff --git a/jobs/tests/test_API_functions/test_ExtractionWithAnnotationJob_workflows.py b/jobs/tests/test_API_functions/test_ExtractionWithAnnotationJob_workflows.py index 887954ef7..f783fdb3b 100644 --- a/jobs/tests/test_API_functions/test_ExtractionWithAnnotationJob_workflows.py +++ b/jobs/tests/test_API_functions/test_ExtractionWithAnnotationJob_workflows.py @@ -41,7 +41,9 @@ def test_change_extraction_job_to_extraction_with_annotation_job_and_run_it( job_id = int(response1.json()["id"]) # --------- Changing Job Status to Finished - imitates callback from Pipeline Manager -------- # - response2 = testing_app.put(f"/jobs/{job_id}", json={"status": "Finished"}) + response2 = testing_app.put( + f"/jobs/{job_id}", json={"status": "Finished"} + ) assert response2.status_code == 200 assert response2.json()["status"] == schemas.Status.finished @@ -61,8 +63,13 @@ def test_change_extraction_job_to_extraction_with_annotation_job_and_run_it( }, ) assert response3.status_code == 200 - assert response3.json()["type"] == schemas.JobType.ExtractionWithAnnotationJob - assert response3.json()["status"] == schemas.Status.ready_for_annotation + assert ( + response3.json()["type"] + == schemas.JobType.ExtractionWithAnnotationJob + ) + assert ( + response3.json()["status"] == schemas.Status.ready_for_annotation + ) assert response3.json()["mode"] == schemas.JobMode.Manual # ---------- Running ExtractionWithAnnotationJob - only manual part ------ # @@ -123,9 +130,13 @@ def test_create_extraction_with_annotation_job_and_run_it( test_job_id = int(response.json()["id"]) # --------- Changing Job Status to Finished - imitates callback from Pipeline Manager -------- # - response2 = testing_app.put(f"/jobs/{test_job_id}", json={"status": "Finished"}) + response2 = testing_app.put( + f"/jobs/{test_job_id}", json={"status": "Finished"} + ) assert response2.status_code == 
200 - assert response2.json()["status"] == schemas.Status.ready_for_annotation + assert ( + response2.json()["status"] == schemas.Status.ready_for_annotation + ) assert response2.json()["mode"] == schemas.JobMode.Manual # --------- Changing Job Status to In Progress - imitates callback from Annotation Manager -------- # @@ -139,7 +150,9 @@ def test_create_extraction_with_annotation_job_and_run_it( # Then Manual Part executes from Annotation Microservice # --------- Changing Job Status to Finished - imitates callback from Annotation Manager -------- # - response5 = testing_app.put(f"/jobs/{test_job_id}", json={"status": "Finished"}) + response5 = testing_app.put( + f"/jobs/{test_job_id}", json={"status": "Finished"} + ) assert response5.status_code == 200 assert response5.json()["status"] == schemas.Status.finished @@ -192,7 +205,11 @@ def test_create_extraction_with_annotation_job_and_autostart_false( test_job_id = int(response.json()["id"]) # --------- Changing Job Status to Finished - imitates callback from Pipeline Manager -------- # - response2 = testing_app.put(f"/jobs/{test_job_id}", json={"status": "Finished"}) + response2 = testing_app.put( + f"/jobs/{test_job_id}", json={"status": "Finished"} + ) assert response2.status_code == 200 - assert response2.json()["status"] == schemas.Status.ready_for_annotation + assert ( + response2.json()["status"] == schemas.Status.ready_for_annotation + ) assert response2.json()["mode"] == schemas.JobMode.Manual diff --git a/jobs/tests/test_API_functions/test_args_validation.py b/jobs/tests/test_API_functions/test_args_validation.py index 1d1ed8ea2..06af3b196 100644 --- a/jobs/tests/test_API_functions/test_args_validation.py +++ b/jobs/tests/test_API_functions/test_args_validation.py @@ -12,7 +12,9 @@ def test_create_annotation_job_lack_of_data(testing_app): "datasets": [1, 2], "files": [], "is_auto_distribution": False, - "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), + "deadline": str( + datetime.datetime.utcnow() + datetime.timedelta(days=1) + ), "is_draft": False, }, ) @@ -52,7 +54,9 @@ def test_create_annotation_job_excessive_data(testing_app): "validators": ["validator1", "validator2"], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), + "deadline": str( + datetime.datetime.utcnow() + datetime.timedelta(days=1) + ), "pipeline_name": "pipeline", }, ) @@ -124,7 +128,9 @@ def test_create_extraction_with_annotation_job_lack_of_data(testing_app): "users": [1, 2], "files": [1, 2], "is_auto_distribution": False, - "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), + "deadline": str( + datetime.datetime.utcnow() + datetime.timedelta(days=1) + ), "is_draft": False, }, ) @@ -157,7 +163,9 @@ def test_create_annotation_job_cross_validation_with_validators(testing_app): "validators": ["validator1"], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), + "deadline": str( + datetime.datetime.utcnow() + datetime.timedelta(days=1) + ), }, ) assert response.status_code == 422 @@ -189,7 +197,9 @@ def test_create_annotation_job_cross_validation_without_annotators( "validators": [], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), + "deadline": str( + datetime.datetime.utcnow() + datetime.timedelta(days=1) + ), }, ) assert 
response.status_code == 422 @@ -221,7 +231,9 @@ def test_create_annotation_job_cross_validation_annotators_not_enough( "validators": [], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), + "deadline": str( + datetime.datetime.utcnow() + datetime.timedelta(days=1) + ), }, ) assert response.status_code == 422 @@ -253,7 +265,9 @@ def test_create_annotation_job_hierarchichal_validation_without_validators( "validators": [], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), + "deadline": str( + datetime.datetime.utcnow() + datetime.timedelta(days=1) + ), }, ) assert response.status_code == 422 @@ -310,7 +324,9 @@ def test_create_annotationjob_validation_only_validation_type_with_annotators( "validators": ["validator1"], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), + "deadline": str( + datetime.datetime.utcnow() + datetime.timedelta(days=1) + ), }, ) assert response.status_code == 422 @@ -343,7 +359,9 @@ def test_create_annotationjob_validation_only_validation_type_without_validators "validators": [], "categories": ["category1", "category2"], "is_auto_distribution": False, - "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), + "deadline": str( + datetime.datetime.utcnow() + datetime.timedelta(days=1) + ), }, ) assert response.status_code == 422 diff --git a/jobs/tests/test_API_functions/test_change_job-proxy_to_annotation.py b/jobs/tests/test_API_functions/test_change_job-proxy_to_annotation.py index 719505dc4..2283747d1 100644 --- a/jobs/tests/test_API_functions/test_change_job-proxy_to_annotation.py +++ b/jobs/tests/test_API_functions/test_change_job-proxy_to_annotation.py @@ -9,7 +9,9 @@ def test_change_annotation_job_with_request_to_annotation( ): with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: mock.side_effect = [(200, {})] - create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams2) + create_mock_annotation_job_in_db( + testing_session, mock_AnnotationJobParams2 + ) response = testing_app.put( "/jobs/1", json={ @@ -31,7 +33,9 @@ def test_change_annotation_job_without_request_to_annotation( testing_app, testing_session, mock_AnnotationJobParams2 ): with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: - create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams2) + create_mock_annotation_job_in_db( + testing_session, mock_AnnotationJobParams2 + ) response = testing_app.put("/jobs/1", json={"status": "Finished"}) assert response.status_code == 200 assert response.json()["status"] == "Finished" @@ -44,7 +48,9 @@ def test_change_annotation_job_with_partial_request_to_annotation( ): with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: mock.side_effect = [(200, {})] - create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams2) + create_mock_annotation_job_in_db( + testing_session, mock_AnnotationJobParams2 + ) response = testing_app.put( "/jobs/1", json={ diff --git a/jobs/tests/test_API_functions/test_change_job.py b/jobs/tests/test_API_functions/test_change_job.py index 2f7793d1c..dff718b0c 100644 --- a/jobs/tests/test_API_functions/test_change_job.py +++ b/jobs/tests/test_API_functions/test_change_job.py @@ -48,7 +48,9 @@ def 
test_change_job_status_with_validation_incorrect_job_owner( ): create_mock_extraction_job_in_db(testing_session) - create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams2) + create_mock_annotation_job_in_db( + testing_session, mock_AnnotationJobParams2 + ) response2 = testing_app.put( "/jobs/2", json={"status": "Finished"}, @@ -59,7 +61,9 @@ def test_change_job_status_with_validation_incorrect_job_owner( } -def test_change_job_pipeline_id(testing_app, testing_session, mock_AnnotationJobParams): +def test_change_job_pipeline_id( + testing_app, testing_session, mock_AnnotationJobParams +): create_mock_extraction_job_in_db(testing_session) response = testing_app.put("/jobs/1", json={"pipeline_id": 555}) assert response.status_code == 200 diff --git a/jobs/tests/test_API_functions/test_create_job.py b/jobs/tests/test_API_functions/test_create_job.py index 33f6f3364..31a91435e 100644 --- a/jobs/tests/test_API_functions/test_create_job.py +++ b/jobs/tests/test_API_functions/test_create_job.py @@ -24,7 +24,9 @@ def test_create_annotation_job_draft(testing_app, jw_token): "categories": ["category1", "category2"], "validation_type": schemas.ValidationType.hierarchical, "is_auto_distribution": False, - "deadline": str(datetime.datetime.utcnow() + datetime.timedelta(days=1)), + "deadline": str( + datetime.datetime.utcnow() + datetime.timedelta(days=1) + ), "is_draft": True, }, ) @@ -192,7 +194,9 @@ def test_schedule_manual_job_valid_datasets( assert response.json()["name"] == "MockAnnotationJob" -def test_schedule_manual_job_one_invalid_dataset(testing_app, mock_data_dataset11): +def test_schedule_manual_job_one_invalid_dataset( + testing_app, mock_data_dataset11 +): with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: mock.side_effect = [ (200, mock_data_dataset11), diff --git a/jobs/tests/test_API_functions/test_other_API_functions.py b/jobs/tests/test_API_functions/test_other_API_functions.py index 144e0adb1..8447713ed 100644 --- a/jobs/tests/test_API_functions/test_other_API_functions.py +++ b/jobs/tests/test_API_functions/test_other_API_functions.py @@ -7,7 +7,9 @@ ) -def test_get_all_jobs_endpoint(testing_app, testing_session, mock_AnnotationJobParams): +def test_get_all_jobs_endpoint( + testing_app, testing_session, mock_AnnotationJobParams +): create_mock_extraction_job_in_db(testing_session) create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams) @@ -18,7 +20,9 @@ def test_get_all_jobs_endpoint(testing_app, testing_session, mock_AnnotationJobP assert response.json()[1]["name"] == "MockAnnotationJob" -def test_get_job_by_id_positive(testing_app, testing_session, mock_AnnotationJobParams): +def test_get_job_by_id_positive( + testing_app, testing_session, mock_AnnotationJobParams +): create_mock_extraction_job_in_db(testing_session) create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams) response = testing_app.get("/jobs/2") @@ -26,7 +30,9 @@ def test_get_job_by_id_positive(testing_app, testing_session, mock_AnnotationJob assert response.json()["name"] == "MockAnnotationJob" -def test_get_job_by_id_negative(testing_app, testing_session, mock_AnnotationJobParams): +def test_get_job_by_id_negative( + testing_app, testing_session, mock_AnnotationJobParams +): create_mock_extraction_job_in_db( testing_session, ) @@ -36,11 +42,15 @@ def test_get_job_by_id_negative(testing_app, testing_session, mock_AnnotationJob assert response.json()["detail"] == "Job with this id does not exist." 
-def test_delete_job_positive(testing_app, testing_session, mock_AnnotationJobParams): +def test_delete_job_positive( + testing_app, testing_session, mock_AnnotationJobParams +): with patch("jobs.utils.fetch", return_value=asyncio.Future()) as mock: mock.side_effect = [(200, {})] create_mock_extraction_job_in_db(testing_session) - create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams) + create_mock_annotation_job_in_db( + testing_session, mock_AnnotationJobParams + ) response = testing_app.delete( "/jobs/2", ) diff --git a/jobs/tests/test_API_functions/test_search_jobs.py b/jobs/tests/test_API_functions/test_search_jobs.py index 1ea01cdf9..e53234843 100644 --- a/jobs/tests/test_API_functions/test_search_jobs.py +++ b/jobs/tests/test_API_functions/test_search_jobs.py @@ -10,7 +10,9 @@ def test_search_job_positive(testing_app, testing_session): "/jobs/search", json={ "pagination": {"page_num": 1, "page_size": 15}, - "filters": [{"field": "id", "operator": "is_not_null", "value": "string"}], + "filters": [ + {"field": "id", "operator": "is_not_null", "value": "string"} + ], "sorting": [{"field": "id", "direction": "asc"}], }, ) @@ -36,7 +38,9 @@ def test_search_job_invalid_field(testing_app, testing_session): ) assert response.status_code == 422 response_message = response.json()["detail"][0]["msg"] - assert response_message.startswith("value is not a valid enumeration member") + assert response_message.startswith( + "value is not a valid enumeration member" + ) def test_search_job_without_filters( @@ -71,13 +75,17 @@ def test_search_job_has_pagination( ): for _ in range(25): create_mock_extraction_job_in_db(testing_session) - create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams) + create_mock_annotation_job_in_db( + testing_session, mock_AnnotationJobParams + ) response1 = testing_app.post( "/jobs/search", json={ "pagination": {"page_num": 1, "page_size": 15}, - "filters": [{"field": "id", "operator": "is_not_null", "value": "string"}], + "filters": [ + {"field": "id", "operator": "is_not_null", "value": "string"} + ], "sorting": [{"field": "id", "direction": "asc"}], }, ) @@ -94,7 +102,9 @@ def test_search_job_has_pagination( "/jobs/search", json={ "pagination": {"page_num": 2, "page_size": 15}, - "filters": [{"field": "id", "operator": "is_not_null", "value": "string"}], + "filters": [ + {"field": "id", "operator": "is_not_null", "value": "string"} + ], "sorting": [{"field": "id", "direction": "asc"}], }, ) diff --git a/jobs/tests/test_db.py b/jobs/tests/test_db.py index 95e149580..63b580536 100644 --- a/jobs/tests/test_db.py +++ b/jobs/tests/test_db.py @@ -92,7 +92,9 @@ def test_create_extraction_job_in_db(testing_session): assert second_quantity_of_jobs - first_quantity_of_jobs == 1 -def test_create_annotation_job_in_db(testing_session, mock_AnnotationJobParams): +def test_create_annotation_job_in_db( + testing_session, mock_AnnotationJobParams +): first_quantity_of_jobs = len(db_service.get_all_jobs(testing_session)) assert db_service.create_annotation_job( db=testing_session, @@ -131,7 +133,9 @@ def create_mock_extraction_job_in_db_draft(testing_session): return result -def create_mock_annotation_job_in_db(testing_session, mock_AnnotationJobParams): +def create_mock_annotation_job_in_db( + testing_session, mock_AnnotationJobParams +): result = db_service.create_annotation_job( db=testing_session, annotation_job_input=mock_AnnotationJobParams, @@ -188,7 +192,9 @@ def test_create_ImportJob(testing_session): import_bucket="jpg", ) - 
new_import_job = db_service.create_import_job(testing_session, mockImportJobParams) + new_import_job = db_service.create_import_job( + testing_session, mockImportJobParams + ) assert new_import_job assert new_import_job.name == "MockImportJob" assert new_import_job.type == schemas.JobType.ImportJob diff --git a/jobs/tests/test_utils.py b/jobs/tests/test_utils.py index a15ad3449..ec68c45a2 100644 --- a/jobs/tests/test_utils.py +++ b/jobs/tests/test_utils.py @@ -75,7 +75,9 @@ async def test_positive_get_files_data_from_datasets( ) assert ( - await utils.get_files_data_from_datasets([1, 2], "test_tenant", jw_token) + await utils.get_files_data_from_datasets( + [1, 2], "test_tenant", jw_token + ) == expected_result ) @@ -123,7 +125,9 @@ async def test_get_files_data_from_datasets_with_one_invalid_tag( [1], ) assert ( - await utils.get_files_data_from_datasets([1, 444], "test_tenant", jw_token) + await utils.get_files_data_from_datasets( + [1, 444], "test_tenant", jw_token + ) == expected_result ) @@ -141,9 +145,13 @@ async def test_get_files_data_from_datasets_with_all_invalid_tags(jw_token): @pytest.mark.asyncio async def test_get_files_data_from_datasets_501_error(jw_token): - with patch("jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError()): + with patch( + "jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError() + ): with pytest.raises(HTTPException) as e_info: - await utils.get_files_data_from_datasets([121], "test_tenant", jw_token) + await utils.get_files_data_from_datasets( + [121], "test_tenant", jw_token + ) assert e_info.value.status_code == 422 @@ -259,7 +267,9 @@ async def test_get_files_data_from_separate_files_100_elements(jw_token): ], } - with patch("jobs.utils.fetch", return_value=(200, large_mock_files_data)) as mock: + with patch( + "jobs.utils.fetch", return_value=(200, large_mock_files_data) + ) as mock: assert await utils.get_files_data_from_separate_files( list(range(1, 101)), "test_tenant", jw_token ) == ( @@ -321,7 +331,9 @@ async def test_get_files_data_from_separate_files_101_elements(jw_token): for i in range(101, 102) ], } - with patch("jobs.utils.fetch", side_effect=[(200, json_1), (200, json_2)]) as mock: + with patch( + "jobs.utils.fetch", side_effect=[(200, json_1), (200, json_2)] + ) as mock: expected_files_data = [ { "id": i, @@ -415,7 +427,9 @@ async def test_get_files_data_from_separate_files_111_elements(jw_token): for i in range(101, 111) ], } - with patch("jobs.utils.fetch", side_effect=[(200, json_1), (200, json_2)]) as mock: + with patch( + "jobs.utils.fetch", side_effect=[(200, json_1), (200, json_2)] + ) as mock: assert await utils.get_files_data_from_separate_files( list(range(1, 111)), "test_tenant", jw_token ) == ( @@ -429,7 +443,9 @@ async def test_get_files_data_from_separate_files_111_elements(jw_token): async def test_get_files_data_from_separate_files_501_code( request_body_for_invalid_file, jw_token ): - with patch("jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError()): + with patch( + "jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError() + ): with pytest.raises(HTTPException) as e_info: await utils.get_files_data_from_separate_files( [1234], "test_tenant", jw_token @@ -492,7 +508,9 @@ async def test_get_pipeline_id_by_its_name_positive(jw_token): @pytest.mark.asyncio async def test_get_pipeline_id_by_its_name_negative(jw_token): - with patch("jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError()): + with patch( + "jobs.utils.fetch", 
side_effect=aiohttp.client_exceptions.ClientError() + ): with pytest.raises(HTTPException) as e_info: await utils.get_pipeline_instance_by_its_name( "invalid_pipeline_name", "test_tenant", jw_token @@ -505,7 +523,9 @@ async def test_get_pipeline_id_by_its_name_negative(jw_token): @pytest.mark.asyncio async def test_execute_pipeline_negative(jw_token): - with patch("jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError()): + with patch( + "jobs.utils.fetch", side_effect=aiohttp.client_exceptions.ClientError() + ): with pytest.raises(HTTPException) as e_info: await utils.execute_pipeline( pipeline_id=2, diff --git a/lib/filter_lib/src/dict_parser.py b/lib/filter_lib/src/dict_parser.py index 2c4418df9..112b3277e 100644 --- a/lib/filter_lib/src/dict_parser.py +++ b/lib/filter_lib/src/dict_parser.py @@ -1,7 +1,9 @@ from typing import Any, Dict -def map_request_to_filter(fields: Dict[str, Any], model: str) -> Dict[str, Any]: +def map_request_to_filter( + fields: Dict[str, Any], model: str +) -> Dict[str, Any]: result: Dict[str, Any] = { "pagination": {}, "filters": [], diff --git a/lib/filter_lib/src/enum_generator.py b/lib/filter_lib/src/enum_generator.py index 4a2517b05..4f78341c3 100644 --- a/lib/filter_lib/src/enum_generator.py +++ b/lib/filter_lib/src/enum_generator.py @@ -15,7 +15,9 @@ class TempEnum(str, enum.Enum): def _get_model_fields(model: Type[DeclarativeMeta]) -> List[str]: mapper: Mapper = inspect(model) relations = [ - attr for attr in inspect(model).attrs if isinstance(attr, RelationshipProperty) + attr + for attr in inspect(model).attrs + if isinstance(attr, RelationshipProperty) ] relation_fields = [ rel.key + "." + col.key diff --git a/lib/filter_lib/src/query_modificator.py b/lib/filter_lib/src/query_modificator.py index 488506add..b04621879 100644 --- a/lib/filter_lib/src/query_modificator.py +++ b/lib/filter_lib/src/query_modificator.py @@ -38,7 +38,9 @@ def get_distinct_columns( return result -def form_query(args: Dict[str, Any], query: Query) -> Tuple[Query, PaginationParams]: +def form_query( + args: Dict[str, Any], query: Query +) -> Tuple[Query, PaginationParams]: filters = args.get("filters") sorting = args.get("sorting") pagination = args.get("pagination") @@ -48,10 +50,14 @@ def form_query(args: Dict[str, Any], query: Query) -> Tuple[Query, PaginationPar filters with 'distinct' operator and others. It's being done because DISTINCT statements should only be applied to query all at once, rather than one by one""" - distinct_filters, non_distinct_filters = splint_to_distinct_and_not(filters) + distinct_filters, non_distinct_filters = splint_to_distinct_and_not( + filters + ) if distinct_filters: distinct_columns = get_distinct_columns(query, distinct_filters) - query = query.with_entities(*distinct_columns).distinct(*distinct_columns) + query = query.with_entities(*distinct_columns).distinct( + *distinct_columns + ) for fil in non_distinct_filters: query = _create_filter(query, fil) @@ -123,7 +129,9 @@ def validate_filter_args( f"Field value should not be null for operator {operator.value}." 
) if _has_relation(model, field) and _op_is_match(fil): - raise BadFilterFormat("Operator 'match' shouldn't be used with relations") + raise BadFilterFormat( + "Operator 'match' shouldn't be used with relations" + ) def _create_filter(query: Query, fil: Dict[str, Any]) -> Query: @@ -217,13 +225,17 @@ def _make_ltree_query( :param value: Id of record :return: Query instance """ - subquery = query.with_entities(model.tree).filter(model.id == value).subquery() + subquery = ( + query.with_entities(model.tree).filter(model.id == value).subquery() + ) if op == "parent": return ( query.filter( ( - func.subpath(model.tree, 0, func.nlevel(subquery.c.tree) - 1) + func.subpath( + model.tree, 0, func.nlevel(subquery.c.tree) - 1 + ) == model.tree ), func.index(subquery.c.tree, model.tree) != -1, @@ -250,7 +262,9 @@ def _make_ltree_query( return query -def _create_or_condition(fil: Dict[str, str]) -> Dict[str, List[Dict[str, str]]]: +def _create_or_condition( + fil: Dict[str, str] +) -> Dict[str, List[Dict[str, str]]]: fil_include_null = fil.copy() fil_include_null["op"] = "is_null" filter_args = {"or": [{**fil}, {**fil_include_null}]} diff --git a/lib/filter_lib/src/schema_generator.py b/lib/filter_lib/src/schema_generator.py index b494b8bb0..57992248b 100644 --- a/lib/filter_lib/src/schema_generator.py +++ b/lib/filter_lib/src/schema_generator.py @@ -68,7 +68,9 @@ class BaseSearch(BaseModel): pagination: Optional[Pagination] @root_validator - def root_validate(cls, values: Any) -> Any: # pylint: disable=no-self-argument + def root_validate( + cls, values: Any + ) -> Any: # pylint: disable=no-self-argument if not values.get("pagination"): values["pagination"] = Pagination(page_num=1, page_size=15) return values @@ -90,7 +92,9 @@ class Page(GenericModel, Generic[TypeC], BaseModel): data: Sequence[TypeC] @validator("data") - def custom_validator(cls, v: Any) -> Any: # pylint: disable=no-self-argument + def custom_validator( + cls, v: Any + ) -> Any: # pylint: disable=no-self-argument """Custom validator applied to data in case of using 'distinct' statement and getting result as 'sqlalchemy.util._collections.result' but not as model class object diff --git a/lib/filter_lib/tests/test_dict_parser.py b/lib/filter_lib/tests/test_dict_parser.py index a2130ef8a..5228d14b8 100644 --- a/lib/filter_lib/tests/test_dict_parser.py +++ b/lib/filter_lib/tests/test_dict_parser.py @@ -3,7 +3,9 @@ example_1 = { "pagination": {"page_num": 1, "page_size": 50}, - "filters": [{"field": "ts_vector", "operator": "match", "value": "kubeflow"}], + "filters": [ + {"field": "ts_vector", "operator": "match", "value": "kubeflow"} + ], "sorting": [{"field": "id", "direction": "desc"}], } @@ -46,7 +48,9 @@ def test_positive_standard_structure(): "value": "kubeflow", } ], - "sorting": [{"model": "test_model", "field": "id", "direction": "desc"}], + "sorting": [ + {"model": "test_model", "field": "id", "direction": "desc"} + ], } @@ -73,7 +77,9 @@ def test_positive_many_nested_structures(): "value": 100, }, ], - "sorting": [{"model": "test_model", "field": "created", "direction": "desc"}], + "sorting": [ + {"model": "test_model", "field": "created", "direction": "desc"} + ], } diff --git a/lib/filter_lib/tests/test_enum_generator.py b/lib/filter_lib/tests/test_enum_generator.py index b821ddc25..a064f6414 100644 --- a/lib/filter_lib/tests/test_enum_generator.py +++ b/lib/filter_lib/tests/test_enum_generator.py @@ -32,7 +32,9 @@ def test_exclude_fields(): assert _exclude_fields( user_fields, ["id", "addresses.id", "addresses.location"] ) 
== ["name", "email", "addresses.owner"] - assert _exclude_fields(address_fields, ["id", "user.name", "user.email"]) == [ + assert _exclude_fields( + address_fields, ["id", "user.name", "user.email"] + ) == [ "location", "owner", "user.id", @@ -56,5 +58,7 @@ def test_create_enum_model(): assert user_enum.EMAIL.value == "email" address_enum = _create_enum_model(address_table_name, address_fields) - assert address_enum.ID.value == "id", address_enum.LOCATION.value == "location" + assert address_enum.ID.value == "id", ( + address_enum.LOCATION.value == "location" + ) assert address_enum.OWNER.value == "owner" diff --git a/lib/filter_lib/tests/test_pagination.py b/lib/filter_lib/tests/test_pagination.py index 327778c74..638faf66b 100644 --- a/lib/filter_lib/tests/test_pagination.py +++ b/lib/filter_lib/tests/test_pagination.py @@ -18,7 +18,9 @@ ], ) def test_pag_params(page_num, page_size, min_pages_left, total, has_more): - res = PaginationParams(page_num, page_size, min_pages_left, total, has_more) + res = PaginationParams( + page_num, page_size, min_pages_left, total, has_more + ) assert ( res.page_num, res.page_size, diff --git a/lib/filter_lib/tests/test_query_modifier.py b/lib/filter_lib/tests/test_query_modifier.py index 2198210f7..2a6f36b75 100644 --- a/lib/filter_lib/tests/test_query_modifier.py +++ b/lib/filter_lib/tests/test_query_modifier.py @@ -184,7 +184,9 @@ def test_create_filter_ltree_not_supported_operation(get_session): # Act query = _create_filter(query, spec) - expected_sql_str = "SELECT categories.id, categories.tree \nFROM categories" + expected_sql_str = ( + "SELECT categories.id, categories.tree \nFROM categories" + ) compiled_statement = query.statement.compile() @@ -342,7 +344,9 @@ def test_form_query_with_distincts_and_filters_and_sorting(get_session): "value": "%or%", }, ], - "sorting": [{"model": "User", "field": user_enum.NAME, "direction": "desc"}], + "sorting": [ + {"model": "User", "field": user_enum.NAME, "direction": "desc"} + ], } query, pag = form_query(specs, query) assert query.all() == [("Grigoriy",), ("Fedor",)] diff --git a/lib/filter_lib/tests/test_schema_generator.py b/lib/filter_lib/tests/test_schema_generator.py index 0b5caa6c9..ce842929a 100644 --- a/lib/filter_lib/tests/test_schema_generator.py +++ b/lib/filter_lib/tests/test_schema_generator.py @@ -18,7 +18,9 @@ def test_search_class_creating(): ] AddressFilter = create_filter_model(Address, exclude=["location"]) - assert AddressFilter.schema()["definitions"]["addresses_Address"]["enum"] == [ + assert AddressFilter.schema()["definitions"]["addresses_Address"][ + "enum" + ] == [ "id", "owner", "user.id", diff --git a/lib/filter_lib/usage_example/app.py b/lib/filter_lib/usage_example/app.py index d2e1f7210..19882eb70 100644 --- a/lib/filter_lib/usage_example/app.py +++ b/lib/filter_lib/usage_example/app.py @@ -70,13 +70,17 @@ def search_users( def create_new_address( request: AddressCreate, session: Session = Depends(get_db) ) -> Set[str]: - new_address = Address(email_address=request.email_address, user_id=request.user_id) + new_address = Address( + email_address=request.email_address, user_id=request.user_id + ) session.add(new_address) session.commit() return {"New address created"} -@app.post("/addresses/search", tags=["addresses"], response_model=Page[AddressOut]) +@app.post( + "/addresses/search", tags=["addresses"], response_model=Page[AddressOut] +) def search_address( request: AddressFilterModel, session: Session = Depends(get_db) # type: ignore # noqa ) -> Page[UserOut]: diff --git 
a/lib/tenants/src/dependency.py b/lib/tenants/src/dependency.py index bf09e684a..d12a98a46 100644 --- a/lib/tenants/src/dependency.py +++ b/lib/tenants/src/dependency.py @@ -27,7 +27,9 @@ def __init__( """ self.key = key self.algorithm = self._check_algorithm(algorithm) - self.jwk_client: jwt.PyJWKClient = jwt.PyJWKClient(self._create_url(url)) + self.jwk_client: jwt.PyJWKClient = jwt.PyJWKClient( + self._create_url(url) + ) async def __call__(self, request: Request) -> TenantData: authorization: str = request.headers.get("Authorization") @@ -57,7 +59,9 @@ async def __call__(self, request: Request) -> TenantData: tenants = decoded.get("tenants") if decoded.get("clientId") == "pipelines": - return TenantData(token=token, user_id=sub, roles=roles, tenants=tenants) + return TenantData( + token=token, user_id=sub, roles=roles, tenants=tenants + ) if not (sub and roles and tenants): raise HTTPException( @@ -71,7 +75,9 @@ async def __call__(self, request: Request) -> TenantData: detail="X-Current-Tenant not in jwt tenants!", ) - return TenantData(token=token, user_id=sub, roles=roles, tenants=tenants) + return TenantData( + token=token, user_id=sub, roles=roles, tenants=tenants + ) def decode_hs256(self, token: str) -> Dict[str, Any]: try: @@ -91,7 +97,9 @@ def decode_hs256(self, token: str) -> Dict[str, Any]: def decode_rs256(self, token: str) -> Dict[str, Any]: try: signing_key = self.jwk_client.get_signing_key_from_jwt(token) - decoded = jwt.decode(token, signing_key.key, algorithms=[self.algorithm]) + decoded = jwt.decode( + token, signing_key.key, algorithms=[self.algorithm] + ) except jwt.ExpiredSignatureError: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, @@ -107,7 +115,9 @@ def decode_rs256(self, token: str) -> Dict[str, Any]: @staticmethod def _check_algorithm(alg: str) -> str: if alg not in SupportedAlgorithms.members(): - raise ValueError(f"Available algorithms {SupportedAlgorithms.members()}") + raise ValueError( + f"Available algorithms {SupportedAlgorithms.members()}" + ) return alg @staticmethod @@ -157,5 +167,7 @@ def get_tenant_info( debug: If True button 'Authorize' will be rendered on Swagger. 
""" if debug: - return TenantDependencyDocs(key, algorithm, url, scheme_name, description) + return TenantDependencyDocs( + key, algorithm, url, scheme_name, description + ) return TenantDependencyBase(key, algorithm, url) diff --git a/lib/tenants/tests/conftest.py b/lib/tenants/tests/conftest.py index 877b9dbf1..4132b7027 100644 --- a/lib/tenants/tests/conftest.py +++ b/lib/tenants/tests/conftest.py @@ -24,7 +24,9 @@ def get_key(filename: str) -> str: @pytest.fixture def mock_jwk_client(): - with patch("src.dependency.jwt.PyJWKClient.__init__", return_value=None) as mock: + with patch( + "src.dependency.jwt.PyJWKClient.__init__", return_value=None + ) as mock: yield mock @@ -139,7 +141,9 @@ def token_mock_hs256(): "realm_access": {"roles": ["role-annotator"]}, "tenants": ["tenant1", "epam"], } - token = create_access_token(data=payload, secret=SECRET_KEY, expires_delta=15) + token = create_access_token( + data=payload, secret=SECRET_KEY, expires_delta=15 + ) yield token @@ -150,7 +154,9 @@ def expired_token_mock_hs256(): "realm_access": {"roles": ["role-annotator"]}, "tenants": ["tenant1", "epam"], } - token = create_access_token(data=payload, secret=SECRET_KEY, expires_delta=-15) + token = create_access_token( + data=payload, secret=SECRET_KEY, expires_delta=-15 + ) yield token @@ -161,7 +167,9 @@ def wrong_data_token_mock_hs256(): "realm_access": {"roles": ["role-annotator"]}, "qtenants": ["tenant1"], } - token = create_access_token(data=payload, secret=SECRET_KEY, expires_delta=15) + token = create_access_token( + data=payload, secret=SECRET_KEY, expires_delta=15 + ) yield token diff --git a/lib/tenants/tests/test_dependency_rs256.py b/lib/tenants/tests/test_dependency_rs256.py index 259c0383e..b7f8280dc 100644 --- a/lib/tenants/tests/test_dependency_rs256.py +++ b/lib/tenants/tests/test_dependency_rs256.py @@ -97,7 +97,9 @@ def test_client_token_positive(client_token_mock_rs256, test_app_rs256): assert res.json() == response_body -def test_wrong_client_token_data(wrong_client_token_mock_rs256, test_app_rs256): +def test_wrong_client_token_data( + wrong_client_token_mock_rs256, test_app_rs256 +): headers = { "Authorization": f"Bearer {wrong_client_token_mock_rs256}", "X-Current-Tenant": CURRENT_TENANT, diff --git a/lib/tenants/tests/test_schema.py b/lib/tenants/tests/test_schema.py index 7a77912cc..c79257cec 100644 --- a/lib/tenants/tests/test_schema.py +++ b/lib/tenants/tests/test_schema.py @@ -35,7 +35,9 @@ ) def test_tenant_data_positive(token, user_id, roles, tenants, expected_result): assert ( - TenantData(token=token, user_id=user_id, roles=roles, tenants=tenants).dict() + TenantData( + token=token, user_id=user_id, roles=roles, tenants=tenants + ).dict() == expected_result ) diff --git a/models/alembic/env.py b/models/alembic/env.py index ac145e51f..040ba1808 100644 --- a/models/alembic/env.py +++ b/models/alembic/env.py @@ -71,7 +71,9 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure( + connection=connection, target_metadata=target_metadata + ) with context.begin_transaction(): context.run_migrations() diff --git a/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py b/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py index 7a87b4f50..a3e04f982 100644 --- a/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py +++ b/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py @@ -19,7 
+19,9 @@ def upgrade(): op.add_column( "model", - sa.Column("description", sa.VARCHAR(), server_default="", nullable=False), + sa.Column( + "description", sa.VARCHAR(), server_default="", nullable=False + ), ) diff --git a/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py b/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py index ca4bb3875..79a891592 100644 --- a/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py +++ b/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py @@ -18,8 +18,12 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.add_column("basement", sa.Column("key_script", sa.String(), nullable=True)) - op.add_column("basement", sa.Column("key_archive", sa.String(), nullable=True)) + op.add_column( + "basement", sa.Column("key_script", sa.String(), nullable=True) + ) + op.add_column( + "basement", sa.Column("key_archive", sa.String(), nullable=True) + ) # ### end Alembic commands ### diff --git a/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py b/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py index fa36dda40..f4f40b469 100644 --- a/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py +++ b/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py @@ -18,7 +18,9 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.add_column("training", sa.Column("key_archive", sa.String(), nullable=True)) + op.add_column( + "training", sa.Column("key_archive", sa.String(), nullable=True) + ) # ### end Alembic commands ### diff --git a/models/alembic/versions/683f401ed33e_create_tables.py b/models/alembic/versions/683f401ed33e_create_tables.py index 93c90142c..a1829eb95 100644 --- a/models/alembic/versions/683f401ed33e_create_tables.py +++ b/models/alembic/versions/683f401ed33e_create_tables.py @@ -46,7 +46,9 @@ def upgrade() -> None: sa.Column("created_by", sa.String()), sa.Column("created_at", sa.DateTime()), sa.Column("tenant", sa.String(length=100)), - sa.ForeignKeyConstraint(["basement"], ["basement.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint( + ["basement"], ["basement.id"], ondelete="CASCADE" + ), sa.PrimaryKeyConstraint("id"), ) op.create_table( @@ -75,7 +77,9 @@ def upgrade() -> None: sa.Column("created_by", sa.String()), sa.Column("created_at", sa.DateTime()), sa.Column("tenant", sa.String(length=100)), - sa.ForeignKeyConstraint(["basement"], ["basement.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint( + ["basement"], ["basement.id"], ondelete="CASCADE" + ), sa.ForeignKeyConstraint( ["training_id"], ["training.id"], diff --git a/models/alembic/versions/826680104247_pod_limits_column.py b/models/alembic/versions/826680104247_pod_limits_column.py index e1ef44ea0..770f778d9 100644 --- a/models/alembic/versions/826680104247_pod_limits_column.py +++ b/models/alembic/versions/826680104247_pod_limits_column.py @@ -23,7 +23,9 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### op.add_column( "basement", - sa.Column("limits", postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column( + "limits", postgresql.JSON(astext_type=sa.Text()), nullable=True + ), ) default_limits = { @@ -46,7 +48,8 @@ def upgrade(): op.execute(f"UPDATE basement SET limits = '{dumps(default_limits)}'") op.execute( - f"UPDATE basement SET limits = '{dumps(dod_limits)}' " f"WHERE id LIKE '%dod%'" + f"UPDATE basement SET limits = '{dumps(dod_limits)}' " + f"WHERE id LIKE '%dod%'" ) op.execute( f"UPDATE basement SET limits = '{dumps(table_extractor_limits)}' " diff --git a/models/alembic/versions/8fd15e9edd28_pod_cpu_limits_change.py b/models/alembic/versions/8fd15e9edd28_pod_cpu_limits_change.py index eece05c2f..5314a583c 100644 --- a/models/alembic/versions/8fd15e9edd28_pod_cpu_limits_change.py +++ b/models/alembic/versions/8fd15e9edd28_pod_cpu_limits_change.py @@ -38,7 +38,8 @@ def upgrade(): op.execute(f"UPDATE basement SET limits = '{dumps(default_limits)}'") op.execute( - f"UPDATE basement SET limits = " f"'{dumps(dod_limits)}' WHERE id LIKE '%dod%'" + f"UPDATE basement SET limits = " + f"'{dumps(dod_limits)}' WHERE id LIKE '%dod%'" ) op.execute( f"UPDATE basement SET limits = " @@ -75,7 +76,8 @@ def downgrade(): op.execute(f"UPDATE basement SET limits = '{dumps(default_limits)}'") op.execute( - f"UPDATE basement SET limits =" f" '{dumps(dod_limits)}' WHERE id LIKE '%dod%'" + f"UPDATE basement SET limits =" + f" '{dumps(dod_limits)}' WHERE id LIKE '%dod%'" ) op.execute( f"UPDATE basement SET limits =" diff --git a/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py b/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py index 4aa952e83..3f83181a2 100644 --- a/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py +++ b/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py @@ -19,7 +19,9 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### - op.add_column("training", sa.Column("jobs", sa.ARRAY(sa.Integer()), nullable=True)) + op.add_column( + "training", sa.Column("jobs", sa.ARRAY(sa.Integer()), nullable=True) + ) op.drop_column("training", "datasets_ids") op.drop_column("training", "files_ids") op.drop_column("basement", "supported_args") diff --git a/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py b/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py index 585c6ab66..f6d970c46 100644 --- a/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py +++ b/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py @@ -19,7 +19,9 @@ def upgrade(): op.add_column( "model", - sa.Column("latest", sa.Boolean(), nullable=False, server_default="True"), + sa.Column( + "latest", sa.Boolean(), nullable=False, server_default="True" + ), ) op.alter_column("model", "latest", server_default=None) diff --git a/models/alembic/versions/b5d7e85a73c2_set_basement_concurrency_limits.py b/models/alembic/versions/b5d7e85a73c2_set_basement_concurrency_limits.py index 6ed22f8da..844bef4f4 100644 --- a/models/alembic/versions/b5d7e85a73c2_set_basement_concurrency_limits.py +++ b/models/alembic/versions/b5d7e85a73c2_set_basement_concurrency_limits.py @@ -41,7 +41,8 @@ def upgrade(): op.execute(f"UPDATE basement SET limits = '{dumps(default_limits)}'") op.execute( - f"UPDATE basement SET limits = '{dumps(dod_limits)}' " "WHERE id LIKE '%dod%'" + f"UPDATE basement SET limits = '{dumps(dod_limits)}' " + "WHERE id LIKE '%dod%'" ) op.execute( f"UPDATE basement SET limits = '{dumps(table_extractor_limits)}' " @@ -74,7 +75,8 @@ def downgrade(): op.execute(f"UPDATE basement SET limits = '{dumps(default_limits)}'") op.execute( - f"UPDATE basement SET limits = '{dumps(dod_limits)}' " "WHERE id LIKE '%dod%'" + f"UPDATE basement SET limits = '{dumps(dod_limits)}' " + "WHERE id LIKE '%dod%'" ) op.execute( f"UPDATE basement SET limits = '{dumps(table_extractor_limits)}' " diff --git a/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py b/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py index 98bfc632c..a826bbd03 100644 --- a/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py +++ b/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py @@ -18,7 +18,9 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### - op.add_column("model", sa.Column("type", sa.String(length=100), nullable=True)) + op.add_column( + "model", sa.Column("type", sa.String(length=100), nullable=True) + ) # ### end Alembic commands ### diff --git a/models/models/colab_ssh_utils.py b/models/models/colab_ssh_utils.py index e5bc3166d..587df88bf 100644 --- a/models/models/colab_ssh_utils.py +++ b/models/models/colab_ssh_utils.py @@ -97,7 +97,9 @@ def local_mount_colab_drive( ) -def sync_colab_with_minio(temp_directory: str, tenant: str, training_id: int) -> None: +def sync_colab_with_minio( + temp_directory: str, tenant: str, training_id: int +) -> None: syn_command = ( f"aws --endpoint-url http://{MINIO_HOST} s3 sync {temp_directory} " f"s3://{tenant}/trainings/{training_id}/results/ --delete" diff --git a/models/models/crud.py b/models/models/crud.py index 0c349f3ee..6b00da2a0 100644 --- a/models/models/crud.py +++ b/models/models/crud.py @@ -99,7 +99,9 @@ def get_latest_model(session: Session, model_id: str) -> Optional[Model]: ) -def get_second_latest_model(session: Session, model_id: str) -> Optional[Model]: +def get_second_latest_model( + session: Session, model_id: str +) -> Optional[Model]: """ Find second model by desc version """ diff --git a/models/models/errors.py b/models/models/errors.py index 4a41d2a41..11d7bde59 100644 --- a/models/models/errors.py +++ b/models/models/errors.py @@ -17,14 +17,18 @@ def __init__(self, message: str): self.message = message -def botocore_error_handler(request: Request, exc: BotoCoreError) -> JSONResponse: +def botocore_error_handler( + request: Request, exc: BotoCoreError +) -> JSONResponse: return JSONResponse( status_code=500, content={"detail": f"Error: connection error ({exc})"}, ) -def minio_client_error_handler(request: Request, exc: ClientError) -> JSONResponse: +def minio_client_error_handler( + request: Request, exc: ClientError +) -> JSONResponse: return JSONResponse( status_code=500, content={"detail": f"Error: client error ({exc})"}, @@ -40,7 +44,9 @@ def minio_no_such_bucket_error_handler( ) -def ssh_connection_error_handler(request: Request, exc: SSHException) -> JSONResponse: +def ssh_connection_error_handler( + request: Request, exc: SSHException +) -> JSONResponse: return JSONResponse( status_code=500, content={"detail": f"Error: ssh connection error ({exc})"}, @@ -56,7 +62,9 @@ def colab_execution_error_handler( ) -def sqlalchemy_db_error_handler(request: Request, exc: SQLAlchemyError) -> JSONResponse: +def sqlalchemy_db_error_handler( + request: Request, exc: SQLAlchemyError +) -> JSONResponse: return JSONResponse( status_code=500, content={"detail": f"Error: connection error ({exc})"}, diff --git a/models/models/routers/basements_routers.py b/models/models/routers/basements_routers.py index 1ba636746..aa13294f3 100644 --- a/models/models/routers/basements_routers.py +++ b/models/models/routers/basements_routers.py @@ -198,7 +198,9 @@ def upload_files_to_object_storage( bucket_name = convert_bucket_name_if_s3prefix(x_current_tenant) basement = crud.get_instance(session, Basement, basement_id) if not basement: - LOGGER.info("upload_script_to_minio got not existing id %s", basement_id) + LOGGER.info( + "upload_script_to_minio got not existing id %s", basement_id + ) raise HTTPException(status_code=404, detail="Not existing basement") try: s3_resource = get_minio_resource(tenant=bucket_name) diff --git a/models/models/routers/deployed_models_routers.py b/models/models/routers/deployed_models_routers.py index 8a7f41267..22b0c4f34 100644 --- 
a/models/models/routers/deployed_models_routers.py +++ b/models/models/routers/deployed_models_routers.py @@ -92,7 +92,9 @@ def get_deployed_model_by_name( return schemas.DeployedModelDetails( apiVersion=model["apiVersion"], datetime_creation=str( - datetime.strptime(metadata["creationTimestamp"], "%Y-%m-%dT%H:%M:%SZ") + datetime.strptime( + metadata["creationTimestamp"], "%Y-%m-%dT%H:%M:%SZ" + ) ), model_id=metadata["generation"], model_name=metadata["name"], diff --git a/models/models/routers/models_routers.py b/models/models/routers/models_routers.py index b931bcc69..7058dfa41 100644 --- a/models/models/routers/models_routers.py +++ b/models/models/routers/models_routers.py @@ -126,7 +126,9 @@ def search_models( }, }, ) -def get_model_by_id(model_id: str, session: Session = Depends(get_db)) -> Model: +def get_model_by_id( + model_id: str, session: Session = Depends(get_db) +) -> Model: query = crud.get_latest_model(session, model_id) if not query: LOGGER.error("Get_model_by_id get not existing id %s", model_id) @@ -154,7 +156,8 @@ def get_model_by_id_and_version( model = crud.get_instance(session, Model, (model_id, version)) if not model: LOGGER.error( - "Get_model_by_id get not existing model with " "id: %s, version: %d", + "Get_model_by_id get not existing model with " + "id: %s, version: %d", model_id, version, ) @@ -198,7 +201,9 @@ def update_model( if request.training_id and not crud.is_id_existing( session, Training, request.training_id ): - LOGGER.info("Update_model get not existing training id %s", request.training_id) + LOGGER.info( + "Update_model get not existing training id %s", request.training_id + ) raise HTTPException(status_code=404, detail="Not existing training") modified_model = crud.modify_instance(session, model, request) @@ -246,7 +251,9 @@ def update_model_by_id_and_version( if request.training_id and not crud.is_id_existing( session, Training, request.training_id ): - LOGGER.info("Update_model get not existing training id %s", request.training_id) + LOGGER.info( + "Update_model get not existing training id %s", request.training_id + ) raise HTTPException(status_code=404, detail="Not existing training") modified_model = crud.modify_instance(session, model, request) @@ -350,7 +357,9 @@ def deploy_model( schemas.StatusEnum.READY.value, schemas.StatusEnum.DEPLOYED.value, ) - LOGGER.info("Deploy_model get id of already deployed model %s", model.id) + LOGGER.info( + "Deploy_model get id of already deployed model %s", model.id + ) raise HTTPException( status_code=409, detail=f"Model {model.id} has already been deployed", @@ -399,13 +408,17 @@ def deploy_model_by_id_and_version( schemas.StatusEnum.READY.value, schemas.StatusEnum.DEPLOYED.value, ) - LOGGER.info("Deploy_model get id of already deployed model %s", model.id) + LOGGER.info( + "Deploy_model get id of already deployed model %s", model.id + ) raise HTTPException( status_code=409, detail=f"Model {model.id} has already been deployed", ) - LOGGER.info("Deploying model with " "id: %s, version: %d", model_id, version) + LOGGER.info( + "Deploying model with " "id: %s, version: %d", model_id, version + ) utils.deploy(session, model) return {"msg": f"Model {model_id} with version {version} is deploying"} @@ -446,7 +459,9 @@ def undeploy_model( return {"msg": f"Model {model.id} is undeployed"} if utils.undeploy(session, model): return {"msg": f"Model {model.id} is undeployed"} - raise HTTPException(status_code=409, detail=f"Failed to undeploy model {model.id}") + raise HTTPException( + status_code=409, detail=f"Failed 
to undeploy model {model.id}" + ) @router.post( @@ -475,7 +490,8 @@ def undeploy_model_by_id_and_version( model = crud.get_instance(session, Model, (model_id, version)) if not model: LOGGER.info( - "Undeploy_model get not existing model with " "id: %s, version: %d", + "Undeploy_model get not existing model with " + "id: %s, version: %d", model_id, version, ) @@ -493,5 +509,6 @@ def undeploy_model_by_id_and_version( return {"msg": f"Model {model.id} is undeployed"} raise HTTPException( status_code=409, - detail=f"Failed to undeploy model {model_id} " f"with version {version}", + detail=f"Failed to undeploy model {model_id} " + f"with version {version}", ) diff --git a/models/models/routers/training_routers.py b/models/models/routers/training_routers.py index 8bc186c50..3a1c19135 100644 --- a/models/models/routers/training_routers.py +++ b/models/models/routers/training_routers.py @@ -238,7 +238,9 @@ def delete_training_by_id( bucket_name, ) raise HTTPException(status_code=500, detail=str(err)) - s3_resource.meta.client.delete_object(Bucket=bucket_name, Key=training.key_archive) + s3_resource.meta.client.delete_object( + Bucket=bucket_name, Key=training.key_archive + ) crud.delete_instance(session, training) LOGGER.info("Training %d was deleted", request.id) return {"msg": "Training was deleted"} @@ -272,7 +274,9 @@ def prepare_annotation_dataset( if not training: LOGGER.info("Prepare dataset get not existing id %s", training_id) raise HTTPException(status_code=404, detail="Not existing training") - minio_path = prepare_dataset_info(convert_request, x_current_tenant, token.token) + minio_path = prepare_dataset_info( + convert_request, x_current_tenant, token.token + ) training.key_annotation_dataset = minio_path session.commit() LOGGER.info("Dataset creation for training %s is started", training_id) @@ -339,7 +343,9 @@ def start_training( with connect_colab(credentials) as ssh_client: bucket = convert_bucket_name_if_s3prefix(x_current_tenant) file_script, size_script = get_minio_object(bucket, key_script) - upload_file_to_colab(ssh_client, file_script, size_script, TRAINING_SCRIPT_NAME) + upload_file_to_colab( + ssh_client, file_script, size_script, TRAINING_SCRIPT_NAME + ) file_dataset, size_dataset = get_minio_object(bucket, key_dataset) upload_file_to_colab( ssh_client, file_dataset, size_dataset, ANNOTATION_DATASET_NAME @@ -388,7 +394,9 @@ def download_training_results( bucket_name = convert_bucket_name_if_s3prefix(x_current_tenant) training_exists = crud.is_id_existing(session, Training, training_id) if not training_exists: - LOGGER.info("Download_training_results get not existing id %s", training_id) + LOGGER.info( + "Download_training_results get not existing id %s", training_id + ) raise HTTPException(status_code=404, detail="Not existing training") home_directory = pathlib.Path.home() check_aws_credentials_file(home_directory) diff --git a/models/models/schemas.py b/models/models/schemas.py index a3f020525..1fd2e62e1 100644 --- a/models/models/schemas.py +++ b/models/models/schemas.py @@ -106,7 +106,9 @@ class Model(ModelWithId): "it has been deployed already", example="ready", ) - created_by: str = Field(description="Author who has created model", example="901") + created_by: str = Field( + description="Author who has created model", example="901" + ) created_at: datetime = Field(example="2021-11-09T17:09:43.101004") tenant: str = Field(description="Author's tenant", example="tenant1") latest: bool = Field( @@ -130,7 +132,9 @@ class BasementBase(BaseModel): description="Unique name 
of docker image to build and run", example="custom:v1.1", ) - name: str = Field(title="Human readable name", example="some describing name") + name: str = Field( + title="Human readable name", example="some describing name" + ) supported_args: Optional[List[Dict[str, Any]]] = Field( example=[ { @@ -229,7 +233,9 @@ class HeaderResponse(BaseModel): class DeployedModelMainData(BaseModel): datetime_creation: str = Field(example="2021-11-09T17:09:43.101004") - status: str = Field(description="Model status, it's running or not", example=True) + status: str = Field( + description="Model status, it's running or not", example=True + ) name: str = Field(description="Name of the model", example="my_model") url: str = Field(description="Model url with details information") @@ -329,8 +335,12 @@ class ConvertRequestSchema(BaseModel): class TrainingCredentials(BaseModel): user: str = Field(..., description="Colab username", example="root") - password: str = Field(..., description="Colab user password", example="SECRET") + password: str = Field( + ..., description="Colab user password", example="SECRET" + ) host: str = Field( ..., description="Ngrok host to connect colab", example="tcp.ngrok.io" ) - port: int = Field(..., description="Ngrok port to connect colab", example="12345") + port: int = Field( + ..., description="Ngrok port to connect colab", example="12345" + ) diff --git a/models/models/utils.py b/models/models/utils.py index 7163464c5..2148227a4 100644 --- a/models/models/utils.py +++ b/models/models/utils.py @@ -140,7 +140,9 @@ def create_ksvc( "value": config_path["file"], }, ], - "ports": [{"protocol": "TCP", "containerPort": 8000}], + "ports": [ + {"protocol": "TCP", "containerPort": 8000} + ], "resources": { "limits": { "cpu": pod_cpu_limit, @@ -373,7 +375,9 @@ def upload_to_object_storage( s3.upload_fileobj(Fileobj=obj, Key=file_path) except ClientError as err: if "404" in err.args[0]: - raise NoSuchTenant(f"Bucket for tenant {bucket_name} does not exist") + raise NoSuchTenant( + f"Bucket for tenant {bucket_name} does not exist" + ) raise diff --git a/models/tests/conftest.py b/models/tests/conftest.py index c357ff55f..35f041828 100644 --- a/models/tests/conftest.py +++ b/models/tests/conftest.py @@ -61,7 +61,9 @@ def overrided_token_client(client, db_session) -> TestClient: def moto_minio() -> boto3.resource: """Creates and returns moto resource for s3 (minio) with test Bucket.""" with mock_s3(): - minio_resource = boto3.resource("s3", config=Config(signature_version="s3v4")) + minio_resource = boto3.resource( + "s3", config=Config(signature_version="s3v4") + ) minio_resource.create_bucket(Bucket=TEST_TENANT) yield minio_resource @@ -154,7 +156,9 @@ def setup_test_db(use_temp_env_var): @pytest.fixture(scope="module") def db_session(setup_test_db) -> Session: """yields SQLAlchemy session""" - session_local = sessionmaker(autocommit=False, autoflush=False, bind=engine) + session_local = sessionmaker( + autocommit=False, autoflush=False, bind=engine + ) session = session_local() yield session diff --git a/models/tests/test_basement_routers.py b/models/tests/test_basement_routers.py index af8ccc680..6756b340f 100644 --- a/models/tests/test_basement_routers.py +++ b/models/tests/test_basement_routers.py @@ -27,11 +27,13 @@ def test_create_basement(exist, create): create.return_value = {"msg": "expected"} token = Mock() token.user_id.return_value = "token" - assert basements_routers.create_new_basement(data, "session", token, "tenant") == { - "msg": "expected" - } + assert 
basements_routers.create_new_basement( + data, "session", token, "tenant" + ) == {"msg": "expected"} exist.assert_called_once_with("session", Basement, "id") - create.assert_called_once_with("session", Basement, data, token.user_id, "tenant") + create.assert_called_once_with( + "session", Basement, data, token.user_id, "tenant" + ) @patch.object(basements_routers.crud, "create_instance") @@ -82,7 +84,9 @@ def test_get_basement_by_id_withot_basement(get): @patch.object(basements_routers.crud, "delete_instance") @patch.object(basements_routers.crud, "get_instance") def test_delete_basement_by_id(delete, get, client, monkeypatch): - monkeypatch.setattr("models.routers.basements_routers.get_minio_resource", Mock()) + monkeypatch.setattr( + "models.routers.basements_routers.get_minio_resource", Mock() + ) data = {"id": "id"} get.return_value = "expected" response = client.delete("/basements/delete", data=json.dumps(data)) @@ -93,7 +97,9 @@ def test_delete_basement_by_id(delete, get, client, monkeypatch): @patch.object(basements_routers.crud, "delete_instance") @patch.object(basements_routers.crud, "get_instance") def test_delete_basement_by_id_calls_crud(delete, get, monkeypatch): - monkeypatch.setattr("models.routers.basements_routers.get_minio_resource", Mock()) + monkeypatch.setattr( + "models.routers.basements_routers.get_minio_resource", Mock() + ) data = basements_routers.schemas.BasementDelete(id="id") get.return_value = "expected" basements_routers.delete_basement_by_id(data, "session") diff --git a/models/tests/test_colab_start_training.py b/models/tests/test_colab_start_training.py index 5f468a1d5..0e02fde52 100644 --- a/models/tests/test_colab_start_training.py +++ b/models/tests/test_colab_start_training.py @@ -136,7 +136,9 @@ def test_start_training_no_such_bucket_error( "models.utils.boto3.resource", Mock(return_value=moto_minio), ) - monkeypatch.setattr("models.routers.training_routers.connect_colab", MockSSHContext) + monkeypatch.setattr( + "models.routers.training_routers.connect_colab", MockSSHContext + ) response = overrided_token_client.post( START_TRAINING_PATH.format(EXIST_TRAINING_ID), json=TEST_CREDENTIALS, @@ -165,7 +167,9 @@ def test_start_training_boto3_error( "models.routers.training_routers.get_minio_object", Mock(side_effect=BotoCoreError()), ) - monkeypatch.setattr("models.routers.training_routers.connect_colab", MockSSHContext) + monkeypatch.setattr( + "models.routers.training_routers.connect_colab", MockSSHContext + ) response = overrided_token_client.post( START_TRAINING_PATH.format(EXIST_TRAINING_ID), json=TEST_CREDENTIALS, @@ -201,7 +205,9 @@ def test_start_training_integration( "models.utils.boto3.resource", Mock(return_value=save_start_training_minio_objects), ) - monkeypatch.setattr("models.routers.training_routers.connect_colab", MockSSHContext) + monkeypatch.setattr( + "models.routers.training_routers.connect_colab", MockSSHContext + ) response = overrided_token_client.post( START_TRAINING_PATH.format(EXIST_TRAINING_ID), json=TEST_CREDENTIALS, diff --git a/models/tests/test_crud.py b/models/tests/test_crud.py index f53b927fd..8a097ad01 100644 --- a/models/tests/test_crud.py +++ b/models/tests/test_crud.py @@ -30,7 +30,9 @@ def test_is_id_existing_queries_db_and_calls_filter(): def test_create_instance_calls_add_and_commit_and_returns_id(): session = Mock() - basement = BasementBase(id="id", name="name", gpu_support=True, limits=TEST_LIMITS) + basement = BasementBase( + id="id", name="name", gpu_support=True, limits=TEST_LIMITS + ) 
crud.create_instance(session, Basement, basement, "author", "tenant") session.add.assert_called_once() session.commit.assert_called_once() @@ -48,7 +50,9 @@ def test_get_instance_queries_db_calls_get_and_returns_result_of_get(): def test_modify_instance_calls_commit(): session = Mock() - basement = BasementBase(id="id", name="name", gpu_support=True, limits=TEST_LIMITS) + basement = BasementBase( + id="id", name="name", gpu_support=True, limits=TEST_LIMITS + ) crud.get_instance = Mock(return_value="expected") crud.modify_instance(session, Basement, basement) session.commit.assert_called_once() diff --git a/models/tests/test_models_routers.py b/models/tests/test_models_routers.py index 4a57b9f57..893f273c3 100644 --- a/models/tests/test_models_routers.py +++ b/models/tests/test_models_routers.py @@ -269,7 +269,9 @@ def test_deploy_model_with_wrong_type(get, client): @patch.object(models_routers.crud, "modify_status") @patch.object(models_routers.utils, "is_model_deployed") @patch.object(models_routers.crud, "get_latest_model") -def test_deploy_already_deployed_model_returns_409(get, is_deployed, modify, client): +def test_deploy_already_deployed_model_returns_409( + get, is_deployed, modify, client +): data = {"id": "id"} models_routers.get_db = Mock() query = models_routers.schemas.ModelId(id="id") @@ -283,7 +285,9 @@ def test_deploy_already_deployed_model_returns_409(get, is_deployed, modify, cli @patch.object(models_routers.crud, "modify_status") @patch.object(models_routers.utils, "is_model_deployed") @patch.object(models_routers.crud, "get_latest_model") -def test_deploy_already_deployed_model_modifies_status(get, is_deployed, modify): +def test_deploy_already_deployed_model_modifies_status( + get, is_deployed, modify +): data = models_routers.schemas.ModelId(id="id") models_routers.get_db = Mock() get.return_value = data @@ -312,7 +316,9 @@ def test_deploy_model_in_positive_case(get, is_deployed, deploy, client): @patch.object(models_routers.crud, "modify_status") @patch.object(models_routers.utils, "is_model_deployed") @patch.object(models_routers.crud, "get_latest_model") -def test_deploy_model_without_modifying_status(get, is_deployed, modify, deploy): +def test_deploy_model_without_modifying_status( + get, is_deployed, modify, deploy +): data = models_routers.schemas.ModelId(id="id") models_routers.get_db = Mock() get.return_value = data @@ -395,7 +401,9 @@ def test_undeploy_already_undeployed_model_modifies_status( @patch.object(models_routers.crud, "modify_status") @patch.object(models_routers.utils, "is_model_deployed") @patch.object(models_routers.crud, "get_latest_model") -def test_undeploy_model_calls_undeploying_function(get, is_deployed, modify, undeploy): +def test_undeploy_model_calls_undeploying_function( + get, is_deployed, modify, undeploy +): data = models_routers.schemas.ModelId(id="id") models_routers.get_db = Mock() get.return_value = data diff --git a/models/tests/test_schemas.py b/models/tests/test_schemas.py index 188b9c3d9..5bd2e7cd7 100644 --- a/models/tests/test_schemas.py +++ b/models/tests/test_schemas.py @@ -7,7 +7,9 @@ def test_empty_id_in_modelbase_raises_error(): minio_path = {"file": "file", "bucket": "bucket"} - with pytest.raises(ValidationError, match="this value has at least 1 characters"): + with pytest.raises( + ValidationError, match="this value has at least 1 characters" + ): schemas.ModelWithId( id="", name="name", @@ -107,17 +109,23 @@ def test_validation_of_model_id(): def test_empty_id_in_basementbase_raises_error(): - with 
pytest.raises(ValidationError, match="this value has at least 1 characters"): + with pytest.raises( + ValidationError, match="this value has at least 1 characters" + ): schemas.BasementBase(id="", name="base", gpu_support=True) def test_not_empty_id_in_basementbase_does_not_raise_error(): - schemas.BasementBase(id="1", name="base", gpu_support=True, limits=TEST_LIMITS) + schemas.BasementBase( + id="1", name="base", gpu_support=True, limits=TEST_LIMITS + ) def test_validation_of_bucket_in_minio_path(): underscore_bucket = "bucket_" - with pytest.raises(ValidationError, match="Bucket cannot contain underscores"): + with pytest.raises( + ValidationError, match="Bucket cannot contain underscores" + ): schemas.MinioPath(bucket=underscore_bucket, file="file") correct_bucket = "bucket" schemas.MinioPath(bucket=correct_bucket, file="file") diff --git a/models/tests/test_trainings_routers.py b/models/tests/test_trainings_routers.py index 0fad4202d..42e148111 100644 --- a/models/tests/test_trainings_routers.py +++ b/models/tests/test_trainings_routers.py @@ -51,11 +51,13 @@ def test_create_training_in_positive_case(exist, create, _get): create.return_value = {"id": "id"} token = Mock() token.user_id.return_value = "token" - assert training_routers.create_new_training(data, "session", token, "tenant") == { - "id": "id" - } + assert training_routers.create_new_training( + data, "session", token, "tenant" + ) == {"id": "id"} exist.assert_called_once_with("session", Basement, "basement") - create.assert_called_once_with("session", Training, data, token.user_id, "tenant") + create.assert_called_once_with( + "session", Training, data, token.user_id, "tenant" + ) @patch.object(training_routers.crud, "create_instance") diff --git a/models/tests/test_utils.py b/models/tests/test_utils.py index 5496f0a94..187efbe12 100644 --- a/models/tests/test_utils.py +++ b/models/tests/test_utils.py @@ -172,7 +172,10 @@ def test_put_object_via_presigned_url(moto_minio, monkeypatch): minio_response = requests.put(presigned_url, json=test_data) assert minio_response.status_code == 200 minio_object = ( - moto_minio.Object(TEST_TENANT, key).get()["Body"].read().decode("utf-8") + moto_minio.Object(TEST_TENANT, key) + .get()["Body"] + .read() + .decode("utf-8") ) assert json.loads(minio_object) == test_data @@ -456,7 +459,9 @@ def test_get_pods_with_terminating_status(): utils.client = Mock() utils.client.CoreV1Api.return_value = api Pods = namedtuple("Pods", {"items"}) - Metadata = namedtuple("Metadata", ("deletion_timestamp", "name", "namespace")) + Metadata = namedtuple( + "Metadata", ("deletion_timestamp", "name", "namespace") + ) Status = namedtuple("Status", ("start_time", "container_statuses")) Container = namedtuple("Container", ("name")) container = Container("name") @@ -484,8 +489,12 @@ def test_get_pods_with_running_status(): utils.client = Mock() utils.client.CoreV1Api.return_value = api Pods = namedtuple("Pods", {"items"}) - Metadata = namedtuple("Metadata", ("deletion_timestamp", "name", "namespace")) - Status = namedtuple("Status", ("start_time", "container_statuses", "phase")) + Metadata = namedtuple( + "Metadata", ("deletion_timestamp", "name", "namespace") + ) + Status = namedtuple( + "Status", ("start_time", "container_statuses", "phase") + ) Pod = namedtuple("Pod", ("metadata", "status")) Container = namedtuple("Container", ("name")) container = Container("name") @@ -516,7 +525,9 @@ def test_get_minio_object_wrong_tenant(monkeypatch, moto_minio) -> None: Mock(return_value=moto_minio), ) wrong_tenant = 
"wrong_tenant" - with pytest.raises(NoSuchTenant, match=f"Bucket {wrong_tenant} does not exist"): + with pytest.raises( + NoSuchTenant, match=f"Bucket {wrong_tenant} does not exist" + ): utils.get_minio_object(wrong_tenant, "file/file.txt") diff --git a/pipelines/alembic/env.py b/pipelines/alembic/env.py index 11756dd6b..f27c17a18 100644 --- a/pipelines/alembic/env.py +++ b/pipelines/alembic/env.py @@ -70,7 +70,9 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure( + connection=connection, target_metadata=target_metadata + ) with context.begin_transaction(): context.run_migrations() diff --git a/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py b/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py index d6605ec1d..690dc25c3 100644 --- a/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py +++ b/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py @@ -20,9 +20,9 @@ def upgrade() -> None: session = orm.Session(bind=op.get_bind()) - session.query(models.Pipeline).filter(models.Pipeline.type.is_(None)).update( - {models.Pipeline.type: "inference"}, synchronize_session="fetch" - ) + session.query(models.Pipeline).filter( + models.Pipeline.type.is_(None) + ).update({models.Pipeline.type: "inference"}, synchronize_session="fetch") session.commit() session.close() diff --git a/pipelines/alembic/versions/29f072fb5c9c_.py b/pipelines/alembic/versions/29f072fb5c9c_.py index 917fcd844..88b3be178 100644 --- a/pipelines/alembic/versions/29f072fb5c9c_.py +++ b/pipelines/alembic/versions/29f072fb5c9c_.py @@ -39,5 +39,7 @@ def downgrade() -> None: op.f("ix_pipeline_execution_task_job_id"), table_name="pipeline_execution_task", ) - op.drop_index(op.f("ix_execution_step_task_id"), table_name="execution_step") + op.drop_index( + op.f("ix_execution_step_task_id"), table_name="execution_step" + ) # ### end Alembic commands ### diff --git a/pipelines/alembic/versions/5fd9d1fdcf5b_init.py b/pipelines/alembic/versions/5fd9d1fdcf5b_init.py index ef357d737..2b62f9e81 100644 --- a/pipelines/alembic/versions/5fd9d1fdcf5b_init.py +++ b/pipelines/alembic/versions/5fd9d1fdcf5b_init.py @@ -58,7 +58,9 @@ def upgrade() -> None: sa.Column("job_id", sa.Integer(), nullable=True), sa.Column("runner_id", postgresql.UUID(), nullable=True), sa.Column("status", sa.String(length=30), nullable=True), - sa.ForeignKeyConstraint(["pipeline_id"], ["pipeline.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint( + ["pipeline_id"], ["pipeline.id"], ondelete="CASCADE" + ), sa.PrimaryKeyConstraint("id"), ) op.create_table( diff --git a/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py b/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py index d67b33e39..e4e062c6d 100644 --- a/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py +++ b/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py @@ -23,7 +23,9 @@ def upgrade() -> None: "pipeline", sa.Column("original_pipeline_id", sa.Integer(), nullable=True), ) - op.add_column("pipeline", sa.Column("is_latest", sa.Boolean(), nullable=True)) + op.add_column( + "pipeline", sa.Column("is_latest", sa.Boolean(), nullable=True) + ) session = orm.Session(bind=op.get_bind()) rows = ( diff --git a/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py 
b/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py index 1dcd7539f..abefe8942 100644 --- a/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py +++ b/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py @@ -18,8 +18,12 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.add_column("pipeline", sa.Column("type", sa.String(length=30), nullable=True)) - op.add_column("pipeline", sa.Column("description", sa.Text(), nullable=True)) + op.add_column( + "pipeline", sa.Column("type", sa.String(length=30), nullable=True) + ) + op.add_column( + "pipeline", sa.Column("description", sa.Text(), nullable=True) + ) op.add_column("pipeline", sa.Column("summary", sa.Text(), nullable=True)) # ### end Alembic commands ### diff --git a/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py b/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py index 26e60bc04..6638259bf 100644 --- a/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py +++ b/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py @@ -28,7 +28,11 @@ def upgrade() -> None: session.close() session = orm.Session(bind=op.get_bind()) - rows = session.query(models.Pipeline).options(orm.load_only("id", "meta")).all() + rows = ( + session.query(models.Pipeline) + .options(orm.load_only("id", "meta")) + .all() + ) for row in rows: new_meta = dict(row.meta) new_meta["version"] = 1 @@ -79,7 +83,11 @@ def downgrade() -> None: session.close() session = orm.Session(bind=op.get_bind()) - rows = session.query(models.Pipeline).options(orm.load_only("id", "meta")).all() + rows = ( + session.query(models.Pipeline) + .options(orm.load_only("id", "meta")) + .all() + ) for row in rows: new_meta = dict(row.meta) new_meta["version"] = "v1" diff --git a/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py b/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py index ac9bfdcf2..bdeecba1b 100644 --- a/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py +++ b/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py @@ -20,10 +20,12 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### op.add_column( - "execution_step", sa.Column("parent_step", postgresql.UUID(), nullable=True) + "execution_step", + sa.Column("parent_step", postgresql.UUID(), nullable=True), ) op.add_column( - "execution_step", sa.Column("tenant", sa.String(length=50), nullable=True) + "execution_step", + sa.Column("tenant", sa.String(length=50), nullable=True), ) # ### end Alembic commands ### diff --git a/pipelines/pipelines/app.py b/pipelines/pipelines/app.py index bac7f7ef3..17b8e5d77 100644 --- a/pipelines/pipelines/app.py +++ b/pipelines/pipelines/app.py @@ -203,7 +203,9 @@ async def get_task_by_id( task_id: int, session: Session = Depends(service.get_session) ) -> Any: """Get task by its id.""" - res = service.get_table_instance_by_id(session, dbm.PipelineExecutionTask, task_id) + res = service.get_table_instance_by_id( + session, dbm.PipelineExecutionTask, task_id + ) if res: return res.as_dict() raise HTTPException(status_code=404, detail=NO_TASK) @@ -294,7 +296,9 @@ async def delete_task( task_id: int, session: Session = Depends(service.get_session) ) -> Dict[str, str]: """Delete task from db by its id.""" - res = service.get_table_instance_by_id(session, dbm.PipelineExecutionTask, task_id) + res = service.get_table_instance_by_id( + session, dbm.PipelineExecutionTask, task_id + ) if res is None: raise HTTPException(status_code=404, detail=NO_TASK) service.delete_instances(session, [res]) @@ -312,7 +316,9 @@ async def get_task_steps_by_id( task_id: int, session: Session = Depends(service.get_session) ) -> List[Dict[str, str]]: """Get task steps by task id.""" - res = service.get_table_instance_by_id(session, dbm.PipelineExecutionTask, task_id) + res = service.get_table_instance_by_id( + session, dbm.PipelineExecutionTask, task_id + ) if res is None: raise HTTPException(status_code=404, detail=NO_TASK) return [step.as_dict() for step in res.steps] diff --git a/pipelines/pipelines/config.py b/pipelines/pipelines/config.py index a78a02449..22d725e16 100644 --- a/pipelines/pipelines/config.py +++ b/pipelines/pipelines/config.py @@ -62,7 +62,8 @@ def get_version() -> str: KEYCLOAK_REALM = os.getenv("KEYCLOAK_REALM", "master") CLIENT_SECRET = os.getenv("CLIENT_SECRET", "") KEYCLOAK_TOKEN_URI = ( - f"{KEYCLOAK_URI}/auth/realms/{KEYCLOAK_REALM}" f"/protocol/openid-connect/token" + f"{KEYCLOAK_URI}/auth/realms/{KEYCLOAK_REALM}" + f"/protocol/openid-connect/token" ) # Kafka settings diff --git a/pipelines/pipelines/db/logger.py b/pipelines/pipelines/db/logger.py index ae628813f..3d5948da6 100644 --- a/pipelines/pipelines/db/logger.py +++ b/pipelines/pipelines/db/logger.py @@ -31,7 +31,9 @@ def log_after_insert( ) -> None: """Listen for the insert event and log to MainEventLog.""" log_ = create_log(schemas.Event.INS, target).dict() - stmt = insert(models.MainEventLog).values(runner_id=runner.runner_id, event=log_) + stmt = insert(models.MainEventLog).values( + runner_id=runner.runner_id, event=log_ + ) connection.execute(stmt) @@ -43,7 +45,9 @@ def log_after_delete( ) -> None: """Listen for the insert event and log to MainEventLog.""" log_ = create_log(schemas.Event.DEL, target).dict() - stmt = insert(models.MainEventLog).values(runner_id=runner.runner_id, event=log_) + stmt = insert(models.MainEventLog).values( + runner_id=runner.runner_id, event=log_ + ) connection.execute(stmt) @@ -57,6 +61,10 @@ def log_after_update(update_context) -> None: # type: ignore k.key: v.isoformat() if isinstance(v, datetime.datetime) else v for k, v in update_context.values.items() } - log_ = schemas.Log(entity=entity, 
event_type=schemas.Event.UPD, data=data).dict() - stmt = insert(models.MainEventLog).values(runner_id=runner.runner_id, event=log_) + log_ = schemas.Log( + entity=entity, event_type=schemas.Event.UPD, data=data + ).dict() + stmt = insert(models.MainEventLog).values( + runner_id=runner.runner_id, event=log_ + ) update_context.session.execute(stmt) diff --git a/pipelines/pipelines/db/models.py b/pipelines/pipelines/db/models.py index f0c6e136f..0611ebd7c 100644 --- a/pipelines/pipelines/db/models.py +++ b/pipelines/pipelines/db/models.py @@ -15,7 +15,9 @@ class Pipeline(Base): # type: ignore __tablename__ = "pipeline" id = sa.Column(sa.Integer, primary_key=True) - name = sa.Column(sa.String(50), nullable=False, default=lambda: str(uuid.uuid4())) + name = sa.Column( + sa.String(50), nullable=False, default=lambda: str(uuid.uuid4()) + ) version = sa.Column(sa.Integer, nullable=True) original_pipeline_id = sa.Column(sa.Integer, nullable=True) is_latest = sa.Column(sa.Boolean, default=True, nullable=True) @@ -59,7 +61,9 @@ class PipelineExecutionTask(Base): # type: ignore __tablename__ = "pipeline_execution_task" id = sa.Column(sa.Integer, primary_key=True) - name = sa.Column(sa.String(50), nullable=False, default=lambda: str(uuid.uuid4())) + name = sa.Column( + sa.String(50), nullable=False, default=lambda: str(uuid.uuid4()) + ) date = sa.Column(sa.DateTime, nullable=False, default=datetime.utcnow) pipeline_id = sa.Column( sa.Integer, @@ -108,7 +112,9 @@ class ExecutionStep(Base): # type: ignore index=True, nullable=False, ) - name = sa.Column(sa.String(50), nullable=False, default=lambda: str(uuid.uuid4())) + name = sa.Column( + sa.String(50), nullable=False, default=lambda: str(uuid.uuid4()) + ) step_id = sa.Column(UUID()) parent_step = sa.Column(UUID(), nullable=True) date = sa.Column( @@ -149,7 +155,9 @@ class ExecutorHeartbeat(Base): # type: ignore __tablename__ = "heartbeat" id = sa.Column(UUID(), primary_key=True, default=uuid.uuid4) - last_heartbeat = sa.Column(sa.DateTime, nullable=False, default=datetime.utcnow) + last_heartbeat = sa.Column( + sa.DateTime, nullable=False, default=datetime.utcnow + ) def __repr__(self) -> str: return ( diff --git a/pipelines/pipelines/db/service.py b/pipelines/pipelines/db/service.py index 25a218d02..c24ecf77f 100644 --- a/pipelines/pipelines/db/service.py +++ b/pipelines/pipelines/db/service.py @@ -68,7 +68,9 @@ def _add_instance(session: Session, instance: dbm.Table) -> Union[int, str]: add_pipelines = add_steps = add_tasks = _add_instances -def get_all_table_instances(session: Session, table: dbm.TableType) -> dbm.TablesList: +def get_all_table_instances( + session: Session, table: dbm.TableType +) -> dbm.TablesList: """Get list of all table instances from the db. :param session: DB session. @@ -116,7 +118,9 @@ def get_pipelines( return query.all() # type: ignore -def get_task(session: Session, name: str) -> Optional[dbm.PipelineExecutionTask]: +def get_task( + session: Session, name: str +) -> Optional[dbm.PipelineExecutionTask]: """Get task by its name. Latest if multiple tasks found. :param session: DB session. @@ -139,7 +143,9 @@ def get_task_job_id(session: Session, task_id: int) -> Optional[int]: :param task_id: Task id. :return: Task job_id if found. 
""" - task = get_table_instance_by_id(session, dbm.PipelineExecutionTask, task_id) + task = get_table_instance_by_id( + session, dbm.PipelineExecutionTask, task_id + ) return task.job_id if task else None @@ -150,7 +156,9 @@ def get_webhook(session: Session, task_id: int) -> Optional[str]: :param task_id: Task id. :return: webhook """ - task = get_table_instance_by_id(session, dbm.PipelineExecutionTask, task_id) + task = get_table_instance_by_id( + session, dbm.PipelineExecutionTask, task_id + ) return task.webhook if task else None @@ -206,7 +214,9 @@ def update_table_instance_fields( :param id_: Instance id. :param args: Args to update. """ - session.query(table).filter(table.id == id_).update(args, synchronize_session=False) + session.query(table).filter(table.id == id_).update( + args, synchronize_session=False + ) session.commit() @@ -265,7 +275,9 @@ def get_pending_tasks( ) -def update_task_in_lock(session: Session, task_id: int, runner_id: str) -> None: +def update_task_in_lock( + session: Session, task_id: int, runner_id: str +) -> None: """Update task runner_id with 'for update' statement. :param session: DB session. @@ -301,7 +313,9 @@ def get_not_finished_tasks( ) -def get_heartbeat(session: Session, id_: str) -> Optional[dbm.ExecutorHeartbeat]: +def get_heartbeat( + session: Session, id_: str +) -> Optional[dbm.ExecutorHeartbeat]: """Return heartbeat with the given id. :param session: DB session. @@ -409,7 +423,9 @@ async def initialize_execution( return task_id # type: ignore -async def get_step_parent(step_id: str, ids: Dict[str, List[str]]) -> Optional[str]: +async def get_step_parent( + step_id: str, ids: Dict[str, List[str]] +) -> Optional[str]: """ Finds if step has any dependant steps """ diff --git a/pipelines/pipelines/execution.py b/pipelines/pipelines/execution.py index ac3a944a7..7bb4f2149 100644 --- a/pipelines/pipelines/execution.py +++ b/pipelines/pipelines/execution.py @@ -180,7 +180,9 @@ async def process_next_steps(self, producer: AIOKafkaProducer) -> None: ) ) - def update(self, status: schemas.Status, result: Optional[Dict[str, Any]]) -> None: + def update( + self, status: schemas.Status, result: Optional[Dict[str, Any]] + ) -> None: """Updates step status and result.""" self.status = status self.result = result @@ -210,7 +212,9 @@ def get_pipeline_step(self) -> Optional[PipelineStep]: def get_next_steps(self) -> List[ExecutionStep]: task = PipelineTask.get_by_id(self.task_id) - return [step for step in task.steps if step.parent_step == self.step_id] + return [ + step for step in task.steps if step.parent_step == self.step_id + ] def get_pipeline_type(self) -> schemas.PipelineTypes: task = PipelineTask.get_by_id(self.task_id) @@ -271,7 +275,9 @@ async def start(self, producer: AIOKafkaProducer) -> None: args = schemas.InputArguments.parse_obj(initial_step.init_args) tenant = s3.tenant_from_bucket(args.get_output_bucket()) if pipeline_type == schemas.PipelineTypes.INFERENCE: - preprecessing_passed = await self.check_preprocessing_status(tenant) + preprecessing_passed = await self.check_preprocessing_status( + tenant + ) if not preprecessing_passed: return logger.info(f"Start executing task with id = {self.id}") @@ -281,7 +287,9 @@ async def start(self, producer: AIOKafkaProducer) -> None: pipeline_type=pipeline_type, curr_step_id=str(initial_step.id) ) asyncio.create_task( - initial_step.step_execution_with_logging(producer=producer, body=init_body) + initial_step.step_execution_with_logging( + producer=producer, body=init_body + ) ) async def finish(self, 
failed: bool) -> None: @@ -291,7 +299,9 @@ async def finish(self, failed: bool) -> None: Args: failed: whether the task have failed steps. """ - initial_step = [step for step in self.steps if step.parent_step is None][0] + initial_step = [ + step for step in self.steps if step.parent_step is None + ][0] token = service_token.get_service_token() args = schemas.InputArguments.parse_obj(initial_step.init_args) bucket = args.get_output_bucket() @@ -299,7 +309,8 @@ async def finish(self, failed: bool) -> None: pipeline_type = self.get_pipeline_type() if not failed and pipeline_type == schemas.PipelineTypes.INFERENCE: logger.info( - "preparing to merge results and " "send it to postprocessing/annotation" + "preparing to merge results and " + "send it to postprocessing/annotation" ) path_ = args.get_path() filename = args.get_filename() @@ -321,9 +332,13 @@ async def finish(self, failed: bool) -> None: task_status = schemas.Status.FAIL if failed else schemas.Status.DONE self.change_status(task_status) - logger.info(f"Task with id = {self.id} finished with status = {task_status}") + logger.info( + f"Task with id = {self.id} finished with status = {task_status}" + ) tenant = s3.tenant_from_bucket(bucket) - self.send_status(pipeline_type=pipeline_type, tenant=tenant, token=token) + self.send_status( + pipeline_type=pipeline_type, tenant=tenant, token=token + ) def change_status(self, status: schemas.Status) -> None: """Changes status of the task in the db and in the instance.""" @@ -384,7 +399,9 @@ async def check_preprocessing_status(self, tenant: str) -> bool: max_retries = config.MAX_FILE_STATUS_RETRIES timeout = config.FILE_STATUS_TIMEOUT for retry in range(1, int(max_retries) + 1): - file_status = http_utils.get_file_status(file_id=file_id, tenant=tenant) + file_status = http_utils.get_file_status( + file_id=file_id, tenant=tenant + ) if file_status == schemas.PreprocessingStatus.PREPROCESSED: return True elif file_status is None: @@ -417,7 +434,9 @@ async def check_preprocessing_status(self, tenant: str) -> bool: await self.finish(failed=True) return False - def update_steps(self, status: schemas.Status, result: Dict[str, Any]) -> None: + def update_steps( + self, status: schemas.Status, result: Dict[str, Any] + ) -> None: """Updates all steps in case of they all have one result. 
For instance, it occurs when preprocessing is failed for steps file.""" @@ -514,7 +533,9 @@ class Pipeline(BaseModel): def get_ids(self) -> Dict[str, List[str]]: """Return ids of all steps.""" return { - k: v for step in self.steps for k, v in step.steps_identifiers().items() + k: v + for step in self.steps + for k, v in step.steps_identifiers().items() } def get_steps_dict(self) -> Dict[str, PipelineStep]: @@ -524,7 +545,9 @@ def get_steps_dict(self) -> Dict[str, PipelineStep]: """ steps_dict = { - k: v for step in self.steps for k, v in step.get_step_dict().items() + k: v + for step in self.steps + for k, v in step.get_step_dict().items() } if self.steps: for step in self.steps: @@ -600,7 +623,8 @@ def get_model_urls(model_ids: List[str]) -> Dict[str, str]: if mod.get("name") == id_: if ( config.DIFFERENT_PREPROCESSING_URLS - and model_types[id_] == schemas.ModelTypes.PREPROCESSING + and model_types[id_] + == schemas.ModelTypes.PREPROCESSING ): url_map[id_] = Pipeline._convert_preprocessing_uri( mod.get("url") @@ -648,7 +672,9 @@ def adjust_pipeline(self, model_ids: List[str]) -> None: def check_name(self, session: orm.Session) -> None: """Checks if a pipeline with the same name already exists in the DB.""" - pipelines_with_such_name = service.get_pipelines(session, name=self.meta.name) + pipelines_with_such_name = service.get_pipelines( + session, name=self.meta.name + ) if pipelines_with_such_name: raise HTTPException( status_code=status.HTTP_409_CONFLICT, detail=PIPELINE_EXISTS diff --git a/pipelines/pipelines/http_utils.py b/pipelines/pipelines/http_utils.py index 6b1771168..20e32eb03 100644 --- a/pipelines/pipelines/http_utils.py +++ b/pipelines/pipelines/http_utils.py @@ -54,7 +54,9 @@ def make_request_with_retry( return None -def get_file_status(file_id: int, tenant: str) -> Optional[schemas.PreprocessingStatus]: +def get_file_status( + file_id: int, tenant: str +) -> Optional[schemas.PreprocessingStatus]: logger.info(f"Sending request to the assets to get file {file_id} status.") body = {"filters": [{"field": "id", "operator": "eq", "value": file_id}]} url = f"{config.ASSETS_URI}/files/search" @@ -82,7 +84,9 @@ def get_model_types(model_ids: List[str]) -> Dict[str, str]: "filters": [{"field": "id", "operator": "in", "value": model_ids}], } model_search = config.MODELS_URI + config.MODELS_SEARCH_ENDPOINT - response = make_request_with_retry(url=model_search, body=body, method="POST") + response = make_request_with_retry( + url=model_search, body=body, method="POST" + ) result = response.json() items = result.get("data") return {item.get("id"): item.get("type") for item in items} diff --git a/pipelines/pipelines/kafka_utils.py b/pipelines/pipelines/kafka_utils.py index 307107592..46a5cacbd 100644 --- a/pipelines/pipelines/kafka_utils.py +++ b/pipelines/pipelines/kafka_utils.py @@ -30,7 +30,9 @@ def consumer(self) -> aiokafka.AIOKafkaConsumer: logger.exception("Failed to initialize consumer.") raise self.consumer.subscribe(topics=[config.KAFKA_CONSUME_TOPIC]) - logger.info(f"Consumer subscribed to topic {config.KAFKA_CONSUME_TOPIC}") + logger.info( + f"Consumer subscribed to topic {config.KAFKA_CONSUME_TOPIC}" + ) return self._consumer @property diff --git a/pipelines/pipelines/pipeline_runner.py b/pipelines/pipelines/pipeline_runner.py index 37fe95196..47c628d20 100644 --- a/pipelines/pipelines/pipeline_runner.py +++ b/pipelines/pipelines/pipeline_runner.py @@ -46,7 +46,8 @@ async def process_message( if received_step.status == schemas.Status.FAIL: error = 
received_step.result["error"] # type: ignore logger.error( - f"Received failed step with id = {received_step.id}, " f"Error: {error}" + f"Received failed step with id = {received_step.id}, " + f"Error: {error}" ) failed = True elif task.is_completed(): @@ -58,7 +59,9 @@ async def process_message( asyncio.create_task(task.finish(failed=failed)) -async def run_pipeline(consumer: AIOKafkaConsumer, producer: AIOKafkaProducer) -> None: +async def run_pipeline( + consumer: AIOKafkaConsumer, producer: AIOKafkaProducer +) -> None: """ Launch Kafka consumer and process received pipeline steps """ diff --git a/pipelines/pipelines/result_processing.py b/pipelines/pipelines/result_processing.py index 71f579f29..96155baa8 100644 --- a/pipelines/pipelines/result_processing.py +++ b/pipelines/pipelines/result_processing.py @@ -143,7 +143,9 @@ def update_id( ] if unique_obj.links is not None: for link in unique_obj.links: - link.update({"category": unique_obj.category, "to": unique_obj.id}) + link.update( + {"category": unique_obj.category, "to": unique_obj.id} + ) @staticmethod def group_objs_by_id( @@ -156,7 +158,9 @@ def group_objs_by_id( return grouped_objs @staticmethod - def merge(objs: List[GeometryObject], id_: Union[str, int] = 0) -> GeometryObject: + def merge( + objs: List[GeometryObject], id_: Union[str, int] = 0 + ) -> GeometryObject: """Merge Geometry Objects into one. :param objs: Geometry Objects to merge. @@ -243,7 +247,9 @@ def get_pipeline_leaves_data( """ try: path_objects = list_object_names(client, bucket, path_) - files_data = [get_file_data(client, bucket, path_) for path_ in path_objects] + files_data = [ + get_file_data(client, bucket, path_) for path_ in path_objects + ] except (minioerr.S3Error, urllib3.exceptions.MaxRetryError) as err: logger.error("error %s", str(err)) return None @@ -357,7 +363,9 @@ def manage_result_for_annotator( "input": merged_data.dict(exclude_none=True), } headers = {"X-Current-Tenant": tenant, "Authorization": f"Bearer {token}"} - postprocessed_data = postprocess_result(data_for_postprocessor, headers=headers) + postprocessed_data = postprocess_result( + data_for_postprocessor, headers=headers + ) if postprocessed_data is None: logger.info("result for postprocessing data is None") return False diff --git a/pipelines/pipelines/schemas.py b/pipelines/pipelines/schemas.py index 82566a22d..bbdf20a7a 100644 --- a/pipelines/pipelines/schemas.py +++ b/pipelines/pipelines/schemas.py @@ -107,7 +107,9 @@ def output_bucket_validator(cls, values: Dict[str, Any]) -> Dict[str, Any]: return values @validator("input_path", "output_path") - def path_validator(cls, v: Optional[str]) -> Optional[str]: # pylint: disable=E0213 + def path_validator( + cls, v: Optional[str] + ) -> Optional[str]: # pylint: disable=E0213 """Path validator.""" if v is None: return v @@ -124,7 +126,9 @@ def file_path_validator(cls, v: str) -> str: # pylint: disable=E0213 """File path validator.""" mod_v = v.strip().rstrip("/") if mod_v.count("/") != 2: - raise ValueError("File path should be like 'files/fileId/fileId.fileExt'") + raise ValueError( + "File path should be like 'files/fileId/fileId.fileExt'" + ) return mod_v def next_step_args( @@ -159,11 +163,15 @@ def prepare_for_init( ) -> InputArguments: """Prepare args as init by creating copy with modified output path.""" if pipeline_type == PipelineTypes.INFERENCE: - output_path = self.append_path(curr_step_id, self.output_path, ext=".json") + output_path = self.append_path( + curr_step_id, self.output_path, ext=".json" + ) elif 
pipeline_type == PipelineTypes.PREPROCESSING: output_path = self.output_path return InputArguments( - input_path=self.input_path if self.input_path else self.output_path, + input_path=self.input_path + if self.input_path + else self.output_path, input=self.input if self.input else {}, file=self.file, bucket=self.bucket, @@ -172,7 +180,9 @@ def prepare_for_init( output_bucket=self.output_bucket, ) - def append_path(self, stem: str, path_: Optional[str] = None, ext: str = "") -> str: + def append_path( + self, stem: str, path_: Optional[str] = None, ext: str = "" + ) -> str: """Join path_ and stem. Takes self._path if not provided""" return urllib.parse.urljoin((path_ or self._path) + "/", stem) + ext @@ -197,7 +207,9 @@ def get_filename(self) -> str: """Get filename without extension.""" return self.file.strip("/").rsplit("/", 1)[-1].split(".", 1)[0] - def create_input_by_label(self, label: Optional[List[str]]) -> InputArguments: + def create_input_by_label( + self, label: Optional[List[str]] + ) -> InputArguments: """Return copy of the instance with changed input.""" if not self.input or self._is_init or not label: return self.copy(deep=True) diff --git a/pipelines/pipelines/service_token.py b/pipelines/pipelines/service_token.py index 79e5ad34e..8faaebe88 100644 --- a/pipelines/pipelines/service_token.py +++ b/pipelines/pipelines/service_token.py @@ -33,10 +33,14 @@ def get_service_token() -> Optional[str]: try: response_json = response.json() except json.JSONDecodeError: - logger.exception(f"Response {response} from {url} cannot be converted to json.") + logger.exception( + f"Response {response} from {url} cannot be converted to json." + ) try: token = response_json[ACCESS_TOKEN] except AttributeError: - logger.exception(f"Unable to extract token from response {response} from {url}") + logger.exception( + f"Unable to extract token from response {response} from {url}" + ) return token diff --git a/pipelines/tests/conftest.py b/pipelines/tests/conftest.py index 3e7e71087..663ae2db3 100644 --- a/pipelines/tests/conftest.py +++ b/pipelines/tests/conftest.py @@ -73,7 +73,9 @@ def testing_app(testing_engine, testing_session, setup_token): session = sessionmaker(bind=testing_engine) app.app.dependency_overrides[app.TOKEN] = lambda: setup_token with patch("pipelines.db.service.LocalSession", session): - app.app.dependency_overrides[service.get_session] = lambda: testing_session + app.app.dependency_overrides[ + service.get_session + ] = lambda: testing_session client = TestClient(app.app) yield client @@ -130,6 +132,8 @@ async def check_preprocessing_status_mock(x, y): @pytest.fixture def adjust_mock(): - with patch.object(execution.Pipeline, "check_valid_ids", return_value={"a": True}): + with patch.object( + execution.Pipeline, "check_valid_ids", return_value={"a": True} + ): with patch.object(execution.Pipeline, "adjust_pipeline") as mock: yield mock diff --git a/pipelines/tests/db/test_logger.py b/pipelines/tests/db/test_logger.py index a0bcc8cbd..0ffead07a 100644 --- a/pipelines/tests/db/test_logger.py +++ b/pipelines/tests/db/test_logger.py @@ -88,9 +88,9 @@ def test_log_after_update(testing_session): ) testing_session.add(pipeline) testing_session.commit() - testing_session.query(models.Pipeline).filter(models.Pipeline.id == 1).update( - {models.Pipeline.version: 2} - ) + testing_session.query(models.Pipeline).filter( + models.Pipeline.id == 1 + ).update({models.Pipeline.version: 2}) testing_session.commit() log = schemas.Log( entity="Pipeline", event_type=schemas.Event.UPD, 
data={"version": 2} diff --git a/pipelines/tests/db/test_service.py b/pipelines/tests/db/test_service.py index 62731d9ed..8fb89409a 100644 --- a/pipelines/tests/db/test_service.py +++ b/pipelines/tests/db/test_service.py @@ -64,14 +64,19 @@ def test_get_table_instance_by_id(testing_session): """Testing get_table_instance_by_id.""" testing_session.add(dbm.Pipeline(type="inference")) obj = service.get_table_instance_by_id(testing_session, dbm.Pipeline, 1) - none_obj = service.get_table_instance_by_id(testing_session, dbm.Pipeline, 2) + none_obj = service.get_table_instance_by_id( + testing_session, dbm.Pipeline, 2 + ) assert obj assert none_obj is None def test_get_table_instance_by_id_not_found(testing_session): """Testing get_table_instance_by_id when instance not found.""" - assert service.get_table_instance_by_id(testing_session, dbm.Pipeline, 1) is None + assert ( + service.get_table_instance_by_id(testing_session, dbm.Pipeline, 1) + is None + ) def test_get_pipelines(testing_session): @@ -107,7 +112,9 @@ def test_get_task_not_found(testing_session): def test_get_task_job_id(testing_session): """Testing get_task_job_id.""" - task = dbm.PipelineExecutionTask(pipeline=dbm.Pipeline(type="inference"), job_id=42) + task = dbm.PipelineExecutionTask( + pipeline=dbm.Pipeline(type="inference"), job_id=42 + ) testing_session.add(task) assert service.get_task_job_id(testing_session, 1) == 42 @@ -121,7 +128,9 @@ def test_get_step_by_step_and_task_id(testing_session): """Testing get_step_by_step_and_task_id.""" task = dbm.PipelineExecutionTask(pipeline=dbm.Pipeline(type="inference")) step_uuid = str(uuid.uuid4()) - step = dbm.ExecutionStep(task=task, step_id=step_uuid, init_args={"foo": 1}) + step = dbm.ExecutionStep( + task=task, step_id=step_uuid, init_args={"foo": 1} + ) testing_session.add(step) assert service.get_step_by_step_and_task_id( testing_session, 1, step_uuid @@ -132,7 +141,9 @@ def test_get_step_by_step_and_task_id_not_found(testing_session): """Testing get_step_by_step_and_task_id when instance not found.""" some_random_uuid = str(uuid.uuid4()) assert ( - service.get_step_by_step_and_task_id(testing_session, 1, some_random_uuid) + service.get_step_by_step_and_task_id( + testing_session, 1, some_random_uuid + ) is None ) @@ -158,7 +169,9 @@ def test_update_table_instance_fields(testing_session): 1, {dbm.PipelineExecutionTask.name: "bar"}, ) - assert testing_session.query(dbm.PipelineExecutionTask).get(1).name == "bar" + assert ( + testing_session.query(dbm.PipelineExecutionTask).get(1).name == "bar" + ) def test_update_status(testing_session): @@ -168,7 +181,9 @@ def test_update_status(testing_session): ) testing_session.add(task) service.update_status(testing_session, dbm.PipelineExecutionTask, 1, PEND) - assert testing_session.query(dbm.PipelineExecutionTask).get(1).status == PEND + assert ( + testing_session.query(dbm.PipelineExecutionTask).get(1).status == PEND + ) def test_update_statuses(testing_session): @@ -177,9 +192,15 @@ def test_update_statuses(testing_session): task_1 = dbm.PipelineExecutionTask(pipeline=pipeline, status=PEND) task_2 = dbm.PipelineExecutionTask(pipeline=pipeline, status=RUN) testing_session.add_all([task_1, task_2]) - service.update_statuses(testing_session, dbm.PipelineExecutionTask, [1, 2], DONE) - assert testing_session.query(dbm.PipelineExecutionTask).get(1).status == DONE - assert testing_session.query(dbm.PipelineExecutionTask).get(2).status == DONE + service.update_statuses( + testing_session, dbm.PipelineExecutionTask, [1, 2], DONE + ) + assert ( 
+ testing_session.query(dbm.PipelineExecutionTask).get(1).status == DONE + ) + assert ( + testing_session.query(dbm.PipelineExecutionTask).get(2).status == DONE + ) def test_get_pending_tasks(testing_session): @@ -197,7 +218,9 @@ def test_update_task_in_lock(testing_session): """Testing update_task_in_lock.""" runner1_uuid, runner2_uuid = [str(uuid.uuid4()) for _ in range(2)] task = dbm.PipelineExecutionTask( - pipeline=dbm.Pipeline(type="inference"), status=PEND, runner_id=runner1_uuid + pipeline=dbm.Pipeline(type="inference"), + status=PEND, + runner_id=runner1_uuid, ) testing_session.add(task) assert task.runner_id == runner1_uuid @@ -243,7 +266,9 @@ def test_get_expired_heartbeats(testing_session): eff_date = datetime.datetime.utcnow() last_heartbeat = eff_date - datetime.timedelta(minutes=1) testing_session.add( - dbm.ExecutorHeartbeat(id=str(uuid.uuid4()), last_heartbeat=last_heartbeat) + dbm.ExecutorHeartbeat( + id=str(uuid.uuid4()), last_heartbeat=last_heartbeat + ) ) result = service.get_expired_heartbeats(testing_session, eff_date) assert result[0].last_heartbeat == last_heartbeat @@ -263,7 +288,9 @@ def test_update_heartbeat_timestamp(testing_session): def test_task_runner_id_status_in_lock(testing_session): """Testing change_task_runner_id_and_status.""" task = dbm.PipelineExecutionTask( - pipeline=dbm.Pipeline(type="inference"), status=RUN, runner_id=str(uuid.uuid4()) + pipeline=dbm.Pipeline(type="inference"), + status=RUN, + runner_id=str(uuid.uuid4()), ) testing_session.add(task) service.change_task_runner_id_status_in_lock(testing_session, 1) @@ -274,7 +301,9 @@ def test_task_runner_id_status_in_lock(testing_session): @pytest.mark.asyncio async def test_initialize(testing_session): """Testing initialize_execution.""" - with patch.object(execution.Pipeline, "from_orm", return_value=td.pipeline): + with patch.object( + execution.Pipeline, "from_orm", return_value=td.pipeline + ): pipeline_db_ = td.pipeline.to_orm() testing_session.add(pipeline_db_) result = await service.initialize_execution( @@ -291,8 +320,12 @@ async def test_initialize(testing_session): task = testing_session.query(dbm.PipelineExecutionTask).get(1) assert task.name == "f" assert task.job_id == 1 - assert testing_session.query(dbm.ExecutionStep).get(1).init_args == {"a": 1} - assert testing_session.query(dbm.ExecutionStep).get(2).init_args is None + assert testing_session.query(dbm.ExecutionStep).get(1).init_args == { + "a": 1 + } + assert ( + testing_session.query(dbm.ExecutionStep).get(2).init_args is None + ) @pytest.mark.parametrize( diff --git a/pipelines/tests/test_app.py b/pipelines/tests/test_app.py index c8dd35831..28a8affbb 100644 --- a/pipelines/tests/test_app.py +++ b/pipelines/tests/test_app.py @@ -63,7 +63,9 @@ def test_add_pipeline_autogen_ids(testing_app, adjust_mock): ({"name": "bar", "version": 2}, td.pipeline_dict_2), ], ) -def test_get_pipeline(q_params: Dict[str, str], testing_app, adjust_mock, pipeline): +def test_get_pipeline( + q_params: Dict[str, str], testing_app, adjust_mock, pipeline +): """Testing get_pipeline.""" testing_app.post("/pipeline", json=pipeline) response = testing_app.get("/pipeline", params=q_params) @@ -97,7 +99,9 @@ def test_get_pipelines(testing_app, adjust_mock): ({"name": "bar", "version": 2}, td.pipeline_dict_2), ], ) -def test_delete_pipelines(q_params: Dict[str, str], pipeline, testing_app, adjust_mock): +def test_delete_pipelines( + q_params: Dict[str, str], pipeline, testing_app, adjust_mock +): """Testing delete_pipelines.""" 
testing_app.post("/pipeline", json=pipeline) response = testing_app.delete("/pipelines", params=q_params) @@ -259,7 +263,9 @@ def test_get_task_steps_by_id(testing_task, testing_app, testing_session): assert response.json()[0]["status"] == "pending" -def test_get_task_steps_by_id_not_found(testing_task, testing_app, testing_session): +def test_get_task_steps_by_id_not_found( + testing_task, testing_app, testing_session +): """Testing get_task_steps_by_id when there's no such task.""" step = dbm.ExecutionStep(task=testing_task, name="bar", status="pending") service.add_step(testing_session, step) diff --git a/pipelines/tests/test_execution.py b/pipelines/tests/test_execution.py index 072bb238c..57f01988b 100644 --- a/pipelines/tests/test_execution.py +++ b/pipelines/tests/test_execution.py @@ -33,7 +33,10 @@ def uuid_mock(): yield uuid_mock -@patch("pipelines.execution.ExecutionStep.get_pipeline_step", new_callable=PropertyMock) +@patch( + "pipelines.execution.ExecutionStep.get_pipeline_step", + new_callable=PropertyMock, +) @patch("pipelines.execution.ExecutionStep.step_execution") @pytest.mark.asyncio async def test_step_execution_with_logging( @@ -46,8 +49,12 @@ async def test_step_execution_with_logging( step_exec_mock.return_value = None pipeline_step.return_value = property_mock exec_step = td.test_exec_step - body = schemas.InputArguments.parse_obj({**td.exec_input_args, "result": "foo"}) - await exec_step.step_execution_with_logging(body=body, producer=AIOKafkaProducer) + body = schemas.InputArguments.parse_obj( + {**td.exec_input_args, "result": "foo"} + ) + await exec_step.step_execution_with_logging( + body=body, producer=AIOKafkaProducer + ) assert step_exec_mock.call_count == 1 @@ -56,16 +63,25 @@ async def test_step_execution_with_logging( "Test should be fixed - it 'blinks'. " "It passes when run separately, but fails when all tests are run." ) -@patch("pipelines.execution.ExecutionStep.get_pipeline_step", new_callable=PropertyMock) +@patch( + "pipelines.execution.ExecutionStep.get_pipeline_step", + new_callable=PropertyMock, +) @patch("pipelines.execution.ExecutionStep.send") @pytest.mark.asyncio -async def test_step_execution(mock_send, model_url, caplog, run_in_session_mock): +async def test_step_execution( + mock_send, model_url, caplog, run_in_session_mock +): """Testing step_execution.""" - property_mock = ExecStepPropertyMock.parse_obj({"model_url": "https://foo.com/bar"}) + property_mock = ExecStepPropertyMock.parse_obj( + {"model_url": "https://foo.com/bar"} + ) model_url.return_value = property_mock mock_send.return_value = None exec_step = td.test_exec_step - await exec_step.step_execution(producer=AIOKafkaProducer, body=td.input_args_1) + await exec_step.step_execution( + producer=AIOKafkaProducer, body=td.input_args_1 + ) assert mock_send.called assert caplog.messages[0] == "Step with id = 58 sent." 
@@ -216,7 +232,9 @@ def test_adjust_pipeline(): return_value={"bar": "http://bar.dev1.gcov.ru"}, ): td.pipeline.adjust_pipeline(td.pipeline.get_model_ids()) - assert td.pipeline.meta.categories.sort() == ["text", "chart"].sort() + assert ( + td.pipeline.meta.categories.sort() == ["text", "chart"].sort() + ) @pytest.mark.skip( diff --git a/pipelines/tests/test_http_utils.py b/pipelines/tests/test_http_utils.py index 22e292cfa..81ca2889a 100644 --- a/pipelines/tests/test_http_utils.py +++ b/pipelines/tests/test_http_utils.py @@ -25,7 +25,9 @@ def test_make_request(request_mock): ) def test_make_request_with_retry(s_effect, expected, call_count, request_mock): """Testing make_request_with_retry.""" - with patch("pipelines.http_utils.make_request", side_effect=s_effect) as req_mock: + with patch( + "pipelines.http_utils.make_request", side_effect=s_effect + ) as req_mock: assert http_utils.make_request_with_retry("", {}, start=0) == expected assert req_mock.call_count == call_count diff --git a/pipelines/tests/test_result_processing.py b/pipelines/tests/test_result_processing.py index 20e11fbcb..f484dc362 100644 --- a/pipelines/tests/test_result_processing.py +++ b/pipelines/tests/test_result_processing.py @@ -99,7 +99,9 @@ def test_unite_geometry_objects(): category=2, children=["some_uiid_3"], ) - obj_3 = processing.GeometryObject(id="some_uiid_3", bbox=(1, 1, 1, 1), category=3) + obj_3 = processing.GeometryObject( + id="some_uiid_3", bbox=(1, 1, 1, 1), category=3 + ) res = obj_1.unite_geometry_objects([obj_1, obj_2, obj_3], id_start=1) assert res == [r_obj_1, r_obj_2, r_obj_3] @@ -128,9 +130,15 @@ def test_group_objs_by_id(): """Testing group_objs_by_id of GeometryObject.""" obj_1 = processing.GeometryObject(id=1, bbox=(1, 1, 1, 1), category="some") obj_2 = processing.GeometryObject(id=1, bbox=(1, 1, 1, 1), category="some") - obj_3 = processing.GeometryObject(id="asd", bbox=(1, 1, 1, 1), category="some") - obj_4 = processing.GeometryObject(id="asd", bbox=(1, 1, 1, 1), category="some") - res = processing.GeometryObject.group_objs_by_id([obj_1, obj_2, obj_3, obj_4]) + obj_3 = processing.GeometryObject( + id="asd", bbox=(1, 1, 1, 1), category="some" + ) + obj_4 = processing.GeometryObject( + id="asd", bbox=(1, 1, 1, 1), category="some" + ) + res = processing.GeometryObject.group_objs_by_id( + [obj_1, obj_2, obj_3, obj_4] + ) assert len(res) == 2 assert res[1] == [obj_1, obj_2] assert res["asd"] == [obj_3, obj_4] @@ -177,7 +185,9 @@ def test_merge_geometry_objects_no_objects_provided(): ) def test_get_annotation_uri(job_id, file_id, expected): """Testing get_annotation_uri.""" - with patch("pipelines.result_processing.config.ANNOTATION_URI", "foobar/ann"): + with patch( + "pipelines.result_processing.config.ANNOTATION_URI", "foobar/ann" + ): assert processing.get_annotation_uri(job_id, file_id) == expected @@ -224,7 +234,9 @@ def test_get_pipeline_leaves_data(): def test_get_pipeline_leaves_data_minio_error(): """Testing get_pipeline_leaves_data when S3Error occurred.""" err = S3Error("", "", "", "", "", "") - with patch("pipelines.result_processing.list_object_names", side_effect=err): + with patch( + "pipelines.result_processing.list_object_names", side_effect=err + ): res = processing.get_pipeline_leaves_data(MagicMock(), "", "") assert res is None @@ -287,31 +299,42 @@ def test_merge_pipeline_leaves_data(): def test_merge_pipeline_leaves_data_no_files_data(): """Testing merge_pipeline_leaves_data when there's no files data.""" with patch( - 
"pipelines.result_processing.get_pipeline_leaves_data", return_value=None + "pipelines.result_processing.get_pipeline_leaves_data", + return_value=None, ): - assert processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None + assert ( + processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None + ) def test_merge_pipeline_leaves_data_cannot_parse_data(): """Testing merge_pipeline_leaves_data when raw data cannot be parsed.""" with patch( - "pipelines.result_processing.ModelOutput.parse_models", return_value=None + "pipelines.result_processing.ModelOutput.parse_models", + return_value=None, ): with patch("pipelines.result_processing.get_pipeline_leaves_data"): - assert processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None + assert ( + processing.merge_pipeline_leaves_data(MagicMock(), "", "") + is None + ) def test_merge_pipeline_leaves_data_cannot_merge_data(): """Testing merge_pipeline_leaves_data when data cannot be merged.""" with patch("pipelines.result_processing.get_pipeline_leaves_data"): with patch("pipelines.result_processing.ModelOutput.parse_models"): - assert processing.merge_pipeline_leaves_data(MagicMock(), "", "") is None + assert ( + processing.merge_pipeline_leaves_data(MagicMock(), "", "") + is None + ) def test_delete_objects(): """Testing delete_objects.""" with patch( - "pipelines.result_processing.list_object_names", return_value=["f", "b"] + "pipelines.result_processing.list_object_names", + return_value=["f", "b"], ): client_mock = MagicMock() assert processing.delete_objects(client_mock, "bucket", "") @@ -324,7 +347,9 @@ def test_delete_objects(): def test_delete_objects_minio_error(): """Testing delete_objects when S3Error occurred.""" err = S3Error("", "", "", "", "", "") - with patch("pipelines.result_processing.list_object_names", side_effect=err): + with patch( + "pipelines.result_processing.list_object_names", side_effect=err + ): assert not processing.delete_objects(MagicMock(), "bucket", "") @@ -336,7 +361,9 @@ def test_postprocess_result(): "pipelines.result_processing.http_utils.make_request_with_retry", return_value=m, ) as req_mock: - with patch("pipelines.result_processing.config.POSTPROCESSING_URI", "foo.com"): + with patch( + "pipelines.result_processing.config.POSTPROCESSING_URI", "foo.com" + ): res = processing.postprocess_result({"foo": 1}) assert res == {"foo": 42} req_mock.assert_called_once_with( @@ -376,7 +403,9 @@ def test_manage_result_for_annotator(): with patch( "pipelines.result_processing.http_utils.make_request_with_retry" ) as req_mock: - with patch("pipelines.result_processing.delete_objects") as del_mock: + with patch( + "pipelines.result_processing.delete_objects" + ) as del_mock: with patch("pipelines.config.DEBUG_MERGE", False): with patch( "pipelines.result_processing.config.ANNOTATION_URI", @@ -408,7 +437,8 @@ def test_manage_result_for_annotator_no_annotator_uri(): def test_manage_result_for_annotator_cannot_merge_data(): """Testing manage_result_for_annotator when data cannot be merger.""" with patch( - "pipelines.result_processing.merge_pipeline_leaves_data", return_value=None + "pipelines.result_processing.merge_pipeline_leaves_data", + return_value=None, ): assert not processing.manage_result_for_annotator( "", "", "", 0, "", "", "", 8, MagicMock(), "" @@ -435,7 +465,9 @@ def test_manage_result_for_annotator_request_debug_merge(): with patch( "pipelines.result_processing.http_utils.make_request_with_retry" ): - with patch("pipelines.result_processing.config.DEBUG_MERGE", True): + with patch( + 
"pipelines.result_processing.config.DEBUG_MERGE", True + ): with patch( "pipelines.result_processing.delete_objects" ) as del_mock: diff --git a/pipelines/tests/test_schemas.py b/pipelines/tests/test_schemas.py index a893537b6..df147e83f 100644 --- a/pipelines/tests/test_schemas.py +++ b/pipelines/tests/test_schemas.py @@ -29,7 +29,9 @@ def test_next_step_args_inference(): def test_next_step_args_preprocessing(): """Testing next_step_args of InputArguments.""" args = td.input_args_1 - res = args.next_step_args(schemas.PipelineTypes.PREPROCESSING, "zxc", {"c": 3}) + res = args.next_step_args( + schemas.PipelineTypes.PREPROCESSING, "zxc", {"c": 3} + ) assert res.input == {"c": 3} assert res.input_path == args.output_path assert res.output_path == args.output_path @@ -38,7 +40,9 @@ def test_next_step_args_preprocessing(): def test_prepare_for_init_inference(): """Testing prepare_for_init of InputArguments.""" - res = td.input_args_1.prepare_for_init(schemas.PipelineTypes.INFERENCE, "baz") + res = td.input_args_1.prepare_for_init( + schemas.PipelineTypes.INFERENCE, "baz" + ) d = td.input_args_1.dict() d.update({"input_path": td.input_args_1.output_path}) expected = { @@ -50,7 +54,9 @@ def test_prepare_for_init_inference(): def test_prepare_for_init_preprocessing(): """Testing prepare_for_init of InputArguments.""" - res = td.input_args_1.prepare_for_init(schemas.PipelineTypes.PREPROCESSING, "baz") + res = td.input_args_1.prepare_for_init( + schemas.PipelineTypes.PREPROCESSING, "baz" + ) d = td.input_args_1.dict() d.update({"input_path": td.input_args_1.output_path}) expected = { @@ -170,4 +176,6 @@ def test_invalid_entity(): ], ) def test_filter_dict_by_categories(data, args, result): - assert schemas.InputArguments.filter_dict_by_categories(data, args) == result + assert ( + schemas.InputArguments.filter_dict_by_categories(data, args) == result + ) diff --git a/pipelines/tests/test_webhooks.py b/pipelines/tests/test_webhooks.py index f14b0bbee..12b083d34 100644 --- a/pipelines/tests/test_webhooks.py +++ b/pipelines/tests/test_webhooks.py @@ -12,7 +12,8 @@ def test_create_inference_url_and_body(): task_status = schemas.Status.RUN status = schemas.JobStatus.RUN with patch( - "pipelines.webhooks.service.get_job_status_if_changed", return_value=status + "pipelines.webhooks.service.get_job_status_if_changed", + return_value=status, ): url, body = webhooks.create_inference_url_and_body( webhook=webhook, job_id=job_id, task_status=task_status diff --git a/pipelines/tests/testing_data.py b/pipelines/tests/testing_data.py index ba4e59133..4dc4b273b 100644 --- a/pipelines/tests/testing_data.py +++ b/pipelines/tests/testing_data.py @@ -83,7 +83,8 @@ heartbeat_db = dbm.ExecutorHeartbeat() pipeline_db_repr = ( - "" + "" ) task_db_repr = ( " None: ) with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure( + connection=connection, target_metadata=target_metadata + ) with context.begin_transaction(): context.run_migrations() diff --git a/processing/processing/health_check_easy_ocr.py b/processing/processing/health_check_easy_ocr.py index c61021727..13baebcf0 100644 --- a/processing/processing/health_check_easy_ocr.py +++ b/processing/processing/health_check_easy_ocr.py @@ -47,7 +47,9 @@ async def health_check_preprocessing( await asyncio.gather( *(run_preprocessing(model_url, file, languages) for file in file_ids) ) - result = all(check_results(file, pages) for file, pages in file_ids.items()) + result = all( + 
check_results(file, pages) for file, pages in file_ids.items() + ) for file, pages in file_ids.items(): clear_data(file, pages) return result @@ -57,7 +59,9 @@ def is_data_prepared() -> bool: try: for file_id in file_ids: minio_client.stat_object(bucket, f"files/{file_id}/{file_id}.pdf") - minio_client.stat_object(bucket, f"files/{file_id}/expected/1.json") + minio_client.stat_object( + bucket, f"files/{file_id}/expected/1.json" + ) except MinioException: return False return True @@ -88,7 +92,9 @@ def check_results(file_id: str, pages: List[int]) -> bool: logger.error("Preprocessing works incorrect") return False except MinioException: - logger.error("MinioException had happened while checking easy-ocr health") + logger.error( + "MinioException had happened while checking easy-ocr health" + ) return False finally: test_page.close() diff --git a/processing/processing/main.py b/processing/processing/main.py index 8a2268b5c..bfd7447d7 100644 --- a/processing/processing/main.py +++ b/processing/processing/main.py @@ -61,8 +61,12 @@ def run_text_matching( ) def get_preprocessing_result( file_id: int = Path(..., example=4), - pages: Optional[Set[int]] = Query(None, min_items=1, ge=1, example={3, 4, 1}), - current_tenant: str = Header(..., example="tenant", alias="X-Current-Tenant"), + pages: Optional[Set[int]] = Query( + None, min_items=1, ge=1, example={3, 4, 1} + ), + current_tenant: str = Header( + ..., example="tenant", alias="X-Current-Tenant" + ), ) -> Response: """ Take preprocess data from MinIO for `file_id`, and return it as @@ -131,9 +135,9 @@ async def update_task_status( current_tenant: str = Header(..., alias="X-Current-Tenant"), session: Session = Depends(db.service.session_scope), ) -> Dict[str, str]: - task: Optional[db.models.DbPreprocessingTask] = db.service.get_task_by_execution_id( - task_id, session - ) + task: Optional[ + db.models.DbPreprocessingTask + ] = db.service.get_task_by_execution_id(task_id, session) if task is None: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="No such task" @@ -144,8 +148,8 @@ async def update_task_status( task.file_id, task.batch_id, session ) if finished: - assets_status: schema.PreprocessingStatus = map_finish_status_for_assets( - file_status + assets_status: schema.PreprocessingStatus = ( + map_finish_status_for_assets(file_status) ) await PreprocessingTask.update_file_statuses( [task.file_id], assets_status, current_tenant, token_data.token diff --git a/processing/processing/schema.py b/processing/processing/schema.py index e1f1fe106..80f1711d3 100644 --- a/processing/processing/schema.py +++ b/processing/processing/schema.py @@ -58,7 +58,9 @@ class Page(BaseModel): size: PageSize objs: List[Dict[str, Any]] = Field( ..., - example=[{"id": 1, "bbox": [1, 2, 3, 4], "category": "1", "text": "string"}], + example=[ + {"id": 1, "bbox": [1, 2, 3, 4], "category": "1", "text": "string"} + ], ) diff --git a/processing/processing/send_preprocess_results.py b/processing/processing/send_preprocess_results.py index ab3d026de..a36e0caf8 100644 --- a/processing/processing/send_preprocess_results.py +++ b/processing/processing/send_preprocess_results.py @@ -40,7 +40,10 @@ def get_pages(bucket: str, path: str, pages: Optional[Set[int]]) -> Set[int]: raise HTTPException(status_code=400, detail=str(err)) return set( - (page.object_name.rsplit("/", maxsplit=1)[-1][:-5] for page in pages_in_minio) + ( + page.object_name.rsplit("/", maxsplit=1)[-1][:-5] + for page in pages_in_minio + ) ) diff --git a/processing/processing/tasks.py 
b/processing/processing/tasks.py index ea566cf9a..5aeb4117b 100644 --- a/processing/processing/tasks.py +++ b/processing/processing/tasks.py @@ -94,7 +94,9 @@ def __init__( async def _execute(self) -> None: logger.info("Fetch data from assets %s", self) - files_data, _ = await get_files_data(self.file_ids, self.tenant, self.jw_token) + files_data, _ = await get_files_data( + self.file_ids, self.tenant, self.jw_token + ) logger.debug(files_data) logger.info("Execute pipeline %s", self) await execute_pipeline( @@ -122,7 +124,9 @@ async def update_file_statuses( for id_ in ids: body = {"file": id_, "status": task_status} task = asyncio.create_task( - send_request("PUT", url=settings.assets_url, json=body, headers=headers) + send_request( + "PUT", url=settings.assets_url, json=body, headers=headers + ) ) tasks.append(task) @@ -134,7 +138,9 @@ def prepare_data_for_pipelines( ) -> Iterator[FilesData]: for file_data in files_data: - file_data["output_path"] = str(Path(file_data["path"]).parent / "ocr") + file_data["output_path"] = str( + Path(file_data["path"]).parent / "ocr" + ) if file_data["pages"] <= settings.pages_per_batch: file_data["pages"] = list(range(1, file_data["pages"] + 1)) diff --git a/processing/processing/text_merge.py b/processing/processing/text_merge.py index a3d06691f..300e45b1b 100644 --- a/processing/processing/text_merge.py +++ b/processing/processing/text_merge.py @@ -174,7 +174,9 @@ def merge_words_to_paragraph(request_data: AnnotationData) -> AnnotationData: matched_pages: List[MatchedPage] = [] for page in request_data.input.pages: preprocessed_page = convert_points_to_pixels( - page=json.loads((ocr_path / f"{page.page_num}.json").read_text()), + page=json.loads( + (ocr_path / f"{page.page_num}.json").read_text() + ), new_width=page.size.width, new_height=page.size.height, ) diff --git a/processing/processing/third_party_code/table.py b/processing/processing/third_party_code/table.py index df6932b34..7bf5e6939 100644 --- a/processing/processing/third_party_code/table.py +++ b/processing/processing/third_party_code/table.py @@ -40,7 +40,9 @@ def merge(self, bb: BorderBox) -> BorderBox: bottom_right_y=max(self.bottom_right_y, bb.bottom_right_y), ) - def box_is_inside_another(self, bb2: BorderBox, threshold: float = 0.9) -> bool: + def box_is_inside_another( + self, bb2: BorderBox, threshold: float = 0.9 + ) -> bool: ( intersection_area, bb1_area, @@ -48,9 +50,13 @@ def box_is_inside_another(self, bb2: BorderBox, threshold: float = 0.9) -> bool: ) = self.get_boxes_intersection_area(other_box=bb2) if intersection_area == 0: return False - return any((intersection_area / bb) > threshold for bb in (bb1_area, bb2_area)) + return any( + (intersection_area / bb) > threshold for bb in (bb1_area, bb2_area) + ) - def box_is_inside_box(self, bb2: BorderBox, threshold: float = 0.95) -> bool: + def box_is_inside_box( + self, bb2: BorderBox, threshold: float = 0.95 + ) -> bool: ( intersection_area, bb1_area, diff --git a/processing/processing/utils/aiohttp_utils.py b/processing/processing/utils/aiohttp_utils.py index 9a3719e3a..7c0318df3 100644 --- a/processing/processing/utils/aiohttp_utils.py +++ b/processing/processing/utils/aiohttp_utils.py @@ -9,7 +9,9 @@ logger = get_logger(__name__) -Response = NamedTuple("Response", [("status_code", int), ("json", Dict[Any, Any])]) +Response = NamedTuple( + "Response", [("status_code", int), ("json", Dict[Any, Any])] +) async def send_request(method: str, url: str, **kwargs: Any) -> Response: @@ -19,7 +21,9 @@ async def send_request(method: 
str, url: str, **kwargs: Any) -> Response: ) logger.info("Send request to %s. %s, %s", url, method, kwargs) for attempt in range(settings.retry_attempts): - async with http_session.request(method=method, url=url, **kwargs) as resp: + async with http_session.request( + method=method, url=url, **kwargs + ) as resp: if resp.status in settings.retry_statuses: logger.error("Bad status code: %s from %s", resp.status, url) if attempt != settings.retry_attempts - 1: diff --git a/processing/processing/utils/logger.py b/processing/processing/utils/logger.py index 54ec95a64..de0dcebb7 100644 --- a/processing/processing/utils/logger.py +++ b/processing/processing/utils/logger.py @@ -10,7 +10,8 @@ "formatters": { "default": { "()": "uvicorn.logging.DefaultFormatter", - "fmt": "[%(asctime)s] - [%(name)s] - " "[%(levelname)s] - %(message)s", + "fmt": "[%(asctime)s] - [%(name)s] - " + "[%(levelname)s] - %(message)s", "datefmt": "%d-%m-%Y %H:%M:%S", }, }, diff --git a/processing/processing/utils/minio_utils.py b/processing/processing/utils/minio_utils.py index 130438d75..19a0c96a4 100644 --- a/processing/processing/utils/minio_utils.py +++ b/processing/processing/utils/minio_utils.py @@ -32,14 +32,20 @@ def create_minio_config(): elif settings.s3_credentials_provider == "aws_config": # environmental variable AWS_PROFILE_NAME should be set minio_config.update( - {"credentials": AWSConfigProvider(profile=settings.aws_profile_name)} + { + "credentials": AWSConfigProvider( + profile=settings.aws_profile_name + ) + } ) else: raise NotConfiguredException( "s3 connection is not properly configured - " "s3_credentials_provider is not set" ) - logger.info(f"S3_Credentials provider - {settings.s3_credentials_provider}") + logger.info( + f"S3_Credentials provider - {settings.s3_credentials_provider}" + ) return minio_config diff --git a/processing/processing/utils/utils.py b/processing/processing/utils/utils.py index 9a40cc474..40b927595 100644 --- a/processing/processing/utils/utils.py +++ b/processing/processing/utils/utils.py @@ -18,7 +18,9 @@ def get_internal_url(url: str) -> str: def split_iterable(list_a: List[T], chunk_size: int) -> List[List[T]]: """Splits a list passed in chunks with no more, than elements""" - return [list_a[x : chunk_size + x] for x in range(0, len(list_a), chunk_size)] + return [ + list_a[x : chunk_size + x] for x in range(0, len(list_a), chunk_size) + ] @AsyncTTL(time_to_live=60 * 5, maxsize=8) @@ -49,12 +51,16 @@ async def get_files_data( Returns list of dictionaries with data for each file with ids passed in request_body""" elements_per_page_in_dataset_manager = 100 - splatted_files_ids = split_iterable(files_ids, elements_per_page_in_dataset_manager) + splatted_files_ids = split_iterable( + files_ids, elements_per_page_in_dataset_manager + ) all_files_data = [] for batch in splatted_files_ids: params = { "pagination": { - "page_num": len(files_ids) // elements_per_page_in_dataset_manager + 1, + "page_num": len(files_ids) + // elements_per_page_in_dataset_manager + + 1, "page_size": elements_per_page_in_dataset_manager, }, "filters": [{"field": "id", "operator": "in", "value": batch}], diff --git a/processing/tests/integration/test_integration.py b/processing/tests/integration/test_integration.py index 277f2938f..834ea24b9 100644 --- a/processing/tests/integration/test_integration.py +++ b/processing/tests/integration/test_integration.py @@ -57,7 +57,9 @@ def preprocessing_url(module_scoped_container_getter): request_session = requests.Session() retries = Retry(total=5, 
backoff_factor=1) request_session.mount("http://", HTTPAdapter(max_retries=retries)) - service = module_scoped_container_getter.get("preprocessing").network_info[0] + service = module_scoped_container_getter.get("preprocessing").network_info[ + 0 + ] api_url = f"http://{service.hostname}:{service.host_port}" return api_url @@ -99,7 +101,9 @@ def file_id(minio_client): "Fails with ValueError: Unable to find `/processing/docker-compose.yml` for integration tests." ) def test_minio_ok(minio_url, minio_client, file_id): - objs = minio_client.list_objects(BUCKET, f"files/{file_id}", recursive=True) + objs = minio_client.list_objects( + BUCKET, f"files/{file_id}", recursive=True + ) file_names = [i.object_name for i in objs] assert set(file_names) == { "files/52/52.pdf", @@ -159,7 +163,9 @@ def test_send_request_to_preprocessing( assert response.status_code == 202 sleep(1) - objs = set(minio_client.list_objects(BUCKET, "files/1/ocr", recursive=True)) + objs = set( + minio_client.list_objects(BUCKET, "files/1/ocr", recursive=True) + ) assert set((i.object_name for i in objs)) == { "files/1/ocr/2.json", "files/1/ocr/1.json", diff --git a/processing/tests/test_assets_status.py b/processing/tests/test_assets_status.py index c6258786e..5d7e5a981 100644 --- a/processing/tests/test_assets_status.py +++ b/processing/tests/test_assets_status.py @@ -29,7 +29,10 @@ async def test_send_status_to_assets(mock_preprocessing_task, status): mock.assert_awaited_once_with( method="PUT", url=settings.assets_url, - json={"file": int(mock_preprocessing_task.file_id), "status": status}, + json={ + "file": int(mock_preprocessing_task.file_id), + "status": status, + }, headers={ "X-Current-Tenant": mock_preprocessing_task.tenant, "Authorization": f"Bearer {mock_preprocessing_task.token}", diff --git a/processing/tests/test_text_merge.py b/processing/tests/test_text_merge.py index 047ffcec2..512bd6c9e 100644 --- a/processing/tests/test_text_merge.py +++ b/processing/tests/test_text_merge.py @@ -74,7 +74,9 @@ def test_match_empty_annotations(self): {"type": "text", "bbox": (3, 3, 8, 8), "text": "1 2 3"}, ], } - page = Page(page_num=1, size=PageSize(width=1000, height=1000), objs=[]) + page = Page( + page_num=1, size=PageSize(width=1000, height=1000), objs=[] + ) assert match_page(words=words, page=page) == MatchedPage( page_num=1, paragraph_bboxes={} ) @@ -228,8 +230,12 @@ def test_download(self, _1, tmp_path): bucket="some_bucket", input=Input( pages=[ - Page(page_num=1, size=PageSize(width=10, height=10), objs=[]), - Page(page_num=2, size=PageSize(width=10, height=10), objs=[]), + Page( + page_num=1, size=PageSize(width=10, height=10), objs=[] + ), + Page( + page_num=2, size=PageSize(width=10, height=10), objs=[] + ), ] ), ) diff --git a/processing/tests/test_utils/test_utils.py b/processing/tests/test_utils/test_utils.py index 339ea6617..4f8422c58 100644 --- a/processing/tests/test_utils/test_utils.py +++ b/processing/tests/test_utils/test_utils.py @@ -125,7 +125,10 @@ def test_positive_get_files_data_from_separate_files(jw_token): [1, 2], ) - assert utils.get_files_data([1, 2], "test_tenant", jw_token) == expected_result + assert ( + utils.get_files_data([1, 2], "test_tenant", jw_token) + == expected_result + ) @pytest.mark.skip @@ -174,7 +177,9 @@ def test_get_files_data_from_separate_files_100_elements(jw_token): json=large_mock_files_data, status=200, ) - assert utils.get_files_data(list(range(1, 101)), "test_tenant", jw_token) == ( + assert utils.get_files_data( + list(range(1, 101)), "test_tenant", jw_token + ) 
== ( large_mock_files_data["data"], list(range(1, 101)), ) @@ -283,7 +288,9 @@ def test_get_files_data_from_separate_files_101_elements(jw_token): } for i in range(1, 102) ] - assert utils.get_files_data(list(range(1, 102)), "test_tenant", jw_token) == ( + assert utils.get_files_data( + list(range(1, 102)), "test_tenant", jw_token + ) == ( expected_files_data, list(range(1, 102)), ) @@ -393,7 +400,9 @@ def test_get_files_data_from_separate_files_111_elements(jw_token): }, status=200, ) - assert utils.get_files_data(list(range(1, 111)), "test_tenant", jw_token) == ( + assert utils.get_files_data( + list(range(1, 111)), "test_tenant", jw_token + ) == ( expected_files_data, list(range(1, 111)), ) @@ -405,7 +414,9 @@ def test_get_files_data_from_separate_files_111_elements(jw_token): def test_get_files_data_from_separate_files_501_code(jw_token): request_body = { "pagination": {"page_num": 1, "page_size": 15}, - "filters": [{"field": "id", "operator": "eq", "value": "some invalid file id"}], + "filters": [ + {"field": "id", "operator": "eq", "value": "some invalid file id"} + ], "sorting": [{"field": "id", "direction": "asc"}], } responses.add( @@ -425,7 +436,9 @@ def test_get_files_data_from_separate_files_501_code(jw_token): # --------------------- TESTING execute_pipeline ------------------------- @pytest.mark.skip @responses.activate -def test_execute_pipeline_negative(jw_token, files_data_for_pipeline, db_test_session): +def test_execute_pipeline_negative( + jw_token, files_data_for_pipeline, db_test_session +): responses.add( responses.POST, diff --git a/scheduler/alembic/env.py b/scheduler/alembic/env.py index e865ca545..f21ea5240 100644 --- a/scheduler/alembic/env.py +++ b/scheduler/alembic/env.py @@ -71,7 +71,9 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure( + connection=connection, target_metadata=target_metadata + ) with context.begin_transaction(): context.run_migrations() diff --git a/scheduler/alembic/versions/0cadbdb7f0ea_.py b/scheduler/alembic/versions/0cadbdb7f0ea_.py index f0a0ada17..f13053fec 100644 --- a/scheduler/alembic/versions/0cadbdb7f0ea_.py +++ b/scheduler/alembic/versions/0cadbdb7f0ea_.py @@ -25,7 +25,9 @@ def upgrade() -> None: sa.Column("last_heartbeat", sa.DateTime(), nullable=False), sa.PrimaryKeyConstraint("id"), ) - op.add_column("units", sa.Column("runner_id", postgresql.UUID(), nullable=True)) + op.add_column( + "units", sa.Column("runner_id", postgresql.UUID(), nullable=True) + ) # ### end Alembic commands ### diff --git a/scheduler/alembic/versions/449be82736bd_.py b/scheduler/alembic/versions/449be82736bd_.py index 57e2d1924..ff603864d 100644 --- a/scheduler/alembic/versions/449be82736bd_.py +++ b/scheduler/alembic/versions/449be82736bd_.py @@ -18,7 +18,9 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### - op.add_column("units", sa.Column("response_topic", sa.String(), nullable=True)) + op.add_column( + "units", sa.Column("response_topic", sa.String(), nullable=True) + ) # ### end Alembic commands ### diff --git a/scheduler/scheduler/app.py b/scheduler/scheduler/app.py index 136b02670..db894e53d 100644 --- a/scheduler/scheduler/app.py +++ b/scheduler/scheduler/app.py @@ -8,7 +8,9 @@ logger = log.get_logger(__name__) -tenant = tenant_dependency.get_tenant_info(url=config.KEYCLOAK_URI, algorithm="RS256") +tenant = tenant_dependency.get_tenant_info( + url=config.KEYCLOAK_URI, algorithm="RS256" +) app = FastAPI( title="Scheduler", diff --git a/scheduler/scheduler/db/models.py b/scheduler/scheduler/db/models.py index 720f2880e..da6c2000f 100644 --- a/scheduler/scheduler/db/models.py +++ b/scheduler/scheduler/db/models.py @@ -23,7 +23,9 @@ class Unit(Base): # type: ignore created = sqlalchemy.Column( sqlalchemy.DateTime, nullable=False, default=datetime.datetime.utcnow ) - updated = sqlalchemy.Column(sqlalchemy.DateTime, onupdate=datetime.datetime.utcnow) + updated = sqlalchemy.Column( + sqlalchemy.DateTime, onupdate=datetime.datetime.utcnow + ) def __repr__(self) -> str: return ( @@ -47,7 +49,9 @@ def as_dict(self) -> Dict[str, Any]: class Heartbeat(Base): # type: ignore __tablename__ = "heartbeat" - id = sqlalchemy.Column(postgresql.UUID(), primary_key=True, default=uuid.uuid4) + id = sqlalchemy.Column( + postgresql.UUID(), primary_key=True, default=uuid.uuid4 + ) last_heartbeat = sqlalchemy.Column( sqlalchemy.DateTime, nullable=False, default=datetime.datetime.utcnow ) diff --git a/scheduler/scheduler/db/service.py b/scheduler/scheduler/db/service.py index eb2db3566..3acc6388b 100644 --- a/scheduler/scheduler/db/service.py +++ b/scheduler/scheduler/db/service.py @@ -6,7 +6,9 @@ from scheduler.db import models from sqlalchemy import orm -engine = sqlalchemy.create_engine(config.DB_URL, pool_size=int(config.POOL_SIZE)) +engine = sqlalchemy.create_engine( + config.DB_URL, pool_size=int(config.POOL_SIZE) +) Session = orm.sessionmaker(bind=engine, expire_on_commit=False) @@ -48,7 +50,9 @@ def get_expired_heartbeats( ) -def get_not_finished_units(session: orm.Session, runner_id: str) -> List[models.Unit]: +def get_not_finished_units( + session: orm.Session, runner_id: str +) -> List[models.Unit]: """Get units with statuses 'RECEIVED' and 'IN_PROGRESS' with the given runner_id. """ @@ -70,9 +74,9 @@ def change_unit_runner_id_in_lock(session: orm.Session, id_: str) -> None: change status to 'RECEIVED' with 'for update' statement. """ args = {"runner_id": None, "status": unit.UnitStatus.RECEIVED} - session.query(models.Unit).filter(models.Unit.id == id_).with_for_update().update( - args - ) + session.query(models.Unit).filter( + models.Unit.id == id_ + ).with_for_update().update(args) def delete_instances(session: orm.Session, objs: models.TablesList) -> None: diff --git a/scheduler/scheduler/heartbeat.py b/scheduler/scheduler/heartbeat.py index 617fa5d8f..effdc86e8 100644 --- a/scheduler/scheduler/heartbeat.py +++ b/scheduler/scheduler/heartbeat.py @@ -18,7 +18,9 @@ def expire_date() -> datetime.datetime: return datetime.datetime.utcnow() - heartbeat_threshold -def manage_expired_runners(session: orm.Session, producer: AIOKafkaProducer) -> None: +def manage_expired_runners( + session: orm.Session, producer: AIOKafkaProducer +) -> None: """Get expired heartbeats, remove runner_id and change status to 'RECEIVED' from corresponding units. Remove expired heartbeats from db. 
Runs unfinished units if there are any. @@ -26,9 +28,13 @@ def manage_expired_runners(session: orm.Session, producer: AIOKafkaProducer) -> expired_heartbeats = service.get_expired_heartbeats(session, expire_date()) for expired_heartbeat in expired_heartbeats: runner_id_ = expired_heartbeat.id - not_finished_units = service.get_not_finished_units(session, runner_id_) + not_finished_units = service.get_not_finished_units( + session, runner_id_ + ) for not_finished_unit in not_finished_units: - service.change_unit_runner_id_in_lock(session, not_finished_unit.id) + service.change_unit_runner_id_in_lock( + session, not_finished_unit.id + ) runner.run_orm_unit(producer, not_finished_unit) service.delete_instances(session, expired_heartbeats) @@ -52,5 +58,7 @@ async def heartbeat(producer: AIOKafkaProducer) -> None: with service.Session.begin() as session: service.update_heartbeat_timestamp(session, runner.runner_id) manage_expired_runners(session, producer) - sleep_time_after_heartbeat = time_to_sleep - sleep_time_before_heartbeat + sleep_time_after_heartbeat = ( + time_to_sleep - sleep_time_before_heartbeat + ) await asyncio.sleep(sleep_time_after_heartbeat) diff --git a/scheduler/scheduler/runner.py b/scheduler/scheduler/runner.py index 8627fc497..e8cc25e2e 100644 --- a/scheduler/scheduler/runner.py +++ b/scheduler/scheduler/runner.py @@ -9,7 +9,9 @@ runner_id: str = str(uuid.uuid4()) -async def fetch_and_send(producer: aiokafka.AIOKafkaProducer, unit_: unit.Unit) -> None: +async def fetch_and_send( + producer: aiokafka.AIOKafkaProducer, unit_: unit.Unit +) -> None: """Perform request to the url and send the result to the response topic if the response topic is specified. @@ -38,7 +40,9 @@ async def fetch_and_send(producer: aiokafka.AIOKafkaProducer, unit_: unit.Unit) ) -def run_orm_unit(producer: aiokafka.AIOKafkaProducer, orm_unit: models.Unit) -> None: +def run_orm_unit( + producer: aiokafka.AIOKafkaProducer, orm_unit: models.Unit +) -> None: unit_ = unit.Unit.from_orm(orm_unit) asyncio.create_task(fetch_and_send(producer, unit_)) diff --git a/scheduler/tests/test_heartbeat.py b/scheduler/tests/test_heartbeat.py index 465306a33..b035c7f06 100644 --- a/scheduler/tests/test_heartbeat.py +++ b/scheduler/tests/test_heartbeat.py @@ -22,11 +22,15 @@ def test_expire_date(): def test_manage_expired_runners(testing_session): """Testing manage_expired_runners.""" id_ = str(uuid.uuid4()) - unit_1 = models.Unit(id="unit_1_id", runner_id=id_, status=unit.UnitStatus.DONE) + unit_1 = models.Unit( + id="unit_1_id", runner_id=id_, status=unit.UnitStatus.DONE + ) unit_2 = models.Unit( id="unit_2_id", runner_id=id_, status=unit.UnitStatus.IN_PROGRESS ) - heartbeat_ = models.Heartbeat(id=id_, last_heartbeat=datetime.datetime(2022, 1, 1)) + heartbeat_ = models.Heartbeat( + id=id_, last_heartbeat=datetime.datetime(2022, 1, 1) + ) testing_session.add_all([unit_1, unit_2, heartbeat_]) with mock.patch("scheduler.runner.run_orm_unit"): diff --git a/scheduler/tests/test_service.py b/scheduler/tests/test_service.py index 16f8c4ae4..31a32cf22 100644 --- a/scheduler/tests/test_service.py +++ b/scheduler/tests/test_service.py @@ -14,7 +14,9 @@ def test_get_unit_by_id(testing_session, testing_unit_instance): testing_session.add(testing_unit_instance) testing_session.flush() testing_session.commit() - instance = service.get_unit_by_id(testing_session, testing_unit_instance.id) + instance = service.get_unit_by_id( + testing_session, testing_unit_instance.id + ) assert isinstance(instance, models.Unit) assert instance.id == 
testing_unit_instance.id diff --git a/search/search/config.py b/search/search/config.py index 5d6027e7d..db6bba644 100644 --- a/search/search/config.py +++ b/search/search/config.py @@ -68,7 +68,9 @@ def annotation_categories_search_url(self) -> str: @property def jobs_search_url(self) -> str: - return "/".join((self.jobs_url.rstrip("/"), self.jobs_search.lstrip("/"))) + return "/".join( + (self.jobs_url.rstrip("/"), self.jobs_search.lstrip("/")) + ) class Config: env_file: str = find_dotenv(".env") diff --git a/search/search/es.py b/search/search/es.py index 6140474e8..4ce5efda5 100644 --- a/search/search/es.py +++ b/search/search/es.py @@ -41,10 +41,14 @@ class NoCategory(NoSuchTenant): pass -async def prepare_index(es_instance: AsyncElasticsearch, index_name: str) -> None: +async def prepare_index( + es_instance: AsyncElasticsearch, index_name: str +) -> None: if not await es_instance.indices.exists(index=index_name): try: - await es_instance.indices.create(index=index_name, body=INDEX_SETTINGS) + await es_instance.indices.create( + index=index_name, body=INDEX_SETTINGS + ) except RequestError as exc: if exc.error == "resource_already_exists_exception": pass @@ -130,16 +134,22 @@ async def build_query( terms_filter = {"terms": {"category": categories_ids}} query["query"]["bool"]["filter"].append(terms_filter) for parameter, value in search_parameters.items(): - query["query"]["bool"]["filter"].append({"term": {parameter: {"value": value}}}) + query["query"]["bool"]["filter"].append( + {"term": {parameter: {"value": value}}} + ) return query -async def add_child_categories(category_id: str, tenant: str, token: str) -> List[str]: +async def add_child_categories( + category_id: str, tenant: str, token: str +) -> List[str]: """Helper function which makes GET request into "annotation" service endpoint and returns list of provided category_id with ids of all subcategories from endpoint's response. 
""" - child_category_url = f"{settings.annotation_categories_url}/{category_id}/child" + child_category_url = ( + f"{settings.annotation_categories_url}/{category_id}/child" + ) header = {"X-Current-Tenant": tenant, "Authorization": f"Bearer {token}"} try: diff --git a/search/search/harvester.py b/search/search/harvester.py index 53ae27e25..289261d04 100644 --- a/search/search/harvester.py +++ b/search/search/harvester.py @@ -39,7 +39,9 @@ def create_boto3_config(): "s3 connection is not properly configured " "- s3_credentials_provider is not set" ) - logger.info(f"S3_Credentials provider - {settings.s3_credentials_provider}") + logger.info( + f"S3_Credentials provider - {settings.s3_credentials_provider}" + ) return boto3_config @@ -87,7 +89,9 @@ def prepare_es_document( es_document["category"] = document["category"] es_document["bbox"] = document.get("bbox") es_document["tokens"] = document.get("tokens") - return schemas.pieces.GeomObject.parse_obj(es_document) # for input data validation + return schemas.pieces.GeomObject.parse_obj( + es_document + ) # for input data validation def extract_manifest_data( @@ -167,5 +171,7 @@ async def old_pieces_cleaner( async def start_harvester( tenant: str, job_id: int, file_id: Optional[int] = None ) -> None: - await helpers.async_bulk(es.ES, old_pieces_cleaner(tenant, job_id, file_id)) + await helpers.async_bulk( + es.ES, old_pieces_cleaner(tenant, job_id, file_id) + ) await helpers.async_bulk(es.ES, harvester(tenant, job_id, file_id)) diff --git a/search/search/main.py b/search/search/main.py index 4d9394a92..dbe293ea9 100644 --- a/search/search/main.py +++ b/search/search/main.py @@ -18,7 +18,9 @@ }, ] -TOKEN = get_tenant_info(url=settings.keycloak_url, algorithm=settings.jwt_algorithm) +TOKEN = get_tenant_info( + url=settings.keycloak_url, algorithm=settings.jwt_algorithm +) app = fastapi.FastAPI( title=settings.app_title, @@ -59,7 +61,9 @@ def elastic_exception_handler_es_error( @app.exception_handler(BotoCoreError) -def minio_exception_handler_bc_error(request: fastapi.Request, exc: BotoCoreError): +def minio_exception_handler_bc_error( + request: fastapi.Request, exc: BotoCoreError +): return fastapi.responses.JSONResponse( status_code=500, content={"detail": f"Error: connection error ({exc})"}, @@ -179,6 +183,8 @@ async def search_facets( ) -> schemas.facets.FacetsResponse: query = request.build_es_query() elastic_response = await es.ES.search(index=x_current_tenant, body=query) - response = schemas.facets.FacetsResponse.parse_es_response(elastic_response) + response = schemas.facets.FacetsResponse.parse_es_response( + elastic_response + ) await response.adjust_facet_result(x_current_tenant, token.token) return response diff --git a/search/search/schemas/facets.py b/search/search/schemas/facets.py index ae1bc6762..242f4d35a 100644 --- a/search/search/schemas/facets.py +++ b/search/search/schemas/facets.py @@ -51,9 +51,13 @@ def apply_filter(self, query: Dict[str, Any]) -> Dict[str, Any]: continue if self.operator == FacetOperator.IN: - facet_body["filter"]["bool"]["must"].append(self.filter_template) + facet_body["filter"]["bool"]["must"].append( + self.filter_template + ) if self.operator == FacetOperator.NOT_IN: - facet_body["filter"]["bool"]["must_not"].append(self.filter_template) + facet_body["filter"]["bool"]["must_not"].append( + self.filter_template + ) return query @@ -76,7 +80,9 @@ def facet_template(self) -> Dict[str, Any]: self.name: { "filter": {"bool": {"must": [], "must_not": []}}, "aggs": { - self.name: {"terms": {"field": 
self.name, "size": self.limit}} + self.name: { + "terms": {"field": self.name, "size": self.limit} + } }, } } @@ -88,8 +94,12 @@ class FacetsRequest(BaseModel): description="*Match query in a text type field*", example="Elasticsearch", ) - facets: List[FacetParams] = Field(description="*An array for ES aggregations*") - filters: Optional[List[FilterParams]] = Field(description="*Filters for facets*") + facets: List[FacetParams] = Field( + description="*An array for ES aggregations*" + ) + filters: Optional[List[FilterParams]] = Field( + description="*Filters for facets*" + ) def _build_facets(self, query: Dict[str, Any]) -> Dict[str, Any]: for facet in self.facets: @@ -126,18 +136,26 @@ def build_es_query(self) -> Dict[str, Any]: class AggResult(BaseModel): - id: Union[int, str] = Field(description="*Aggregation key id*", example="Header") + id: Union[int, str] = Field( + description="*Aggregation key id*", example="Header" + ) count: int = Field(description="*Count of aggregated docs*", example=10) name: Optional[str] = Field(description="*A name of a category or a job*") @staticmethod def parse_es_agg_doc(es_doc: Dict[str, Any]) -> "AggResult": - return AggResult(id=es_doc.get("key", ""), count=es_doc.get("doc_count", 0)) + return AggResult( + id=es_doc.get("key", ""), count=es_doc.get("doc_count", 0) + ) class FacetBodyResponse(BaseModel): - name: str = Field(description="*A name of aggregation*", example="category") - values: List[AggResult] = Field(description="*An array aggregation results*") + name: str = Field( + description="*A name of aggregation*", example="category" + ) + values: List[AggResult] = Field( + description="*An array aggregation results*" + ) async def adjust_facet(self, tenant: str, token: str) -> None: if self.name not in settings.computed_fields: diff --git a/search/search/schemas/pieces.py b/search/search/schemas/pieces.py index f4a95d49a..a5c4ec48e 100644 --- a/search/search/schemas/pieces.py +++ b/search/search/schemas/pieces.py @@ -18,7 +18,9 @@ def pieces_condition(properties: Dict[str, Any]) -> List[str]: return [ - el for el in properties if properties[el].get("type") not in __excluded_types + el + for el in properties + if properties[el].get("type") not in __excluded_types ] @@ -31,7 +33,9 @@ class GeomObject(pydantic.BaseModel): content: str = pydantic.Field(..., example="ElasticSearch") document_id: pydantic.conint(ge=1) = pydantic.Field(..., example=1) # type: ignore page_number: pydantic.conint(ge=1) = pydantic.Field(..., example=1) # type: ignore - bbox: Optional[pydantic.conlist(float, min_items=4, max_items=4)] = pydantic.Field( + bbox: Optional[ + pydantic.conlist(float, min_items=4, max_items=4) + ] = pydantic.Field( None, example=[1.5, 1.5, 1.5, 1.5] ) # type: ignore tokens: Optional[List[str]] = pydantic.Field( @@ -83,12 +87,16 @@ def get_filter_template(self) -> Dict[str, Any]: def is_include(self) -> bool: return self.operator in (PieceOperators.IN, PieceOperators.EQ) - async def adjust_for_child_categories(self, tenant: str, token: str) -> List[str]: + async def adjust_for_child_categories( + self, tenant: str, token: str + ) -> List[str]: if not isinstance(self.value, list): self.value = [self.value] tasks = [] for category in self.value: - task = asyncio.create_task(es.add_child_categories(category, tenant, token)) + task = asyncio.create_task( + es.add_child_categories(category, tenant, token) + ) tasks.append(task) res = await asyncio.gather(*tasks) new_categories = list(reduce(lambda a, b: a & b, map(set, res))) @@ -125,9 +133,13 @@ def 
_build_sorts(self) -> List[Dict[str, Any]]: def _apply_filters(self, query: Dict[str, Any]) -> Dict[str, Any]: for filter_ in self.filters: if filter_.is_include: - query["query"]["bool"]["must"].append(filter_.get_filter_template()) + query["query"]["bool"]["must"].append( + filter_.get_filter_template() + ) if not filter_.is_include: - query["query"]["bool"]["must_not"].append(filter_.get_filter_template()) + query["query"]["bool"]["must_not"].append( + filter_.get_filter_template() + ) return query def _apply_sort(self, query: Dict[str, Any]) -> Dict[str, Any]: @@ -140,7 +152,9 @@ def _apply_es_pagination(self, query: Dict[str, Any]) -> Dict[str, Any]: def _apply_query(self, query: Dict[str, Any]) -> Dict[str, Any]: match = { - "match": {"content": {"query": self.query, "minimum_should_match": "81%"}} + "match": { + "content": {"query": self.query, "minimum_should_match": "81%"} + } } query["query"]["bool"]["must"].append(match) return query @@ -196,7 +210,9 @@ def __make_pag_params( pages = SearchResultSchema2.__calculate_num_pages( pag_in.page_size, total_results ) - return PaginationParams(pag_in.page_num, pag_in.page_size, pages, total_results) + return PaginationParams( + pag_in.page_num, pag_in.page_size, pages, total_results + ) @staticmethod def __calculate_num_pages(page_size: int, total_results: int) -> int: diff --git a/search/tests/conftest.py b/search/tests/conftest.py index 70ed1a321..7a1d2436c 100644 --- a/search/tests/conftest.py +++ b/search/tests/conftest.py @@ -23,7 +23,9 @@ @pytest_asyncio.fixture async def es(): - es_ = AsyncElasticsearch(hosts=settings.es_host_test, port=settings.es_port_test) + es_ = AsyncElasticsearch( + hosts=settings.es_host_test, port=settings.es_port_test + ) yield es_ await es_.indices.delete(index=INDEX_NAME) await es_.close() @@ -31,7 +33,9 @@ async def es(): @pytest_asyncio.fixture async def index_test_data(monkeypatch) -> None: - es_ = AsyncElasticsearch(hosts=settings.es_host_test, port=settings.es_port_test) + es_ = AsyncElasticsearch( + hosts=settings.es_host_test, port=settings.es_port_test + ) monkeypatch.setattr("search.main.ES", es_) await es_.indices.create(index=INDEX_NAME, ignore=400, body=INDEX_SETTINGS) for test_object in TEST_DATA + list(CHILD_CATEGORIES_DATA.values()): @@ -133,8 +137,12 @@ def drop_es_index(moto_s3) -> boto3.resource: @pytest_asyncio.fixture -async def drop_parametrized_index(moto_s3, request, monkeypatch) -> boto3.resource: - es_ = AsyncElasticsearch(hosts=settings.es_host_test, port=settings.es_port_test) +async def drop_parametrized_index( + moto_s3, request, monkeypatch +) -> boto3.resource: + es_ = AsyncElasticsearch( + hosts=settings.es_host_test, port=settings.es_port_test + ) monkeypatch.setattr("search.harvester.ES", es_) yield moto_s3 await es_.indices.delete(index=request.param) diff --git a/search/tests/test_facets.py b/search/tests/test_facets.py index e26c096bc..02d704b4a 100644 --- a/search/tests/test_facets.py +++ b/search/tests/test_facets.py @@ -48,7 +48,9 @@ class TestData: wrong_facet_request_2 = { "query": "some", "facets": [{"name": "some", "limit": 5}], - "filters": [{"field": "some", "operator": "in", "value": ["some1", "some2"]}], + "filters": [ + {"field": "some", "operator": "in", "value": ["some1", "some2"]} + ], } agg_result_1 = {"key": "Header", "doc_count": 10} agg_result_2 = {"key": "Title", "doc_count": 10} @@ -84,9 +86,9 @@ def test_filter_param_template(): obj = facets.FilterParams.parse_obj(TestData.valid_filter_params_in) assert obj.filter_template == { "terms": { - 
TestData.valid_filter_params_in["field"]: TestData.valid_filter_params_in[ - "value" - ] + TestData.valid_filter_params_in[ + "field" + ]: TestData.valid_filter_params_in["value"] } } @@ -144,7 +146,9 @@ def test_facet_request_build_es_query(): "must_not": [{"terms": {"job_id": [10, 100]}}], } }, - "aggs": {"category": {"terms": {"field": "category", "size": 10}}}, + "aggs": { + "category": {"terms": {"field": "category", "size": 10}} + }, }, "job_id": { "filter": { @@ -182,7 +186,9 @@ def test_facet_request_build_es_query(): "aggs": { "category": { "filter": {"bool": {"must": [], "must_not": []}}, - "aggs": {"category": {"terms": {"field": "category", "size": 5}}}, + "aggs": { + "category": {"terms": {"field": "category", "size": 5}} + }, } }, "size": 0, diff --git a/search/tests/test_get.py b/search/tests/test_get.py index 23c10bb04..76ba251b6 100644 --- a/search/tests/test_get.py +++ b/search/tests/test_get.py @@ -382,7 +382,9 @@ def test_get_child_categories( expected_total_objects: int, expected_text_pieces: List[dict], ): - with patch("search.es.add_child_categories", return_value=annotation_response): + with patch( + "search.es.add_child_categories", return_value=annotation_response + ): response = client.get( settings.text_pieces_path, params=url_params, @@ -415,7 +417,9 @@ def test_no_such_tenant_index(tenant: str): @pytest.mark.asyncio @pytest.mark.unittest -@pytest.mark.parametrize("child_categories", [("category_1", "category_2"), tuple()]) +@pytest.mark.parametrize( + "child_categories", [("category_1", "category_2"), tuple()] +) async def test_add_child_categories(child_categories): with patch( "search.es.fetch", @@ -478,7 +482,8 @@ def test_requests_exception(monkeypatch): ) assert response.status_code == 500 expected_error_response = ( - f"Can't get subcategories for {category_id} " f"due to error {error_message}" + f"Can't get subcategories for {category_id} " + f"due to error {error_message}" ) assert expected_error_response in response.text @@ -498,14 +503,18 @@ def test_facets_endpoint(): } } } - with patch("search.main.es.ES.search", return_value=asyncio.Future()) as mock: + with patch( + "search.main.es.ES.search", return_value=asyncio.Future() + ) as mock: with patch( "search.main.schemas.facets.FacetsResponse.adjust_facet_result", return_value=asyncio.Future(), ) as mock1: mock.return_value.set_result(es_response) mock1.return_value.set_result(None) - resp = client.post("/facets", json=mock_es_query, headers=TEST_HEADERS) + resp = client.post( + "/facets", json=mock_es_query, headers=TEST_HEADERS + ) assert resp.json() == { "facets": [ { diff --git a/search/tests/test_harvester.py b/search/tests/test_harvester.py index aeadad8a1..029d486c9 100644 --- a/search/tests/test_harvester.py +++ b/search/tests/test_harvester.py @@ -29,7 +29,9 @@ "original_annotation_id": 1, "links": ["link_1", "link_2", "link_3"], "category": "Paragraph", - "text": ("Elasticsearch is a search engine based on the Lucene library."), + "text": ( + "Elasticsearch is a search engine based on the Lucene library." 
+ ), "bbox": [20.2, 30.3, 145.5, 120.7], "tokens": None, }, @@ -230,7 +232,8 @@ { "document_id": 1, "page_number": 1, - "content": "Elasticsearch is a search engine " "based on the Lucene library.", + "content": "Elasticsearch is a search engine " + "based on the Lucene library.", "category": "Paragraph", "bbox": [20.2, 30.3, 145.5, 120.7], "job_id": 1, @@ -286,7 +289,9 @@ async def test_start_harvester_total_amount( expected_result: int, es, ): - monkeypatch.setattr("search.harvester.connect_s3", Mock(return_value=moto_s3)) + monkeypatch.setattr( + "search.harvester.connect_s3", Mock(return_value=moto_s3) + ) monkeypatch.setattr("search.es.ES", es) for i in range(amount_of_uploads): await start_harvester(INDEX_NAME, **ids) @@ -315,7 +320,9 @@ async def test_start_harvester_elastic_content( ids, expected_result, ): - monkeypatch.setattr("search.harvester.connect_s3", Mock(return_value=moto_s3)) + monkeypatch.setattr( + "search.harvester.connect_s3", Mock(return_value=moto_s3) + ) monkeypatch.setattr("search.es.ES", es) await start_harvester(INDEX_NAME, **ids) await es.indices.refresh(index=INDEX_NAME) @@ -325,7 +332,9 @@ async def test_start_harvester_elastic_content( @pytest.mark.asyncio @pytest.mark.integration -async def test_start_harvester_no_text_objects(monkeypatch, moto_s3_fail_cases, es): +async def test_start_harvester_no_text_objects( + monkeypatch, moto_s3_fail_cases, es +): monkeypatch.setattr( "search.harvester.connect_s3", Mock(return_value=moto_s3_fail_cases), diff --git a/search/tests/test_indexation_endpoint.py b/search/tests/test_indexation_endpoint.py index 5f8fb1295..f2b493efa 100644 --- a/search/tests/test_indexation_endpoint.py +++ b/search/tests/test_indexation_endpoint.py @@ -18,7 +18,9 @@ @mark.integration @mark.parametrize("job_id", (1, 2, 100)) def test_successful_response(monkeypatch, drop_es_index, job_id, es): - monkeypatch.setattr("search.harvester.connect_s3", Mock(return_value=drop_es_index)) + monkeypatch.setattr( + "search.harvester.connect_s3", Mock(return_value=drop_es_index) + ) monkeypatch.setattr("search.harvester.ES", es) response = client.post( f"{settings.indexation_path}/{job_id}", @@ -56,7 +58,9 @@ def test_no_such_tenant_bucket(drop_parametrized_index, tenant): @mark.integration def test_minio_connection_error(monkeypatch, moto_s3): - monkeypatch.setattr("search.harvester.connect_s3", Mock(side_effect=BotoCoreError)) + monkeypatch.setattr( + "search.harvester.connect_s3", Mock(side_effect=BotoCoreError) + ) response = client.post( f"{settings.indexation_path}/1", headers=TEST_HEADERS, @@ -67,7 +71,9 @@ def test_minio_connection_error(monkeypatch, moto_s3): @mark.integration def test_elasticsearch_connection_error(monkeypatch, moto_s3): - monkeypatch.setattr("search.harvester.connect_s3", Mock(return_value=moto_s3)) + monkeypatch.setattr( + "search.harvester.connect_s3", Mock(return_value=moto_s3) + ) monkeypatch.setattr( "search.harvester.old_pieces_cleaner", Mock(side_effect=ElasticsearchException("ElasticsearchException")), diff --git a/search/tests/test_pieces.py b/search/tests/test_pieces.py index 0da166547..b5bffbb98 100644 --- a/search/tests/test_pieces.py +++ b/search/tests/test_pieces.py @@ -83,7 +83,9 @@ def test_filter_eq(): def test_filter_not_in(): fil = pieces.PieceFilter.validate(TestData.filter_2) assert not fil.is_include - assert fil.get_filter_template() == {"terms": {"category": ["Header", "Table"]}} + assert fil.get_filter_template() == { + "terms": {"category": ["Header", "Table"]} + } @pytest.mark.unittest @@ -116,8 
+118,16 @@ def test_request_1(): "from": 0, "size": 50, "sort": [ - {pieces.PIECES_ENUM.CATEGORY: {"order": pieces.PieceSortDirections.ASC}}, - {pieces.PIECES_ENUM.JOB_ID: {"order": pieces.PieceSortDirections.DESC}}, + { + pieces.PIECES_ENUM.CATEGORY: { + "order": pieces.PieceSortDirections.ASC + } + }, + { + pieces.PIECES_ENUM.JOB_ID: { + "order": pieces.PieceSortDirections.DESC + } + }, ], } @@ -149,15 +159,27 @@ def test_request_2(): }, ], "must_not": [ - {"terms": {pieces.PIECES_ENUM.PAGE_NUMBER: [10000, 1000000]}} + { + "terms": { + pieces.PIECES_ENUM.PAGE_NUMBER: [10000, 1000000] + } + } ], } }, "from": 0, "size": 50, "sort": [ - {pieces.PIECES_ENUM.CATEGORY: {"order": pieces.PieceSortDirections.ASC}}, - {pieces.PIECES_ENUM.JOB_ID: {"order": pieces.PieceSortDirections.DESC}}, + { + pieces.PIECES_ENUM.CATEGORY: { + "order": pieces.PieceSortDirections.ASC + } + }, + { + pieces.PIECES_ENUM.JOB_ID: { + "order": pieces.PieceSortDirections.DESC + } + }, ], } @@ -166,7 +188,9 @@ def test_request_2(): @pytest.mark.unittest async def test_adjust_categories(): filter_ = pieces.PieceFilter.validate(TestData.filter_1) - with patch("search.es.add_child_categories", return_value=["Table", "Cell"]): + with patch( + "search.es.add_child_categories", return_value=["Table", "Cell"] + ): await filter_.adjust_for_child_categories("foo", "bar") assert sorted(filter_.value) == sorted(["Header", "Table", "Cell"]) @@ -174,7 +198,9 @@ async def test_adjust_categories(): @pytest.mark.unittest def test_parse_es_response(): pag = pieces.PiecePagination(page_num=1, page_size=10) - resp = pieces.SearchResultSchema2.parse_es_response(TestData.es_response, pag) + resp = pieces.SearchResultSchema2.parse_es_response( + TestData.es_response, pag + ) assert resp.dict() == { "pagination": {"page_num": 1, "page_size": 10, "total": 1, "pages": 1}, "data": [ diff --git a/taxonomy/alembic/env.py b/taxonomy/alembic/env.py index f75a28ddc..663ed2f08 100644 --- a/taxonomy/alembic/env.py +++ b/taxonomy/alembic/env.py @@ -2,9 +2,9 @@ from logging.config import fileConfig from sqlalchemy import engine_from_config, pool +from taxonomy.database import SQLALCHEMY_DATABASE_URL, get_test_db_url from alembic import context # type: ignore -from taxonomy.database import SQLALCHEMY_DATABASE_URL, get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -21,7 +21,9 @@ if not os.getenv("USE_TEST_DB"): config.set_main_option("sqlalchemy.url", SQLALCHEMY_DATABASE_URL) else: - config.set_main_option("sqlalchemy.url", get_test_db_url(SQLALCHEMY_DATABASE_URL)) + config.set_main_option( + "sqlalchemy.url", get_test_db_url(SQLALCHEMY_DATABASE_URL) + ) def run_migrations_offline(): diff --git a/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py b/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py index ea1d48100..0a5a8062f 100644 --- a/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py +++ b/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py @@ -27,7 +27,9 @@ def upgrade() -> None: ["taxonomy_id", "taxonomy_version"], ["taxonomy.id", "taxonomy.version"], ), - sa.PrimaryKeyConstraint("taxonomy_id", "taxonomy_version", "category_id"), + sa.PrimaryKeyConstraint( + "taxonomy_id", "taxonomy_version", "category_id" + ), ) op.drop_column("taxonomy", "category_id") # ### end Alembic commands ### @@ -37,7 +39,9 @@ def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### op.add_column( "taxonomy", - sa.Column("category_id", sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + "category_id", sa.VARCHAR(), autoincrement=False, nullable=False + ), ) op.drop_table("association_taxonomy_category") # ### end Alembic commands ### diff --git a/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py b/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py index 38b2ad97f..3adec516a 100644 --- a/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py +++ b/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py @@ -49,8 +49,12 @@ def upgrade() -> None: sa.Column("taxonomy_id", sa.VARCHAR(), nullable=True), sa.Column("taxonomy_version", sa.Integer(), nullable=True), sa.Column("parent_id", sa.VARCHAR(), nullable=True), - sa.Column("tree", sqlalchemy_utils.types.ltree.LtreeType(), nullable=True), - sa.ForeignKeyConstraint(["parent_id"], ["taxon.id"], ondelete="cascade"), + sa.Column( + "tree", sqlalchemy_utils.types.ltree.LtreeType(), nullable=True + ), + sa.ForeignKeyConstraint( + ["parent_id"], ["taxon.id"], ondelete="cascade" + ), sa.ForeignKeyConstraint( ["taxonomy_id", "taxonomy_version"], ["taxonomy.id", "taxonomy.version"], @@ -64,14 +68,18 @@ def upgrade() -> None: unique=False, postgresql_using="gist", ) - op.create_index(op.f("ix_taxon_parent_id"), "taxon", ["parent_id"], unique=False) + op.create_index( + op.f("ix_taxon_parent_id"), "taxon", ["parent_id"], unique=False + ) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### op.drop_index(op.f("ix_taxon_parent_id"), table_name="taxon") - op.drop_index("index_taxon_tree", table_name="taxon", postgresql_using="gist") + op.drop_index( + "index_taxon_tree", table_name="taxon", postgresql_using="gist" + ) op.drop_table("taxon") op.drop_table("association_taxonomy_job") op.drop_table("taxonomy") diff --git a/taxonomy/documentation/update_docs.py b/taxonomy/documentation/update_docs.py index 7b2e0a0bb..5b2d93f41 100644 --- a/taxonomy/documentation/update_docs.py +++ b/taxonomy/documentation/update_docs.py @@ -4,7 +4,9 @@ def str_presenter(dumper, data): if "\n" in data: - return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + return dumper.represent_scalar( + "tag:yaml.org,2002:str", data, style="|" + ) return dumper.represent_scalar("tag:yaml.org,2002:str", data) diff --git a/taxonomy/taxonomy/errors.py b/taxonomy/taxonomy/errors.py index 0fad611d4..3f806a619 100644 --- a/taxonomy/taxonomy/errors.py +++ b/taxonomy/taxonomy/errors.py @@ -54,7 +54,9 @@ def check_field_error_handler(request: Request, exc: CheckFieldError): ) -def field_constraint_error_handler(request: Request, exc: FieldConstraintError): +def field_constraint_error_handler( + request: Request, exc: FieldConstraintError +): return JSONResponse( status_code=400, content={"detail": f"Error: {exc.message}"}, diff --git a/taxonomy/taxonomy/schemas/taxon.py b/taxonomy/taxonomy/schemas/taxon.py index 687f72fc8..c55d6dd1f 100644 --- a/taxonomy/taxonomy/schemas/taxon.py +++ b/taxonomy/taxonomy/schemas/taxon.py @@ -43,7 +43,9 @@ class Config: class ParentsConcatenateResponseSchema(BaseModel): taxon_id: str = Field(..., example="my_taxon_id") taxon_name: str = Field(..., example="taxon_name") - parent_ids_concat: Optional[str] = Field(..., example="parent_id_1.parent_id_2") + parent_ids_concat: Optional[str] = Field( + ..., example="parent_id_1.parent_id_2" + ) parent_names_concat: Optional[str] = Field( ..., example="parent_name_1.parent_name_2" ) diff 
--git a/taxonomy/taxonomy/schemas/taxonomy.py b/taxonomy/taxonomy/schemas/taxonomy.py index 0bde6ae00..0eb590b42 100644 --- a/taxonomy/taxonomy/schemas/taxonomy.py +++ b/taxonomy/taxonomy/schemas/taxonomy.py @@ -27,7 +27,9 @@ class CategoryLinkSchema(BaseModel): category_id: str = Field( ..., example="123abc", description="Category id to link taxonomy to" ) - job_id: str = Field(..., example="123abc", description="Job id to link taxonomy to") + job_id: str = Field( + ..., example="123abc", description="Job id to link taxonomy to" + ) taxonomy_id: str = Field(..., example="my_taxonomy_id") taxonomy_version: Optional[int] = Field( description="Version of taxonomy", example=1 @@ -35,7 +37,9 @@ class CategoryLinkSchema(BaseModel): class JobTaxonomySchema(BaseModel): - name: str = Field(..., example="taxonomy_name", description="Taxonomy name") + name: str = Field( + ..., example="taxonomy_name", description="Taxonomy name" + ) id: str = Field(..., example="my_taxonomy_id", description="Taxonomy id") version: int = Field(..., example=1, description="Version of taxonomy") category_id: str = Field( diff --git a/taxonomy/taxonomy/taxon/services.py b/taxonomy/taxonomy/taxon/services.py index 6b042e47c..bc663f00b 100644 --- a/taxonomy/taxonomy/taxon/services.py +++ b/taxonomy/taxonomy/taxon/services.py @@ -121,7 +121,9 @@ def insert_taxon_tree( taxon_response = TaxonResponseSchema.from_orm(taxon_db) if taxon_response.parent_id: - taxon_response.parents = [set_parents_is_leaf(taxon) for taxon in parents] + taxon_response.parents = [ + set_parents_is_leaf(taxon) for taxon in parents + ] taxon_response.is_leaf = is_leaf return taxon_response @@ -155,7 +157,9 @@ def update_taxon_db( raise SelfParentError("Taxon cannot be its own parent.") update_query["parent_id"] = ( - update_query["parent_id"] if update_query["parent_id"] != "null" else None + update_query["parent_id"] + if update_query["parent_id"] != "null" + else None ) ex_parent_id = taxon.parent_id new_parent_id = update_query["parent_id"] @@ -239,8 +243,12 @@ def _get_obj_from_request( return taxon_query.all(), pagination -def _extract_taxon(path: str, taxons: Dict[str, Taxon]) -> List[TaxonResponseSchema]: - return [set_parents_is_leaf(taxons[node]) for node in path.split(".")[0:-1]] +def _extract_taxon( + path: str, taxons: Dict[str, Taxon] +) -> List[TaxonResponseSchema]: + return [ + set_parents_is_leaf(taxons[node]) for node in path.split(".")[0:-1] + ] def _get_parents(db: Session, taxons: List[Taxon], tenant: str) -> Parents: @@ -261,7 +269,9 @@ def _get_parents(db: Session, taxons: List[Taxon], tenant: str) -> Parents: return path_to_taxon -def fetch_bunch_taxons_db(db: Session, taxon_ids: Set[str], tenant: str) -> List[Taxon]: +def fetch_bunch_taxons_db( + db: Session, taxon_ids: Set[str], tenant: str +) -> List[Taxon]: taxons = ( db.query(Taxon) .filter( @@ -272,7 +282,9 @@ def fetch_bunch_taxons_db(db: Session, taxon_ids: Set[str], tenant: str) -> List ) .all() ) - taxons_not_exist = {taxon.id for taxon in taxons}.symmetric_difference(taxon_ids) + taxons_not_exist = {taxon.id for taxon in taxons}.symmetric_difference( + taxon_ids + ) error_message = ", ".join(sorted(taxons_not_exist)) if taxons_not_exist: raise NoTaxonError(f"No such taxons: {error_message}") @@ -298,7 +310,9 @@ def filter_taxons( tenant: str, query: Optional[Query] = None, ) -> Page[Union[TaxonResponseSchema, str, dict]]: - taxons_request, pagination = _get_obj_from_request(db, request, tenant, query) + taxons_request, pagination = _get_obj_from_request( + db, 
request, tenant, query + ) if request.filters and "distinct" in [ item.operator.value for item in request.filters diff --git a/taxonomy/taxonomy/taxonomy/resources.py b/taxonomy/taxonomy/taxonomy/resources.py index 7e6934386..71e9eb0c7 100644 --- a/taxonomy/taxonomy/taxonomy/resources.py +++ b/taxonomy/taxonomy/taxonomy/resources.py @@ -61,7 +61,9 @@ def create_new_taxonomy( raise HTTPException( status_code=400, detail="Header x-current-tenant is required" ) - latest_taxonomy = get_latest_taxonomy(session, taxonomy.id, x_current_tenant) + latest_taxonomy = get_latest_taxonomy( + session, taxonomy.id, x_current_tenant + ) if latest_taxonomy: LOGGER.info( "save_taxonomy find taxonomy with id %s. " @@ -169,8 +171,12 @@ def associate_taxonomy_to_category( else: latests.append(category_link) - taxonomies: dict = batch_versioned_taxonomies(session, versions, x_current_tenant) - taxonomies.update(batch_latest_taxonomies(session, latests, x_current_tenant)) + taxonomies: dict = batch_versioned_taxonomies( + session, versions, x_current_tenant + ) + taxonomies.update( + batch_latest_taxonomies(session, latests, x_current_tenant) + ) not_found_taxonomies = [ link.taxonomy_id @@ -222,7 +228,9 @@ def delete_category_link( session: Session = Depends(get_db), x_current_tenant: str = X_CURRENT_TENANT_HEADER, ) -> Response: - bulk_delete_category_association(session, x_current_tenant, job_id, category_id) + bulk_delete_category_association( + session, x_current_tenant, job_id, category_id + ) return Response(status_code=status.HTTP_204_NO_CONTENT) @@ -261,7 +269,9 @@ def update_taxonomy( if not taxonomy: LOGGER.error("update_taxonomy get not existing id %s", query.id) raise HTTPException(status_code=404, detail="Not existing taxonomy") - taxonomy_db = update_taxonomy_instance(session, taxonomy, query, x_current_tenant) + taxonomy_db = update_taxonomy_instance( + session, taxonomy, query, x_current_tenant + ) return TaxonomyResponseSchema.from_orm(taxonomy_db) @@ -293,7 +303,9 @@ def update_taxonomy_by_id_and_version( (taxonomy_id, version), ) raise HTTPException(status_code=404, detail="Not existing taxonomy") - taxonomy_db = update_taxonomy_instance(session, taxonomy, query, x_current_tenant) + taxonomy_db = update_taxonomy_instance( + session, taxonomy, query, x_current_tenant + ) return TaxonomyResponseSchema.from_orm(taxonomy_db) @@ -402,7 +414,9 @@ def get_taxonomy_by_job_and_category_id( session: Session = Depends(get_db), x_current_tenant: str = X_CURRENT_TENANT_HEADER, ) -> List[TaxonomyResponseSchema]: - taxonomy = get_linked_taxonomies(session, job_id, category_id, x_current_tenant) + taxonomy = get_linked_taxonomies( + session, job_id, category_id, x_current_tenant + ) if not taxonomy: LOGGER.error( "get_taxonomy_by_job_and_category_id get not existing combination" diff --git a/taxonomy/taxonomy/taxonomy/services.py b/taxonomy/taxonomy/taxonomy/services.py index bf281036f..2d793e190 100644 --- a/taxonomy/taxonomy/taxonomy/services.py +++ b/taxonomy/taxonomy/taxonomy/services.py @@ -228,8 +228,12 @@ def bulk_delete_category_association( AssociationTaxonomyCategory.category_id == category_id, ) taxonomy_links.filter( - AssociationTaxonomyCategory.taxonomy_id.in_(tenant_taxonomy.subquery()), - AssociationTaxonomyCategory.taxonomy_version.in_(tenant_taxonomy.subquery()), + AssociationTaxonomyCategory.taxonomy_id.in_( + tenant_taxonomy.subquery() + ), + AssociationTaxonomyCategory.taxonomy_version.in_( + tenant_taxonomy.subquery() + ), ) taxonomy_links.delete(synchronize_session=False) 
session.commit() @@ -255,6 +259,8 @@ def filter_taxonomies( tenant: str, query: Optional[Query] = None, ) -> Page[Union[TaxonomyResponseSchema, str, dict]]: - taxonomies_request, pagination = _get_obj_from_request(db, request, tenant, query) + taxonomies_request, pagination = _get_obj_from_request( + db, request, tenant, query + ) return paginate(taxonomies_request, pagination) diff --git a/taxonomy/tests/conftest.py b/taxonomy/tests/conftest.py index c489fdd2f..c177ccb6d 100644 --- a/taxonomy/tests/conftest.py +++ b/taxonomy/tests/conftest.py @@ -15,10 +15,19 @@ from alembic import command from alembic.config import Config -from taxonomy.database import SQLALCHEMY_DATABASE_URL, Base, get_db, get_test_db_url +from taxonomy.database import ( + SQLALCHEMY_DATABASE_URL, + Base, + get_db, + get_test_db_url, +) from taxonomy.main import app from taxonomy.models import Taxon, Taxonomy -from taxonomy.schemas import CategoryLinkSchema, TaxonInputSchema, TaxonomyInputSchema +from taxonomy.schemas import ( + CategoryLinkSchema, + TaxonInputSchema, + TaxonomyInputSchema, +) from taxonomy.taxon import services as taxon_services from taxonomy.taxonomy import services as taxonomy_services from taxonomy.token_dependency import TOKEN @@ -80,7 +89,9 @@ def setup_test_db(use_temp_env_var, db_test_engine): # 3. Install 'ltree' extension with db_test_engine.connect() as conn: - conn.execute(sqlalchemy.sql.text("CREATE EXTENSION IF NOT EXISTS ltree")) + conn.execute( + sqlalchemy.sql.text("CREATE EXTENSION IF NOT EXISTS ltree") + ) # 4. run 'alembic upgrade head' alembic_cfg = Config("alembic.ini") @@ -94,7 +105,9 @@ def setup_test_db(use_temp_env_var, db_test_engine): @pytest.fixture -def db_session(db_test_engine, setup_test_db) -> Generator[Session, None, None]: +def db_session( + db_test_engine, setup_test_db +) -> Generator[Session, None, None]: """Creates all tables on setUp, yields SQLAlchemy session and removes tables on tearDown. 
""" @@ -128,7 +141,9 @@ def taxon_input_data(prepared_taxonomy_record_in_db): @pytest.fixture -def prepared_taxonomy_record_in_db(taxonomy_input_data, db_session) -> Taxonomy: +def prepared_taxonomy_record_in_db( + taxonomy_input_data, db_session +) -> Taxonomy: return taxonomy_services.create_taxonomy_instance( db_session, TEST_TENANTS[0], @@ -234,7 +249,9 @@ def prepare_two_taxons_different_names( @pytest.fixture -def prepare_three_taxons_parent_each_other(db_session, taxon_input_data) -> List[Taxon]: +def prepare_three_taxons_parent_each_other( + db_session, taxon_input_data +) -> List[Taxon]: first_taxon = deepcopy(taxon_input_data) first_id = uuid4().hex @@ -344,7 +361,9 @@ def common_taxon(db_session, prepare_common_tenant_taxonomy): @pytest.fixture -def overrided_token_client(client, db_session) -> Generator[TestClient, None, None]: +def overrided_token_client( + client, db_session +) -> Generator[TestClient, None, None]: app.dependency_overrides[TOKEN] = override app.dependency_overrides[get_db] = lambda: db_session diff --git a/taxonomy/tests/test_taxon_crud.py b/taxonomy/tests/test_taxon_crud.py index c9a79ce85..c5b4d4271 100644 --- a/taxonomy/tests/test_taxon_crud.py +++ b/taxonomy/tests/test_taxon_crud.py @@ -81,9 +81,12 @@ def prepare_parents_concatenate_expected_response(taxons: List[Taxon]) -> dict: { "taxon_id": taxon.id, "taxon_name": taxon.name, - "parent_ids_concat": ".".join(taxon.tree.path.split(".")[:-1]) or None, + "parent_ids_concat": ".".join(taxon.tree.path.split(".")[:-1]) + or None, # Names equal to ids in this test - "parent_names_concat": ".".join(taxon.tree.path.split(".")[:-1]) + "parent_names_concat": ".".join( + taxon.tree.path.split(".")[:-1] + ) or None, } for taxon in taxons @@ -98,7 +101,9 @@ def test_add_taxon_taxonomy_does_not_exist(overrided_token_client): name=uuid.uuid4().hex, taxonomy_id=uuid.uuid4().hex, ) - response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) + response = overrided_token_client.post( + TAXON_PATH, json=data, headers=TEST_HEADER + ) assert response.status_code == 400 assert "Taxonomy with this id doesn't exist" in response.text @@ -115,7 +120,9 @@ def test_add_taxon_self_parent( taxonomy_id=prepared_taxonomy_record_in_db.id, parent_id=taxon_id, ) - response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) + response = overrided_token_client.post( + TAXON_PATH, json=data, headers=TEST_HEADER + ) assert response.status_code == 400 assert "Taxon cannot be its own parent" in response.text @@ -147,7 +154,9 @@ def test_add_taxon_name_empty_string( name="", taxonomy_id=prepared_taxonomy_record_in_db.id, ) - response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) + response = overrided_token_client.post( + TAXON_PATH, json=data, headers=TEST_HEADER + ) assert response.status_code == 400 assert "Taxon name can not be empty" in response.text @@ -163,7 +172,9 @@ def test_add_taxon_specify_version( taxonomy_id=prepared_taxonomy_record_in_db.id, taxonomy_version=prepared_taxonomy_record_in_db.version, ) - response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) + response = overrided_token_client.post( + TAXON_PATH, json=data, headers=TEST_HEADER + ) assert response.status_code == 201 assert response_schema_from_request(data) == response.json() @@ -183,7 +194,9 @@ def test_add_unique_name( taxonomy_id=prepared_taxonomy_record_in_db.id, taxonomy_version=prepared_taxonomy_record_in_db.version, ) - response = 
overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) + response = overrided_token_client.post( + TAXON_PATH, json=data, headers=TEST_HEADER + ) assert response.status_code == 201 assert response_schema_from_request(data) == response.json() @@ -198,7 +211,9 @@ def test_add_taxon_id_exists( id_=prepared_taxon_entity_in_db.id, taxonomy_id=prepared_taxonomy_record_in_db.id, ) - response = overrided_token_client.post(TAXON_PATH, json=data, headers=TEST_HEADER) + response = overrided_token_client.post( + TAXON_PATH, json=data, headers=TEST_HEADER + ) assert response.status_code == 400 assert "Taxon id must be unique" in response.text @@ -254,7 +269,9 @@ def test_get_taxon_parents_isleaf( @pytest.mark.integration def test_get_taxon_does_not_exist(overrided_token_client): id_ = uuid.uuid4().hex - response = overrided_token_client.get(f"{TAXON_PATH}/{id_}", headers=TEST_HEADER) + response = overrided_token_client.get( + f"{TAXON_PATH}/{id_}", headers=TEST_HEADER + ) assert response.status_code == 404 assert f"Taxon with id: {id_} doesn't exist" in response.text @@ -278,7 +295,9 @@ def test_update_taxon_duplicate_name( prepare_two_taxons_different_names, ): id_ = prepare_two_taxons_different_names[1].id - taxon_update = prepare_taxon_body(name=prepare_two_taxons_different_names[0].name) + taxon_update = prepare_taxon_body( + name=prepare_two_taxons_different_names[0].name + ) taxon_update.pop("id") response = overrided_token_client.put( @@ -392,7 +411,10 @@ def test_delete_taxon_does_not_exist( f"{TAXON_PATH}/{uuid.uuid4().hex}", headers=TEST_HEADER ) assert delete_response.status_code == 404 - assert "Cannot delete taxon that doesn't exist" in delete_response.json()["detail"] + assert ( + "Cannot delete taxon that doesn't exist" + in delete_response.json()["detail"] + ) @pytest.mark.integration @@ -596,7 +618,9 @@ def test_search_parents_recursive_tree( assert len(taxons) == 2 - assert parent_1 == response_schema_from_request(root.to_dict(), is_leaf=False) + assert parent_1 == response_schema_from_request( + root.to_dict(), is_leaf=False + ) assert parent_2 == response_schema_from_request( second.to_dict(), parents=[response_schema_from_request(root.to_dict(), is_leaf=False)], @@ -621,7 +645,9 @@ def test_get_parents_concatenated_not_found( @pytest.mark.integration -def test_get_parents_concatenated(overrided_token_client, prepared_taxon_hierarchy): +def test_get_parents_concatenated( + overrided_token_client, prepared_taxon_hierarchy +): taxons_search_from = prepared_taxon_hierarchy[:5] taxon_ids = [taxon.id for taxon in taxons_search_from] @@ -635,6 +661,6 @@ def test_get_parents_concatenated(overrided_token_client, prepared_taxon_hierarc taxons = response.json() assert len(taxons) == 5 - assert prepare_parents_concatenate_expected_response(taxons_search_from) == sorted( - response.json(), key=lambda x: x["taxon_id"] - ) + assert prepare_parents_concatenate_expected_response( + taxons_search_from + ) == sorted(response.json(), key=lambda x: x["taxon_id"]) diff --git a/users/tests/keycloak/test_query.py b/users/tests/keycloak/test_query.py index 17fbfa374..bf9084421 100644 --- a/users/tests/keycloak/test_query.py +++ b/users/tests/keycloak/test_query.py @@ -32,7 +32,9 @@ async def test_get_token_v2(request_mock): @pytest.mark.asyncio -async def test_introspect_token_test(request_mock, mocked_token1, mocked_token1_data): +async def test_introspect_token_test( + request_mock, mocked_token1, mocked_token1_data +): request_mock.return_value.__aenter__.return_value.json.return_value 
= ( mocked_token1_data ) @@ -41,7 +43,9 @@ async def test_introspect_token_test(request_mock, mocked_token1, mocked_token1_ @pytest.mark.asyncio -async def test_get_master_realm_auth_data(request_mock, mocked_admin_auth_data): +async def test_get_master_realm_auth_data( + request_mock, mocked_admin_auth_data +): request_mock.return_value.__aenter__.return_value.json.return_value = ( mocked_admin_auth_data ) diff --git a/users/tests/keycloak/test_utils.py b/users/tests/keycloak/test_utils.py index 69bccc9f9..ae1f60428 100644 --- a/users/tests/keycloak/test_utils.py +++ b/users/tests/keycloak/test_utils.py @@ -28,4 +28,8 @@ def test_create_filters_with_empty_request_body(request_body): def test_create_filters(request_body): users = Users(filters=request_body) filters = kc_utils.create_filters(users) - assert filters == {"name": "h", "id": ["user_id"], "role": "role-annotator"} + assert filters == { + "name": "h", + "id": ["user_id"], + "role": "role-annotator", + } diff --git a/users/tests/test_main.py b/users/tests/test_main.py index 5568eb44d..42c64e9ad 100644 --- a/users/tests/test_main.py +++ b/users/tests/test_main.py @@ -96,7 +96,9 @@ def does_not_raise(): roles=["admin"], tenants=["tenant"], ), - TenantData(token="token", user_id="user_id", roles=[], tenants=["tenant"]), + TenantData( + token="token", user_id="user_id", roles=[], tenants=["tenant"] + ), ], ) def test_check_authorization_role_is_missing(mock_tenant_data): @@ -210,7 +212,9 @@ def test_login_status_code(token_schema, request_body, status_code): class TestGetUserGWT: def test_get_user_jwt_body(self, mock_user, user_representation): response = client.get("/users/current") - assert response.json() == user_representation(user_id="1", user_name="user") + assert response.json() == user_representation( + user_id="1", user_name="user" + ) def test_get_user_jwt_status_code(self, mock_user): response = client.get("/users/current") @@ -221,16 +225,21 @@ def test_get_user_jwt_status_code(self, mock_user): class TestGetUser: def test_get_user_body(self, mock_user, user_representation): response = client.get("/users/user-id") - assert response.json() == user_representation(user_id="1", user_name="user") + assert response.json() == user_representation( + user_id="1", user_name="user" + ) def test_get_user_status_code(self, mock_user): response = client.get("/users/user-id") assert response.status_code == 200 -def test_get_user_info_from_token_introspection(mocked_token1, mocked_token1_data): +def test_get_user_info_from_token_introspection( + mocked_token1, mocked_token1_data +): with patch( - "users.keycloak.query.introspect_token", return_value=mocked_token1_data + "users.keycloak.query.introspect_token", + return_value=mocked_token1_data, ): response = client.get( "/users/current_v2", @@ -335,7 +344,9 @@ def test_add_user_to_tenant2( ("group_1", {"detail": "User has been removed from the tenant"}), ], ) -def test_remove_user_from_tenant_body(mock_user, update_user, tenant, expected_result): +def test_remove_user_from_tenant_body( + mock_user, update_user, tenant, expected_result +): response = client.delete(f"/tenants/{tenant}/users/user_1") assert response.json() == expected_result @@ -356,7 +367,9 @@ def test_remove_user_from_tenant_status_code( @patch("users.keycloak.query.get_users_v2", return_value=mock_all_users) -@patch("users.keycloak.query.get_users_by_role", return_value=mock_users_with_role) +@patch( + "users.keycloak.query.get_users_by_role", return_value=mock_users_with_role +) class TestUsersSearch: 
@pytest.mark.parametrize("request_body", [{}, {"filters": []}]) def test_get_all_users_body( @@ -385,7 +398,11 @@ def test_filter_users_by_name_body( ): response = client.post( "/users/search", - json={"filters": [{"field": "name", "operator": "like", "value": "r"}]}, + json={ + "filters": [ + {"field": "name", "operator": "like", "value": "r"} + ] + }, ) assert response.json() == [ user_representation(user_id="1", user_name="user"), @@ -397,7 +414,11 @@ def test_filter_users_by_name_status_code( ): response = client.post( "/users/search", - json={"filters": [{"field": "name", "operator": "like", "value": "r"}]}, + json={ + "filters": [ + {"field": "name", "operator": "like", "value": "r"} + ] + }, ) assert response.status_code == 200 @@ -440,14 +461,20 @@ def test_filter_users_by_empty_name_status_code( ): response = client.post( "/users/search", - json={"filters": [{"field": "name", "operator": "like", "value": ""}]}, + json={ + "filters": [{"field": "name", "operator": "like", "value": ""}] + }, ) assert response.status_code == 422 @pytest.mark.parametrize( "request_body", [ - {"filters": [{"field": "id", "operator": "in", "value": ["1", "2"]}]}, + { + "filters": [ + {"field": "id", "operator": "in", "value": ["1", "2"]} + ] + }, { "filters": [ { @@ -479,7 +506,11 @@ def test_filter_users_by_id_body( @pytest.mark.parametrize( "request_body", [ - {"filters": [{"field": "id", "operator": "in", "value": ["1", "2"]}]}, + { + "filters": [ + {"field": "id", "operator": "in", "value": ["1", "2"]} + ] + }, { "filters": [ { @@ -508,7 +539,11 @@ def test_filter_users_by_id_status_code( "request_body", [ {"filters": [{"field": "id", "operator": "in", "value": []}]}, - {"filters": [{"field": "id", "operator": "in", "value": ["wrong_id"]}]}, + { + "filters": [ + {"field": "id", "operator": "in", "value": ["wrong_id"]} + ] + }, ], ) def test_filter_users_by_wrong_or_empty_id_body( @@ -528,7 +563,11 @@ def test_filter_users_by_wrong_or_empty_id_body( "request_body", [ {"filters": [{"field": "id", "operator": "in", "value": []}]}, - {"filters": [{"field": "id", "operator": "in", "value": ["wrong_id"]}]}, + { + "filters": [ + {"field": "id", "operator": "in", "value": ["wrong_id"]} + ] + }, ], ) def test_filter_users_by_wrong_or_empty_id_status_code( @@ -541,7 +580,9 @@ def test_filter_users_by_wrong_or_empty_id_status_code( response = client.post( "/users/search", json={ - "filters": [{"field": "id", "operator": "in", "value": ["wrong_id"]}] + "filters": [ + {"field": "id", "operator": "in", "value": ["wrong_id"]} + ] }, ) assert response.status_code == 200 @@ -589,7 +630,9 @@ def test_filter_users_by_wrong_role_body( response = client.post( "/users/search", json={ - "filters": [{"field": "role", "operator": "eq", "value": "wrong_role"}] + "filters": [ + {"field": "role", "operator": "eq", "value": "wrong_role"} + ] }, ) assert response.status_code == 422 diff --git a/users/users/config.py b/users/users/config.py index 4b65f1ead..f56795777 100644 --- a/users/users/config.py +++ b/users/users/config.py @@ -4,7 +4,9 @@ load_dotenv() -KEYCLOAK_ENDPOINT = os.getenv("KEYCLOAK_DIRECT_ENDPOINT", "http://dev2.badgerdoc.com") +KEYCLOAK_ENDPOINT = os.getenv( + "KEYCLOAK_DIRECT_ENDPOINT", "http://dev2.badgerdoc.com" +) KEYCLOAK_REALM = os.getenv("KEYCLOAK_REALM", "master") KEYCLOAK_ROLE_ADMIN = os.getenv("KEYCLOAK_ROLE_ADMIN", "") KEYCLOAK_USERS_PUBLIC_KEY = os.getenv("KEYCLOAK_USERS_PUBLIC_KEY", "") diff --git a/users/users/keycloak/query.py b/users/users/keycloak/query.py index c1769bcf4..4953687cb 100644 --- 
a/users/users/keycloak/query.py +++ b/users/users/keycloak/query.py @@ -38,7 +38,9 @@ def create_bearer_header(token: str) -> Dict[str, str]: return {"Authorization": f"Bearer {token}"} -async def create_user(token: str, realm: str, username: str, email: str) -> None: +async def create_user( + token: str, realm: str, username: str, email: str +) -> None: """Create user""" url = resources.users_uri.substitute(realm=realm) method = "POST" @@ -54,7 +56,9 @@ async def create_user(token: str, realm: str, username: str, email: str) -> None return -async def get_users_by_role(token: str, realm: str, role: str) -> List[schemas.User]: +async def get_users_by_role( + token: str, realm: str, role: str +) -> List[schemas.User]: """Get list of users from keycloak by role""" url = resources.users_by_role_uri.substitute(realm=realm, role=role) @@ -71,7 +75,8 @@ async def get_users_by_role(token: str, realm: str, role: str) -> List[schemas.U async def get_token_v2( - realm: str, request_form: Union[schemas.TokenRequest, schemas.RefreshTokenRequest] + realm: str, + request_form: Union[schemas.TokenRequest, schemas.RefreshTokenRequest], ) -> schemas.TokenResponse: """Get access token. @@ -93,7 +98,9 @@ async def get_token_v2( return schemas.TokenResponse.parse_obj(await resp.json()) -async def get_users_v2(realm: str, token: str, **filters: Any) -> List[schemas.User]: +async def get_users_v2( + realm: str, token: str, **filters: Any +) -> List[schemas.User]: """Get users from realm, filtered according to filters. :param realm: Keycloak realm. @@ -165,14 +172,18 @@ async def introspect_token(token: str) -> Token_Data: ) return data_to_return except aiohttp.ClientConnectionError as e: - logger.Logger.error("Exception while sending request to Keycloak: %s", e) + logger.Logger.error( + "Exception while sending request to Keycloak: %s", e + ) raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=f"Exception while sending request to Keycloak: {e}", ) -async def get_groups(realm: str, token: str, name: str = None) -> List[schemas.Group]: +async def get_groups( + realm: str, token: str, name: str = None +) -> List[schemas.Group]: """Get group from realm by its name. :param realm: Keycloak realm. @@ -216,7 +227,9 @@ async def create_group(realm: str, token: str, group: schemas.Group) -> None: return -async def update_user(realm: str, token: str, user_id: str, upd: schemas.User) -> None: +async def update_user( + realm: str, token: str, user_id: str, upd: schemas.User +) -> None: """Update user. :param realm: Keycloak realm. 
@@ -240,7 +253,9 @@ async def update_user(realm: str, token: str, user_id: str, upd: schemas.User) - async def execute_action_email(token: str, realm: str, user_id: str) -> None: """Send email to user for updating user profile""" - url = resources.execute_actions_email_uri.substitute(realm=realm, id=user_id) + url = resources.execute_actions_email_uri.substitute( + realm=realm, id=user_id + ) method = "PUT" headers = create_bearer_header(token) payload = ["UPDATE_PROFILE", "UPDATE_PASSWORD"] @@ -265,7 +280,8 @@ async def get_master_realm_auth_data() -> AuthData: } url = resources.token_uri.substitute(realm="master") logger.Logger.info( - "Sending request to Keycloak url: %s to get admin auth data, " "payload: %s", + "Sending request to Keycloak url: %s to get admin auth data, " + "payload: %s", url, payload, ) @@ -277,11 +293,15 @@ async def get_master_realm_auth_data() -> AuthData: data=payload, ) as resp: data = await resp.json() - data_to_return: AuthData = data # casting into TypedDict for linter checks + data_to_return: AuthData = ( + data # casting into TypedDict for linter checks + ) return data_to_return except aiohttp.ClientConnectionError as e: - logger.Logger.error("Exception while sending request to Keycloak: %s", e) + logger.Logger.error( + "Exception while sending request to Keycloak: %s", e + ) raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=f"Exception while sending request to Keycloak: {e}", @@ -308,7 +328,9 @@ async def get_identity_providers_data( return await resp.json() except aiohttp.ClientConnectionError as e: - logger.Logger.error("Exception while sending request to Keycloak: %s", e) + logger.Logger.error( + "Exception while sending request to Keycloak: %s", e + ) raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=f"Exception while sending request to Keycloak: {e}", diff --git a/users/users/keycloak/resources.py b/users/users/keycloak/resources.py index 8eb7f560e..bf07f4c94 100644 --- a/users/users/keycloak/resources.py +++ b/users/users/keycloak/resources.py @@ -38,5 +38,7 @@ def join_paths(*args: str) -> str: join_paths(_base_uri, _oidc_uri, "token", "introspect") ) identity_providers_uri = Template( - join_paths(_base_uri, "admin", _realm_uri, "identity-provider", "instances") + join_paths( + _base_uri, "admin", _realm_uri, "identity-provider", "instances" + ) ) diff --git a/users/users/main.py b/users/users/main.py index bb7df442b..0ce47f175 100644 --- a/users/users/main.py +++ b/users/users/main.py @@ -27,7 +27,9 @@ realm = conf.KEYCLOAK_REALM minio_client = s3.get_minio_client() -tenant = get_tenant_info(KEYCLOAK_USERS_PUBLIC_KEY, algorithm="RS256", debug=True) +tenant = get_tenant_info( + KEYCLOAK_USERS_PUBLIC_KEY, algorithm="RS256", debug=True +) def check_authorization(token: TenantData, role: str) -> None: @@ -36,13 +38,19 @@ def check_authorization(token: TenantData, role: str) -> None: @app.middleware("http") -async def request_error_handler(request: Request, call_next: Callable[..., Any]) -> Any: +async def request_error_handler( + request: Request, call_next: Callable[..., Any] +) -> Any: try: return await call_next(request) except aiohttp.ClientResponseError as err: - return JSONResponse(status_code=err.status, content={"detail": err.message}) + return JSONResponse( + status_code=err.status, content={"detail": err.message} + ) except AIOHTTPException as err: - return JSONResponse(status_code=err.status_code, content={"detail": err.reason}) + return JSONResponse( + status_code=err.status_code, 
content={"detail": err.reason} + ) @app.post( @@ -95,7 +103,9 @@ async def user_registration( realm=realm, token=token.token, email=email, exact="true" ) user_id = user[0].id - await kc_query.execute_action_email(token=token.token, realm=realm, user_id=user_id) + await kc_query.execute_action_email( + token=token.token, realm=realm, user_id=user_id + ) return {"detail": "User has been created"} @@ -138,13 +148,17 @@ async def get_user( return await kc_query.get_user(realm, token.token, user_id) -@app.get("/tenants", status_code=200, response_model=List[str], tags=["tenants"]) +@app.get( + "/tenants", status_code=200, response_model=List[str], tags=["tenants"] +) async def get_tenants( token: TenantData = Depends(tenant), current_tenant: Optional[str] = Header(None, alias="X-Current-Tenant"), ) -> List[str]: """Get all tenants.""" - return [group.name for group in await kc_query.get_groups(realm, token.token)] + return [ + group.name for group in await kc_query.get_groups(realm, token.token) + ] @app.post( @@ -164,7 +178,9 @@ async def create_tenant( try: s3.create_bucket(minio_client, bucket) except MaxRetryError: - raise HTTPException(status_code=503, detail="Cannot connect to the Minio.") + raise HTTPException( + status_code=503, detail="Cannot connect to the Minio." + ) tenant_ = kc_schemas.Group(name=tenant) await kc_query.create_group(realm, token.token, tenant_) return {"detail": "Tenant has been created"} @@ -230,7 +246,9 @@ async def get_users_by_filter( role=filters.get("role").value, # type: ignore ) else: - users_list = await kc_query.get_users_v2(realm=realm, token=token.token) + users_list = await kc_query.get_users_v2( + realm=realm, token=token.token + ) users_list = kc_schemas.User.filter_users( users=users_list, diff --git a/users/users/schemas.py b/users/users/schemas.py index bbe6831dc..ac11db41f 100644 --- a/users/users/schemas.py +++ b/users/users/schemas.py @@ -51,4 +51,6 @@ class FilterUserUserName(BaseModel): class Users(BaseModel): - filters: Optional[List[Union[FilterUserUserName, FilterUserUserID, FilterRole]]] + filters: Optional[ + List[Union[FilterUserUserName, FilterUserUserID, FilterRole]] + ] From 67c47fed8da09d621d03692dcf165de511b34a05 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Fri, 17 Feb 2023 17:57:21 +0400 Subject: [PATCH 14/22] refactor: formatting --- annotation/annotation/distribution/main.py | 9 ++++++--- annotation/annotation/schemas/tasks.py | 8 ++++++-- annotation/tests/test_post.py | 18 ++++++++++++------ 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/annotation/annotation/distribution/main.py b/annotation/annotation/distribution/main.py index 37b66ba91..08c47ddc0 100644 --- a/annotation/annotation/distribution/main.py +++ b/annotation/annotation/distribution/main.py @@ -100,8 +100,12 @@ def distribute( db.flush() annotated_files_pages = {} # no pages distributed for annotation yet - annotators = [x.__dict__ for x in annotators if x.default_load] # type: ignore - validators = [x.__dict__ for x in validators if x.default_load] # type: ignore + annotators = [ + x.__dict__ for x in annotators if x.default_load + ] # type: ignore + validators = [ + x.__dict__ for x in validators if x.default_load + ] # type: ignore if annotators: if ( validation_type == ValidationSchema.extensive_coverage @@ -172,7 +176,6 @@ def distribute_tasks_extensively( extensive_coverage: int, deadline: Optional[datetime] = None, ) -> List[Task]: - calculate_users_load( files=files, users=users, diff --git a/annotation/annotation/schemas/tasks.py 
b/annotation/annotation/schemas/tasks.py index b7ce00e65..5bfd7d9a3 100644 --- a/annotation/annotation/schemas/tasks.py +++ b/annotation/annotation/schemas/tasks.py @@ -33,7 +33,9 @@ class TaskStatusSchema(BaseModel): class ManualAnnotationTaskInSchema(BaseModel): file_id: int = Field(..., example=2) - pages: Set[int] = Field(..., ge=1, min_items=1, example={1, 2, 3}) # type: ignore + pages: Set[int] = Field( + ..., ge=1, min_items=1, example={1, 2, 3} + ) # type: ignore job_id: int = Field(..., example=3) user_id: UUID = Field(..., example="4e9c5839-f63b-49c8-b918-614b87813e53") is_validation: bool = Field(default=False, example=False) @@ -58,7 +60,9 @@ class UserSchema(BaseModel): class ExpandedManualAnnotationTaskSchema(TaskStatusSchema): - pages: Set[int] = Field(..., ge=1, min_items=1, example={1, 2, 3}) # type: ignore + pages: Set[int] = Field( + ..., ge=1, min_items=1, example={1, 2, 3} + ) # type: ignore user: UserSchema is_validation: bool = Field(default=False, example=False) deadline: Optional[datetime] = Field(None, example="2021-10-19 01:01:01") diff --git a/annotation/tests/test_post.py b/annotation/tests/test_post.py index 3f2a719f6..ded16b5ae 100644 --- a/annotation/tests/test_post.py +++ b/annotation/tests/test_post.py @@ -474,7 +474,8 @@ def test_post_tasks_only_files( expected_tasks_number, ): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication." + "get_response", Mock(return_value=returned_files), ) response = client.post( @@ -521,7 +522,8 @@ def test_post_tasks_new_user(monkeypatch, prepare_db_for_post): TASK_INFO_NEW_USER["user_ids"][1] ) monkeypatch.setattr( - "annotation.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication." + "get_response", Mock(return_value=FILES_FROM_ASSETS_FOR_TASK_INFO_NEW_USER), ) response = client.post( @@ -583,7 +585,8 @@ def test_post_tasks_deadline( assets_files, ): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication." + "get_response", Mock(return_value=assets_files), ) response = client.post( @@ -598,7 +601,8 @@ def test_post_tasks_deadline( @pytest.mark.integration def test_post_tasks_validation_only(monkeypatch, prepare_db_for_post): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication." + "get_response", Mock(return_value=[FILES_FROM_ASSETS_FOR_TASK_INFO[2][0]]), ) tasks_info = { @@ -631,7 +635,8 @@ def test_post_tasks_wrong_files( returned_files, ): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication." + "get_response", Mock(return_value=returned_files), ) response = client.post( @@ -688,7 +693,8 @@ def test_post_tasks_users_validation_error( assets_files, ): monkeypatch.setattr( - "annotation.microservice_communication.assets_communication.get_response", + "annotation.microservice_communication.assets_communication." 
+ "get_response", Mock(return_value=assets_files), ) response = client.post( From 4b623ee16ec652b0f2eab1fdbdaf2fb9466197c9 Mon Sep 17 00:00:00 2001 From: Andrei Borisevich Date: Fri, 17 Feb 2023 23:24:57 +0300 Subject: [PATCH 15/22] Add migration --- assets/.env | 2 +- dev_runner/conf/shared.env | 26 ++++++++++++++++---------- dev_runner/migration.sh | 17 +++++++++++++++++ 3 files changed, 34 insertions(+), 11 deletions(-) create mode 100644 dev_runner/migration.sh diff --git a/assets/.env b/assets/.env index 8483ad65e..72d4a11ea 100644 --- a/assets/.env +++ b/assets/.env @@ -18,7 +18,7 @@ S3_CREDENTIALS_PROVIDER=minio S3_PREFIX= S3_ENDPOINT=minio:9000 S3_ACCESS_KEY=minioadmin -S3_SECRET_KEY =minioadmin +S3_SECRET_KEY=minioadmin TEST_REGION=us-west-2 MINIO_SECURE_CONNECTION=False diff --git a/dev_runner/conf/shared.env b/dev_runner/conf/shared.env index e3c7cb01f..532287a17 100644 --- a/dev_runner/conf/shared.env +++ b/dev_runner/conf/shared.env @@ -1,11 +1,17 @@ -POSTGRES_HOST=postgres -DB_HOST=${POSTGRES_HOST} -POSTGRES_PORT=5432 -DB_PORT=${POSTGRES_PORT} -POSTGRES_USER=postgres -DB_USERNAME=${POSTGRES_USER} -POSTGRES_PASSWORD=postgres -DB_PASSWORD=${POSTGRES_PASSWORD} +export POSTGRES_HOST=0.0.0.0 +export DB_HOST=${POSTGRES_HOST} +export POSTGRES_PORT=5432 +export DB_PORT=${POSTGRES_PORT} +export POSTGRES_USER=postgres +export DB_USERNAME=${POSTGRES_USER} +export POSTGRES_PASSWORD=postgres +export DB_PASSWORD=${POSTGRES_PASSWORD} + +# ASSETS +export DATABASE_URL="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${ASSETS_DB}" + +# JOBS +export POSTGRESQL_JOBMANAGER_DATABASE_URI="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/job_manager" S3_ENDPOINT_URL=http://localhost MINIO_HOST=${S3_ENDPOINT_URL} @@ -74,12 +80,12 @@ KEYCLOAK_URI=${KEYCLOAK_URL} GOTENBERG=gotenberg:3000 GOTENBERG_LIBRE_OFFICE_ENDPOINT="http://${GOTENBERG}/forms/libreoffice/convert" GOTENBERG_FORMATS=[".txt",".docx",".doc",".bib",".xml",".fodt",".html",".ltx",".odt",".ott",".pdb",".psw",".rtf",".sdw",".stw",".sxw",".uot",".vor",".wps",".epub",".emf",".fodg",".met",".odd",".otg",".std",".svg",".svm",".swf",".sxd",".sxw",".tiff",".xhtml",".xpm",".fodp",".potm",".pot",".pptx",".pps",".ppt",".pwp",".sda",".sdd",".sti",".sxi",".uop",".wmf",".odp"] -IMAGE_FORMATS=[".png",".bmp", ".pbm", ".pct", ".pgm", ".ppm", ".ras", ".tiff"] +IMAGE_FORMATS=[".png",".bmp",".pbm",".pct",".pgm",".ppm",".ras",".tiff"] ROOT_PATH= LOG_LEVEL=DEBUG ES_HOST_TEST=localhost -ES_HOST=${ES_HOST_TEST +ES_HOST=${ES_HOST_TEST} ES_PORT_TEST=9200 ES_PORT=${ES_PORT_TEST} diff --git a/dev_runner/migration.sh b/dev_runner/migration.sh new file mode 100644 index 000000000..410bc1a14 --- /dev/null +++ b/dev_runner/migration.sh @@ -0,0 +1,17 @@ +SHARED_PATH=$(realpath "./conf/shared.env") +ASSETS_PATH="./conf/assets.env" + + +for service in "assets" "annotation" "jobs" "pipelines" "processing" "taxonomy" +do + echo "Migrate database for :"$service + cd "../"$service + source $SHARED_PATH && alembic upgrade head +done + + +#cd ../models +#source $SHARED_PATH && alembic upgrade head + +#cd ../scheduler +#source $SHARED_PATH && alembic upgrade head \ No newline at end of file From e44b4ffc9202db77e5c402b70a81faa1c7a22545 Mon Sep 17 00:00:00 2001 From: Andrei Borisevich Date: Sat, 18 Feb 2023 20:36:30 +0300 Subject: [PATCH 16/22] fix: add migration for scheduler --- scheduler/alembic.ini | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scheduler/alembic.ini 
b/scheduler/alembic.ini index cc418411f..2c518b7e0 100644 --- a/scheduler/alembic.ini +++ b/scheduler/alembic.ini @@ -7,6 +7,10 @@ script_location = alembic # template used to generate migration files # file_template = %%(rev)s_%%(slug)s +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + # timezone to use when rendering the date # within the migration file as well as the filename. # string value is passed to dateutil.tz.gettz() From a1e0002f6fc89f42410eb0d7010f289296de8a54 Mon Sep 17 00:00:00 2001 From: Andrei Borisevich Date: Sat, 18 Feb 2023 20:42:33 +0300 Subject: [PATCH 17/22] fix: add migration for models --- dev_runner/README.md | 5 ++++- dev_runner/conf/shared.env | 3 +++ dev_runner/migration.sh | 8 +------- 3 files changed, 8 insertions(+), 8 deletions(-) mode change 100644 => 100755 dev_runner/migration.sh diff --git a/dev_runner/README.md b/dev_runner/README.md index e8c2751f6..fe877495d 100644 --- a/dev_runner/README.md +++ b/dev_runner/README.md @@ -34,7 +34,10 @@ There is a row of external dependencies, to run them you need to use docker-comp ### Run the BadgerDoc -> TODO: DB migrations +To run the migration for all services you need to run external dependencies first and then run following command: +```bash +bash ./migration.sh +``` To run the all the services you need to run the following command: ```bash diff --git a/dev_runner/conf/shared.env b/dev_runner/conf/shared.env index 532287a17..1a0e58726 100644 --- a/dev_runner/conf/shared.env +++ b/dev_runner/conf/shared.env @@ -13,6 +13,9 @@ export DATABASE_URL="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD} # JOBS export POSTGRESQL_JOBMANAGER_DATABASE_URI="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/job_manager" +# MODELS +export POSTGRES_DB="models" + S3_ENDPOINT_URL=http://localhost MINIO_HOST=${S3_ENDPOINT_URL} MINIO_PUBLIC_HOST=${MINIO_HOST} diff --git a/dev_runner/migration.sh b/dev_runner/migration.sh old mode 100644 new mode 100755 index 410bc1a14..f682da2c4 --- a/dev_runner/migration.sh +++ b/dev_runner/migration.sh @@ -2,16 +2,10 @@ SHARED_PATH=$(realpath "./conf/shared.env") ASSETS_PATH="./conf/assets.env" -for service in "assets" "annotation" "jobs" "pipelines" "processing" "taxonomy" +for service in "assets" "annotation" "jobs" "models" "pipelines" "processing" "scheduler" "taxonomy" do echo "Migrate database for :"$service cd "../"$service source $SHARED_PATH && alembic upgrade head done - -#cd ../models -#source $SHARED_PATH && alembic upgrade head - -#cd ../scheduler -#source $SHARED_PATH && alembic upgrade head \ No newline at end of file From 513054f5a5bb26ea934b2b9ae81f91ec39b39a5f Mon Sep 17 00:00:00 2001 From: Andrei Borisevich Date: Sat, 18 Feb 2023 20:44:03 +0300 Subject: [PATCH 18/22] fix: remove python-magic-bin from dependency list --- dev_runner/pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/dev_runner/pyproject.toml b/dev_runner/pyproject.toml index 774cd1f8d..c08acd59d 100644 --- a/dev_runner/pyproject.toml +++ b/dev_runner/pyproject.toml @@ -104,7 +104,6 @@ aiosignal = "*" alembic = "*" tenant-dependency = {path = "../lib/tenants"} filter-lib = {path = "../lib/filter_lib"} -python-magic-bin = "^0.4.14" [build-system] From 958b8c6e4aa81e98e219779bee4617513612d272 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Mon, 20 Feb 2023 15:00:39 +0400 Subject: [PATCH 19/22] fix pipeliens --- .github/workflows/processing.yml | 2 ++ 
assets/tests/test_utils.py | 3 ++- lib/filter_lib/usage_example/app.py | 4 +--- pipelines/requirements.txt | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/processing.yml b/.github/workflows/processing.yml index 1589607b9..525904242 100644 --- a/.github/workflows/processing.yml +++ b/.github/workflows/processing.yml @@ -20,6 +20,8 @@ jobs: run: cd infra/docker/python_base && make build image_name=818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc/python_base:0.1.7 - name: run docker-compose up run: cd processing && docker-compose up -d --build + - name: install pytest + run: docker exec processing_web_app bash -c "pip install pytest" - name: run tests run: docker exec processing_web_app bash -c "pytest" - name: check with flake8 diff --git a/assets/tests/test_utils.py b/assets/tests/test_utils.py index 19cf374c0..0864d79e1 100644 --- a/assets/tests/test_utils.py +++ b/assets/tests/test_utils.py @@ -187,7 +187,8 @@ def test_file_processor_is_file_updated_status_not_updated(update_file_status): @patch("assets.utils.common_utils.FileProcessor.is_file_updated") @patch("assets.utils.common_utils.FileProcessor.is_blank_is_created") @patch( - "assets.utils.common_utils.FileProcessor.is_original_file_uploaded_to_storage" + "assets.utils.common_utils.FileProcessor." + "is_original_file_uploaded_to_storage" ) @patch("assets.utils.common_utils.FileProcessor.is_uploaded_to_storage") @patch("assets.utils.common_utils.FileProcessor.is_inserted_to_database") diff --git a/lib/filter_lib/usage_example/app.py b/lib/filter_lib/usage_example/app.py index 19882eb70..e3f18d07e 100644 --- a/lib/filter_lib/usage_example/app.py +++ b/lib/filter_lib/usage_example/app.py @@ -57,9 +57,7 @@ def create_new_user( @app.post("/users/search", tags=["users"], response_model=Page[UserOut]) -def search_users( - request: UserFilterModel, session: Session = Depends(get_db) # type: ignore # noqa -) -> Page[UserOut]: +def search_users(request: UserFilterModel, session: Session = Depends(get_db)) -> Page[UserOut]: # type: ignore # noqa query = session.query(User) filter_args = map_request_to_filter(request.dict(), "User") # type: ignore query, pagination = form_query(filter_args, query) diff --git a/pipelines/requirements.txt b/pipelines/requirements.txt index 53b9b4646..81b85f1b8 100644 --- a/pipelines/requirements.txt +++ b/pipelines/requirements.txt @@ -3,7 +3,7 @@ fastapi~=0.70.0 pydantic==1.8.2 psycopg2-binary==2.9.1 sqlalchemy==1.3.23 -SQLAlchemy-Utils==0.37.8 +SQLAlchemy-Utils~=0.38.3 uvicorn[standard]==0.15.0 minio==7.1.1 alembic==1.7.5 From 844754fd42d8dffb9ea0b90bcbcaf262f2adf506 Mon Sep 17 00:00:00 2001 From: yarnaid Date: Mon, 20 Feb 2023 15:14:43 +0400 Subject: [PATCH 20/22] fix pipeliens --- pipelines/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelines/Dockerfile b/pipelines/Dockerfile index c831dd29e..1e9db464e 100644 --- a/pipelines/Dockerfile +++ b/pipelines/Dockerfile @@ -3,14 +3,14 @@ FROM ${base_image} as build WORKDIR /opt/pipeline_executor -COPY pipelines src +COPY pipelines pipelines COPY alembic alembic COPY alembic.ini wait-for-it.sh .env requirements.txt version.txt setup.py ./ RUN python3 -m pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt && pip install . 
ENV PYTHONPATH /opt/pipeline_executor -CMD alembic upgrade head && uvicorn src.app:app --host 0.0.0.0 --port 8080 +CMD alembic upgrade head && uvicorn pipelines.app:app --host 0.0.0.0 --port 8080 FROM build AS development @@ -21,7 +21,7 @@ RUN pip install --no-cache-dir -r requirements_dev.txt && python3 -m pip install FROM sonarsource/sonar-scanner-cli:4.6 AS sonar -COPY pipelines /sonar/src +COPY pipelines /sonar/pipelines COPY alembic /sonar/alembic COPY tests /sonar/tests COPY sonar-project.properties /sonar/sonar-project.properties From 3990ca33ea099581e4e058552170f75df97dfbcf Mon Sep 17 00:00:00 2001 From: yarnaid Date: Mon, 20 Feb 2023 15:21:20 +0400 Subject: [PATCH 21/22] refactor: isort for pipelines --- pipelines/alembic/env.py | 6 +++--- .../0ab5e65cf34b_fix_default_type_to_inference.py | 3 +-- ...499e2b_add_original_pipeline_id_and_is_latest.py | 2 +- .../b0cbaebbddd8_change_pipeline_version_to_int.py | 2 +- pipelines/pipelines/app.py | 13 +++++++------ pipelines/pipelines/db/logger.py | 7 ++++--- pipelines/pipelines/db/service.py | 5 +++-- pipelines/pipelines/execution.py | 11 ++++++----- pipelines/pipelines/http_utils.py | 1 + pipelines/pipelines/kafka_utils.py | 1 + pipelines/pipelines/pipeline_runner.py | 1 + pipelines/pipelines/result_processing.py | 3 ++- pipelines/pipelines/schemas.py | 3 ++- pipelines/tests/db/test_logger.py | 2 +- pipelines/tests/test_http_utils.py | 2 +- 15 files changed, 35 insertions(+), 27 deletions(-) diff --git a/pipelines/alembic/env.py b/pipelines/alembic/env.py index f27c17a18..3754b3c1c 100644 --- a/pipelines/alembic/env.py +++ b/pipelines/alembic/env.py @@ -1,12 +1,12 @@ import os from logging.config import fileConfig -import pipelines.config as settings -from pipelines.db.models import Base -from pipelines.db.service import get_test_db_url from sqlalchemy import engine_from_config, pool +import pipelines.config as settings from alembic import context +from pipelines.db.models import Base +from pipelines.db.service import get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py b/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py index 690dc25c3..a1dbc20c6 100644 --- a/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py +++ b/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py @@ -5,11 +5,10 @@ Create Date: 2022-04-26 19:37:27.263471 """ -import sqlalchemy as sa -from pipelines.db import models from sqlalchemy import orm from alembic import op +from pipelines.db import models # revision identifiers, used by Alembic. revision = "0ab5e65cf34b" diff --git a/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py b/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py index e4e062c6d..46a937c4c 100644 --- a/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py +++ b/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py @@ -6,10 +6,10 @@ """ import sqlalchemy as sa -from pipelines.db import models from sqlalchemy import orm from alembic import op +from pipelines.db import models # revision identifiers, used by Alembic. 
revision = "764961499e2b" diff --git a/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py b/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py index 6638259bf..cf22b1af8 100644 --- a/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py +++ b/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py @@ -6,10 +6,10 @@ """ import sqlalchemy as sa -from pipelines.db import models from sqlalchemy import orm from alembic import op +from pipelines.db import models # revision identifiers, used by Alembic. revision = "b0cbaebbddd8" diff --git a/pipelines/pipelines/app.py b/pipelines/pipelines/app.py index 17b8e5d77..5d78b239e 100644 --- a/pipelines/pipelines/app.py +++ b/pipelines/pipelines/app.py @@ -1,19 +1,20 @@ import asyncio from typing import Any, Dict, List, Optional, Union +from fastapi import Depends, FastAPI, Header, HTTPException, status +from filter_lib import Page, form_query, map_request_to_filter, paginate +from pydantic import AnyUrl +from sqlalchemy.orm import Session +from sqlalchemy_filters.exceptions import BadFilterFormat +from tenant_dependency import TenantData, get_tenant_info + import pipelines.config as config import pipelines.db.models as dbm import pipelines.db.service as service import pipelines.execution as execution import pipelines.schemas as schemas -from fastapi import Depends, FastAPI, Header, HTTPException, status -from filter_lib import Page, form_query, map_request_to_filter, paginate from pipelines.kafka_utils import Kafka from pipelines.pipeline_runner import run_pipeline -from pydantic import AnyUrl -from sqlalchemy.orm import Session -from sqlalchemy_filters.exceptions import BadFilterFormat -from tenant_dependency import TenantData, get_tenant_info TOKEN = get_tenant_info(url=config.KEYCLOAK_URI, algorithm="RS256") diff --git a/pipelines/pipelines/db/logger.py b/pipelines/pipelines/db/logger.py index 3d5948da6..ec52b1085 100644 --- a/pipelines/pipelines/db/logger.py +++ b/pipelines/pipelines/db/logger.py @@ -1,12 +1,13 @@ import datetime +from sqlalchemy import event, insert +from sqlalchemy.engine import Connection +from sqlalchemy.orm import Mapper + import pipelines.db.models as models import pipelines.db.service as service import pipelines.pipeline_runner as runner import pipelines.schemas as schemas -from sqlalchemy import event, insert -from sqlalchemy.engine import Connection -from sqlalchemy.orm import Mapper def create_log(event_type: str, entity: models.Table) -> schemas.Log: diff --git a/pipelines/pipelines/db/service.py b/pipelines/pipelines/db/service.py index c24ecf77f..8a07d0512 100644 --- a/pipelines/pipelines/db/service.py +++ b/pipelines/pipelines/db/service.py @@ -2,13 +2,14 @@ import datetime from typing import Any, Callable, Dict, Generator, List, Optional, Type, Union -import pipelines.db.models as dbm from aiokafka import AIOKafkaProducer -from pipelines import config, execution, log, schemas from pydantic import AnyUrl from sqlalchemy import create_engine from sqlalchemy.orm import Session, sessionmaker +import pipelines.db.models as dbm +from pipelines import config, execution, log, schemas + logger = log.get_logger(__file__) engine = create_engine( diff --git a/pipelines/pipelines/execution.py b/pipelines/pipelines/execution.py index 7bb4f2149..e46ee8e40 100644 --- a/pipelines/pipelines/execution.py +++ b/pipelines/pipelines/execution.py @@ -8,12 +8,15 @@ from typing import Any, DefaultDict, Dict, List, Optional, Union from uuid import uuid4 -import 
pipelines.db.models as dbm -import pipelines.db.service as service -import pipelines.result_processing as postprocessing import requests from aiokafka import AIOKafkaProducer from fastapi import HTTPException, status +from pydantic import BaseModel, Field +from sqlalchemy import orm + +import pipelines.db.models as dbm +import pipelines.db.service as service +import pipelines.result_processing as postprocessing from pipelines import ( config, http_utils, @@ -23,8 +26,6 @@ service_token, webhooks, ) -from pydantic import BaseModel, Field -from sqlalchemy import orm logger = log.get_logger(__file__) minio_client = s3.get_minio_client() diff --git a/pipelines/pipelines/http_utils.py b/pipelines/pipelines/http_utils.py index 20e32eb03..8ee693a20 100644 --- a/pipelines/pipelines/http_utils.py +++ b/pipelines/pipelines/http_utils.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional import requests + from pipelines import config, log, schemas, service_token logger = log.get_logger(__file__) diff --git a/pipelines/pipelines/kafka_utils.py b/pipelines/pipelines/kafka_utils.py index 46a5cacbd..44cc7f2f3 100644 --- a/pipelines/pipelines/kafka_utils.py +++ b/pipelines/pipelines/kafka_utils.py @@ -3,6 +3,7 @@ import aiokafka from kafka import admin, errors + from pipelines import config, log logger = log.get_logger(__name__) diff --git a/pipelines/pipelines/pipeline_runner.py b/pipelines/pipelines/pipeline_runner.py index 47c628d20..432ae70f0 100644 --- a/pipelines/pipelines/pipeline_runner.py +++ b/pipelines/pipelines/pipeline_runner.py @@ -4,6 +4,7 @@ import aiokafka from aiokafka import AIOKafkaConsumer, AIOKafkaProducer + from pipelines import execution, schemas from pipelines.log import get_logger diff --git a/pipelines/pipelines/result_processing.py b/pipelines/pipelines/result_processing.py index 96155baa8..e5cce42bb 100644 --- a/pipelines/pipelines/result_processing.py +++ b/pipelines/pipelines/result_processing.py @@ -8,9 +8,10 @@ import urllib3.exceptions from minio import Minio from minio import error as minioerr -from pipelines import config, http_utils, log from pydantic import BaseModel, ValidationError +from pipelines import config, http_utils, log + logger = log.get_logger(__file__) diff --git a/pipelines/pipelines/schemas.py b/pipelines/pipelines/schemas.py index bbdf20a7a..d4f682c49 100644 --- a/pipelines/pipelines/schemas.py +++ b/pipelines/pipelines/schemas.py @@ -5,9 +5,10 @@ from enum import Enum from typing import Any, Dict, List, Optional +from pydantic import BaseModel, Field, PrivateAttr, root_validator, validator + import pipelines.db.models as dbm from pipelines import log -from pydantic import BaseModel, Field, PrivateAttr, root_validator, validator logger = log.get_logger(__file__) diff --git a/pipelines/tests/db/test_logger.py b/pipelines/tests/db/test_logger.py index 0ffead07a..81d7189dd 100644 --- a/pipelines/tests/db/test_logger.py +++ b/pipelines/tests/db/test_logger.py @@ -5,8 +5,8 @@ import pipelines.db.logger as logger import pipelines.db.models as models -import pipelines.schemas as schemas import pipelines.pipeline_runner as runner +import pipelines.schemas as schemas def test_create_log(testing_session): diff --git a/pipelines/tests/test_http_utils.py b/pipelines/tests/test_http_utils.py index 81ca2889a..162ff7796 100644 --- a/pipelines/tests/test_http_utils.py +++ b/pipelines/tests/test_http_utils.py @@ -1,8 +1,8 @@ from unittest.mock import patch +import pytest import requests -import pytest from pipelines import http_utils, schemas From 
c0c58793621c4a1bcd49d67d746aa948f86abdaa Mon Sep 17 00:00:00 2001 From: yarnaid Date: Mon, 20 Feb 2023 18:33:08 +0400 Subject: [PATCH 22/22] add web config --- annotation/alembic/env.py | 2 +- ...72a5043_add_categories_editor_url_data_.py | 3 +- ..._alter_categories_id_integer_to_varchar.py | 3 +- ...3136551008d8_agreement_metrics_relation.py | 3 +- .../36bff2d016f7_expand_validationschema.py | 3 +- .../versions/3a083a1fbba0_first_revision.py | 3 +- ...0dc1_add_name_and_job_type_to_job_model.py | 3 +- .../versions/4272d0a43ff1_agreement_score.py | 3 +- .../4cd10ac49cc2_expand_annotated_doc.py | 3 +- .../versions/615379303da2_added_links_json.py | 3 +- .../66cd6054c2d0_add_categories_tree.py | 1 - ...fb3e0d231ff_create_document_links_table.py | 1 - ...8e6343_add_extensive_coverage_parameter.py | 2 +- .../7cc1ed83c309_compare_agreement_scores.py | 1 - ...4_create_association_doc_category_table.py | 1 - ..._category_change_editor_data_attributes.py | 3 +- ...e_make_document_categories_an_array_of_.py | 1 - .../9c07a25ca06f_expand_file_model.py | 3 +- ...4_adds_overall_load_field_not_negative_.py | 1 - .../versions/c053ae380212_expand_job_model.py | 3 +- .../c06c594c7435_change_file_statuses.py | 3 +- .../cf633ca94498_add_statuses_to_job.py | 3 +- ...adds_not_nullable_tenant_field_to_jobs_.py | 1 - ...963_drop_is_link_add_type_to_categories.py | 3 +- annotation/annotation/annotations/__init__.py | 19 +- annotation/annotation/annotations/main.py | 10 +- .../annotation/annotations/resources.py | 47 ++-- annotation/annotation/categories/resources.py | 32 +-- annotation/annotation/categories/services.py | 15 +- .../annotation/distribution/__init__.py | 25 +- annotation/annotation/distribution/main.py | 5 +- .../annotation/distribution/resources.py | 31 +-- annotation/annotation/filters.py | 9 +- annotation/annotation/jobs/__init__.py | 24 +- annotation/annotation/jobs/resources.py | 78 ++---- annotation/annotation/jobs/services.py | 44 ++-- annotation/annotation/main.py | 42 ++-- annotation/annotation/metadata/resources.py | 5 +- .../assets_communication.py | 8 +- .../jobs_communication.py | 10 +- .../microservice_communication/task.py | 14 +- annotation/annotation/models.py | 35 +-- annotation/annotation/revisions/resources.py | 5 +- annotation/annotation/schemas/__init__.py | 98 +++----- annotation/annotation/tasks/__init__.py | 8 +- annotation/annotation/tasks/resources.py | 104 +++----- annotation/annotation/tasks/services.py | 38 ++- annotation/annotation/tasks/validation.py | 12 +- annotation/tests/conftest.py | 224 +++++++----------- annotation/tests/override_app_dependency.py | 12 +- .../tests/test_annotators_overall_load.py | 31 +-- annotation/tests/test_assets_communication.py | 11 +- annotation/tests/test_category_crud.py | 4 +- annotation/tests/test_compare_scores.py | 8 +- annotation/tests/test_cross_validation.py | 8 +- annotation/tests/test_delete_batch_tasks.py | 13 +- annotation/tests/test_distribution.py | 24 +- annotation/tests/test_finish_task.py | 27 +-- .../tests/test_get_accumulated_revisions.py | 14 +- ..._get_annotation_for_particular_revision.py | 12 +- annotation/tests/test_get_child_categories.py | 27 +-- annotation/tests/test_get_entities_status.py | 2 +- annotation/tests/test_get_job.py | 12 +- annotation/tests/test_get_job_files.py | 17 +- annotation/tests/test_get_job_progress.py | 20 +- .../tests/test_get_jobs_info_by_files.py | 4 +- annotation/tests/test_get_pages_info.py | 21 +- annotation/tests/test_get_revisions.py | 4 +- 
.../test_get_revisions_without_annotation.py | 19 +- annotation/tests/test_get_unassigned_files.py | 2 +- annotation/tests/test_get_users_for_job.py | 2 +- annotation/tests/test_job_categories.py | 20 +- annotation/tests/test_microservices_search.py | 30 +-- annotation/tests/test_post.py | 7 +- annotation/tests/test_post_annotation.py | 66 ++---- annotation/tests/test_post_job.py | 28 +-- annotation/tests/test_post_next_task.py | 34 +-- .../tests/test_post_unassgined_files.py | 25 +- annotation/tests/test_search_kafka.py | 21 +- annotation/tests/test_start_job.py | 10 +- annotation/tests/test_tasks_crud_cr.py | 16 +- annotation/tests/test_tasks_crud_ud.py | 13 +- annotation/tests/test_update_job.py | 34 +-- annotation/tests/test_validation.py | 41 ++-- assets/alembic/env.py | 6 +- ..._add_original_ext_column_to_files_table.py | 3 +- .../versions/9e837ea0c11d_image_pages.py | 2 +- .../versions/afa33cc83d57_new_fields.py | 4 +- .../versions/fe5926249504_count_datasets.py | 2 +- assets/assets/db/models.py | 3 +- assets/assets/db/service.py | 7 +- assets/assets/routers/bonds_router.py | 1 + assets/assets/routers/datasets_router.py | 1 + assets/assets/routers/files_router.py | 1 + assets/assets/routers/minio_router.py | 1 + assets/assets/routers/s3_router.py | 1 + assets/assets/utils/common_utils.py | 1 + assets/assets/utils/convert_service_utils.py | 1 + assets/assets/utils/minio_utils.py | 3 +- assets/assets/utils/s3_utils.py | 1 + assets/tests/conftest.py | 6 +- assets/tests/test_helpers.py | 7 +- assets/tests/test_utils.py | 16 +- common/model_api/model_api/pipeline.py | 7 +- common/model_api/tests/test_api.py | 11 +- common/model_api/tests/test_preprocessing.py | 13 +- common/model_api/tests/test_smoke.py | 4 +- convert/convert/coco_export/convert.py | 1 + convert/convert/coco_import/import_service.py | 3 +- convert/convert/config.py | 3 +- .../badgerdoc_format/annotation_converter.py | 16 +- .../annotation_converter_practic.py | 9 +- .../badgerdoc_format/badgerdoc_format.py | 10 +- .../badgerdoc_format/plain_text_converter.py | 10 +- .../badgerdoc_to_label_studio_use_case.py | 13 +- .../label_studio_to_badgerdoc_use_case.py | 38 ++- .../labelstudio_format/label_studio_format.py | 16 +- .../models/__init__.py | 12 +- .../text_to_badgerdoc_use_case.py | 5 +- convert/convert/main.py | 2 +- convert/convert/routers/coco.py | 15 +- convert/convert/routers/label_studio.py | 20 +- convert/convert/routers/text.py | 8 +- convert/convert/utils/render_pdf_page.py | 1 + convert/convert/utils/s3_utils.py | 10 +- .../tests/test_label_studio/test_export.py | 19 +- .../tests/test_label_studio/test_import.py | 25 +- .../test_label_studio/test_text_wrapper.py | 5 +- dev_runner/conf/shared.env | 2 +- dev_runner/dev_runner/conf.py | 2 +- dev_runner/dev_runner/runners/jobs_runner.py | 3 +- .../dev_runner/runners/models_runner.py | 3 +- .../dev_runner/runners/pipelines_runner.py | 3 +- .../dev_runner/runners/processing_runner.py | 6 +- .../dev_runner/runners/search_runner.py | 3 +- .../dev_runner/runners/taxonomy_runner.py | 3 +- dev_runner/goten.env | 2 +- dev_runner/migration.sh | 2 + dev_runner/start.py | 3 +- jobs/alembic/env.py | 4 +- jobs/alembic/versions/13ac4bb3abd2_.py | 3 +- jobs/alembic/versions/3f5b2d199d38_.py | 1 - jobs/alembic/versions/7511c6790067_.py | 3 +- jobs/alembic/versions/83694c0b2df6_.py | 1 - jobs/alembic/versions/86f432539475_.py | 1 - jobs/alembic/versions/9229e70d2791_.py | 3 +- ...add_start_manual_job_automatically_flag.py | 1 - jobs/alembic/versions/d1ddce2d5352_.py | 1 - 
...0dd492b17f_add_extensive_coverage_param.py | 1 - jobs/jobs/create_job_funcs.py | 5 +- jobs/jobs/db_service.py | 5 +- jobs/jobs/main.py | 11 +- jobs/jobs/utils.py | 25 +- jobs/tests/conftest.py | 19 +- .../test_API_functions/test_change_job.py | 9 +- .../test_API_functions/test_create_job.py | 3 +- .../test_other_API_functions.py | 8 +- .../test_API_functions/test_search_jobs.py | 6 +- jobs/tests/test_utils.py | 3 +- lib/filter_lib/tests/test_dict_parser.py | 1 - lib/filter_lib/tests/test_enum_generator.py | 10 +- lib/filter_lib/tests/test_pagination.py | 13 +- lib/filter_lib/tests/test_query_modifier.py | 11 +- lib/filter_lib/tests/test_schema_generator.py | 8 +- lib/filter_lib/usage_example/app.py | 9 +- lib/tenants/tests/conftest.py | 3 +- lib/tenants/tests/test_dependency_hs256.py | 7 +- lib/tenants/tests/test_dependency_rs256.py | 7 +- lib/tenants/tests/test_schema.py | 1 - models/alembic/env.py | 6 +- ...4fd362de_add_description_field_to_model.py | 1 - .../5c3092bc3517_add_columns_to_basement.py | 1 - ...a_added_archive_field_to_training_model.py | 1 - .../versions/683f401ed33e_create_tables.py | 3 +- ...0bca_add_annotation_dataset_to_training.py | 1 - .../826680104247_pod_limits_column.py | 3 +- ...eff4c79fd3_modify_basement_and_training.py | 3 +- ...add_latest_and_version_columns_to_model.py | 1 - ...9f68f00d4_add_field_type_to_table_model.py | 1 - models/models/colab_ssh_utils.py | 5 +- models/models/crud.py | 11 +- models/models/db.py | 13 +- models/models/main.py | 27 +-- models/models/routers/basements_routers.py | 22 +- .../models/routers/deployed_models_routers.py | 1 + models/models/routers/models_routers.py | 14 +- models/models/routers/training_routers.py | 48 ++-- models/models/schemas.py | 3 +- models/models/utils.py | 26 +- models/tests/conftest.py | 23 +- models/tests/test_basement_routers.py | 2 +- models/tests/test_colab_interactions.py | 7 +- models/tests/test_colab_start_training.py | 7 +- models/tests/test_crud.py | 4 +- models/tests/test_models_routers.py | 6 +- models/tests/test_schemas.py | 2 +- models/tests/test_utils.py | 9 +- pipelines/alembic/env.py | 2 +- .../08ad5deb23eb_remove_token_column.py | 1 - ...3c56436d0_change_task_webhook_to_string.py | 2 +- ...5e65cf34b_fix_default_type_to_inference.py | 2 +- pipelines/alembic/versions/29f072fb5c9c_.py | 1 - pipelines/alembic/versions/58fa5399caa9.py | 1 - .../alembic/versions/5fd9d1fdcf5b_init.py | 3 +- ..._add_original_pipeline_id_and_is_latest.py | 2 +- ...69_add_type_description_and_summary_to_.py | 1 - ...aebbddd8_change_pipeline_version_to_int.py | 2 +- .../c26caf5e8a19_add_webhook_column.py | 1 - .../cd396f8a2df1_change_token_column_type.py | 1 - ...dd_parent_step_and_tenant_to_execution_.py | 3 +- pipelines/pipelines/execution.py | 11 +- pipelines/tests/conftest.py | 6 +- pipelines/tests/db/test_service.py | 2 +- pipelines/tests/test_app.py | 2 +- pipelines/tests/test_execution.py | 2 +- pipelines/tests/test_schemas.py | 2 +- processing/alembic/env.py | 6 +- .../alembic/versions/52af1473946f_init.py | 3 +- .../versions/8e973b70b26f_noneasnull.py | 2 +- .../versions/f637b13c744d_renamed_column.py | 3 +- .../processing/health_check_easy_ocr.py | 7 +- processing/processing/main.py | 18 +- .../processing/send_preprocess_results.py | 1 + processing/processing/tasks.py | 11 +- processing/processing/text_merge.py | 1 + processing/processing/utils/aiohttp_utils.py | 1 + processing/processing/utils/minio_utils.py | 1 + processing/processing/utils/utils.py | 3 +- processing/tests/conftest.py | 4 +- 
.../tests/integration/test_integration.py | 1 - processing/tests/test_text_merge.py | 18 +- scheduler/alembic/env.py | 4 +- scheduler/alembic/versions/0cadbdb7f0ea_.py | 3 +- scheduler/alembic/versions/449be82736bd_.py | 1 - scheduler/scheduler/app.py | 3 +- scheduler/scheduler/db/service.py | 3 +- scheduler/scheduler/heartbeat.py | 3 +- scheduler/scheduler/runner.py | 3 +- scheduler/scheduler/unit.py | 2 +- scheduler/tests/test_heartbeat.py | 2 +- scheduler/tests/test_service.py | 3 +- scheduler/tests/test_unit.py | 2 +- search/search/es.py | 1 + search/search/harvester.py | 5 +- search/search/main.py | 7 +- search/search/schemas/facets.py | 3 +- search/search/schemas/pieces.py | 1 + search/tests/conftest.py | 11 +- search/tests/test_harvester.py | 1 + taxonomy/alembic/env.py | 4 +- ...ecbed_add_association_taxonomy_category.py | 1 - .../versions/bdea8a93cafe_first_revision.py | 1 - .../d3ba69ca9d97_change_category_linking.py | 1 - taxonomy/documentation/update_docs.py | 1 + taxonomy/taxonomy/main.py | 24 +- taxonomy/taxonomy/models.py | 12 +- taxonomy/taxonomy/schemas/__init__.py | 27 +-- taxonomy/taxonomy/schemas/taxon.py | 1 + taxonomy/taxonomy/taxon/resources.py | 28 +-- taxonomy/taxonomy/taxon/services.py | 8 +- taxonomy/taxonomy/taxonomy/resources.py | 41 ++-- taxonomy/taxonomy/taxonomy/services.py | 11 +- taxonomy/tests/conftest.py | 21 +- taxonomy/tests/test_taxon_crud.py | 2 +- taxonomy/tests/test_taxonomy_router.py | 2 +- users/tests/keycloak/test_query.py | 3 +- users/tests/keycloak/test_schemas.py | 4 +- users/tests/keycloak/test_utils.py | 2 +- users/tests/test_main.py | 3 +- users/tests/test_utils.py | 1 + users/users/keycloak/query.py | 1 + users/users/main.py | 10 +- users/users/s3.py | 1 + users/users/utils.py | 1 + web/local.env | 41 ++++ 275 files changed, 1067 insertions(+), 1816 deletions(-) create mode 100644 web/local.env diff --git a/annotation/alembic/env.py b/annotation/alembic/env.py index 79acffc7c..b0e56d610 100644 --- a/annotation/alembic/env.py +++ b/annotation/alembic/env.py @@ -1,9 +1,9 @@ import os from logging.config import fileConfig +from alembic import context # type: ignore from sqlalchemy import engine_from_config, pool -from alembic import context # type: ignore from annotation.database import SQLALCHEMY_DATABASE_URL from annotation.utils import get_test_db_url diff --git a/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py b/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py index f48b2e818..b30b4becd 100644 --- a/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py +++ b/annotation/alembic/versions/1edef72a5043_add_categories_editor_url_data_.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "1edef72a5043" diff --git a/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py b/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py index e01797b0b..89a03e59f 100644 --- a/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py +++ b/annotation/alembic/versions/2b3ed53127ea_alter_categories_id_integer_to_varchar.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.engine.reflection import Inspector - from alembic import op +from sqlalchemy.engine.reflection import Inspector # revision identifiers, used by Alembic. 
revision = "2b3ed53127ea" diff --git a/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py b/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py index 2fbb85886..6b752eb98 100644 --- a/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py +++ b/annotation/alembic/versions/3136551008d8_agreement_metrics_relation.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "3136551008d8" diff --git a/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py b/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py index 2a3d17f77..63d6f97ab 100644 --- a/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py +++ b/annotation/alembic/versions/36bff2d016f7_expand_validationschema.py @@ -8,9 +8,8 @@ from enum import Enum import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "36bff2d016f7" diff --git a/annotation/alembic/versions/3a083a1fbba0_first_revision.py b/annotation/alembic/versions/3a083a1fbba0_first_revision.py index 0020e61d2..a0b245f19 100644 --- a/annotation/alembic/versions/3a083a1fbba0_first_revision.py +++ b/annotation/alembic/versions/3a083a1fbba0_first_revision.py @@ -6,11 +6,10 @@ """ import sqlalchemy as sa +from alembic import op from sqlalchemy.dialects import postgresql from sqlalchemy.engine.reflection import Inspector -from alembic import op - # revision identifiers, used by Alembic. revision = "3a083a1fbba0" down_revision = None diff --git a/annotation/alembic/versions/416952520dc1_add_name_and_job_type_to_job_model.py b/annotation/alembic/versions/416952520dc1_add_name_and_job_type_to_job_model.py index 7e2667058..15937f6d5 100644 --- a/annotation/alembic/versions/416952520dc1_add_name_and_job_type_to_job_model.py +++ b/annotation/alembic/versions/416952520dc1_add_name_and_job_type_to_job_model.py @@ -8,9 +8,8 @@ from enum import Enum import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "416952520dc1" diff --git a/annotation/alembic/versions/4272d0a43ff1_agreement_score.py b/annotation/alembic/versions/4272d0a43ff1_agreement_score.py index b48c75b06..41a8b43f1 100644 --- a/annotation/alembic/versions/4272d0a43ff1_agreement_score.py +++ b/annotation/alembic/versions/4272d0a43ff1_agreement_score.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "4272d0a43ff1" diff --git a/annotation/alembic/versions/4cd10ac49cc2_expand_annotated_doc.py b/annotation/alembic/versions/4cd10ac49cc2_expand_annotated_doc.py index 0299c744b..6635d5dfa 100644 --- a/annotation/alembic/versions/4cd10ac49cc2_expand_annotated_doc.py +++ b/annotation/alembic/versions/4cd10ac49cc2_expand_annotated_doc.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
revision = "4cd10ac49cc2" diff --git a/annotation/alembic/versions/615379303da2_added_links_json.py b/annotation/alembic/versions/615379303da2_added_links_json.py index de05a782f..cbc5b6f69 100644 --- a/annotation/alembic/versions/615379303da2_added_links_json.py +++ b/annotation/alembic/versions/615379303da2_added_links_json.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "615379303da2" diff --git a/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py b/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py index 58c611a5f..3a81f64e9 100644 --- a/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py +++ b/annotation/alembic/versions/66cd6054c2d0_add_categories_tree.py @@ -7,7 +7,6 @@ """ import sqlalchemy as sa import sqlalchemy_utils - from alembic import op # revision identifiers, used by Alembic. diff --git a/annotation/alembic/versions/6fb3e0d231ff_create_document_links_table.py b/annotation/alembic/versions/6fb3e0d231ff_create_document_links_table.py index bcd5eb461..01c22bab2 100644 --- a/annotation/alembic/versions/6fb3e0d231ff_create_document_links_table.py +++ b/annotation/alembic/versions/6fb3e0d231ff_create_document_links_table.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py b/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py index 08d6dc0be..75e333858 100644 --- a/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py +++ b/annotation/alembic/versions/71095b8e6343_add_extensive_coverage_parameter.py @@ -6,8 +6,8 @@ """ import sqlalchemy as sa - from alembic import op + from annotation.models import ValidationSchema # revision identifiers, used by Alembic. diff --git a/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py b/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py index a96504e7c..119b3603d 100644 --- a/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py +++ b/annotation/alembic/versions/7cc1ed83c309_compare_agreement_scores.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/annotation/alembic/versions/8ea2ff0fea64_create_association_doc_category_table.py b/annotation/alembic/versions/8ea2ff0fea64_create_association_doc_category_table.py index c349e14d6..e81e79300 100644 --- a/annotation/alembic/versions/8ea2ff0fea64_create_association_doc_category_table.py +++ b/annotation/alembic/versions/8ea2ff0fea64_create_association_doc_category_table.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py b/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py index c16f0523e..fe0fe5ac5 100644 --- a/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py +++ b/annotation/alembic/versions/8fbac489cef2_category_change_editor_data_attributes.py @@ -7,9 +7,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
revision = "8fbac489cef2" diff --git a/annotation/alembic/versions/9083dea3783e_make_document_categories_an_array_of_.py b/annotation/alembic/versions/9083dea3783e_make_document_categories_an_array_of_.py index 7e7eddea1..b675016b8 100644 --- a/annotation/alembic/versions/9083dea3783e_make_document_categories_an_array_of_.py +++ b/annotation/alembic/versions/9083dea3783e_make_document_categories_an_array_of_.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/annotation/alembic/versions/9c07a25ca06f_expand_file_model.py b/annotation/alembic/versions/9c07a25ca06f_expand_file_model.py index 971f25522..7db9c3788 100644 --- a/annotation/alembic/versions/9c07a25ca06f_expand_file_model.py +++ b/annotation/alembic/versions/9c07a25ca06f_expand_file_model.py @@ -8,9 +8,8 @@ from enum import Enum import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "9c07a25ca06f" diff --git a/annotation/alembic/versions/bda0eac5ce64_adds_overall_load_field_not_negative_.py b/annotation/alembic/versions/bda0eac5ce64_adds_overall_load_field_not_negative_.py index 4424ba0b4..dd84a9356 100644 --- a/annotation/alembic/versions/bda0eac5ce64_adds_overall_load_field_not_negative_.py +++ b/annotation/alembic/versions/bda0eac5ce64_adds_overall_load_field_not_negative_.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/annotation/alembic/versions/c053ae380212_expand_job_model.py b/annotation/alembic/versions/c053ae380212_expand_job_model.py index dd0d167c6..925580630 100644 --- a/annotation/alembic/versions/c053ae380212_expand_job_model.py +++ b/annotation/alembic/versions/c053ae380212_expand_job_model.py @@ -8,9 +8,8 @@ from enum import Enum import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "c053ae380212" diff --git a/annotation/alembic/versions/c06c594c7435_change_file_statuses.py b/annotation/alembic/versions/c06c594c7435_change_file_statuses.py index dd437a7e7..6ebef590f 100644 --- a/annotation/alembic/versions/c06c594c7435_change_file_statuses.py +++ b/annotation/alembic/versions/c06c594c7435_change_file_statuses.py @@ -8,9 +8,8 @@ from enum import Enum import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "c06c594c7435" diff --git a/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py b/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py index f26495237..0be193aaf 100644 --- a/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py +++ b/annotation/alembic/versions/cf633ca94498_add_statuses_to_job.py @@ -8,9 +8,8 @@ from enum import Enum import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
revision = "cf633ca94498" diff --git a/annotation/alembic/versions/d30649e367e0_adds_not_nullable_tenant_field_to_jobs_.py b/annotation/alembic/versions/d30649e367e0_adds_not_nullable_tenant_field_to_jobs_.py index 436dc8387..4db73da7b 100644 --- a/annotation/alembic/versions/d30649e367e0_adds_not_nullable_tenant_field_to_jobs_.py +++ b/annotation/alembic/versions/d30649e367e0_adds_not_nullable_tenant_field_to_jobs_.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py b/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py index 52b44dd70..87fd142b8 100644 --- a/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py +++ b/annotation/alembic/versions/f44cabeef963_drop_is_link_add_type_to_categories.py @@ -8,9 +8,8 @@ from enum import Enum import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "f44cabeef963" diff --git a/annotation/annotation/annotations/__init__.py b/annotation/annotation/annotations/__init__.py index 738e7380e..38cf2af3e 100644 --- a/annotation/annotation/annotations/__init__.py +++ b/annotation/annotation/annotations/__init__.py @@ -1,15 +1,10 @@ -from annotation.annotations.main import ( - LATEST, - MANIFEST, - S3_START_PATH, - accumulate_pages_info, - add_search_annotation_producer, - check_task_pages, - construct_annotated_doc, - create_manifest_json, - get_pages_sha, - row_to_dict, -) +from annotation.annotations.main import (LATEST, MANIFEST, S3_START_PATH, + accumulate_pages_info, + add_search_annotation_producer, + check_task_pages, + construct_annotated_doc, + create_manifest_json, get_pages_sha, + row_to_dict) __all__ = [ add_search_annotation_producer, diff --git a/annotation/annotation/annotations/main.py b/annotation/annotation/annotations/main.py index f6fae53c6..5e6f94256 100644 --- a/annotation/annotation/annotations/main.py +++ b/annotation/annotation/annotations/main.py @@ -18,13 +18,9 @@ from annotation.kafka_client import KAFKA_BOOTSTRAP_SERVER, KAFKA_SEARCH_TOPIC from annotation.kafka_client import producers as kafka_producers from annotation.models import AnnotatedDoc, DocumentLinks -from annotation.schemas import ( - AnnotatedDocSchema, - DocForSaveSchema, - PageSchema, - ParticularRevisionSchema, - RevisionLink, -) +from annotation.schemas import (AnnotatedDocSchema, DocForSaveSchema, + PageSchema, ParticularRevisionSchema, + RevisionLink) load_dotenv(find_dotenv()) ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL") diff --git a/annotation/annotation/annotations/resources.py b/annotation/annotation/annotations/resources.py index 5071acd66..e25a7cbdb 100644 --- a/annotation/annotation/annotations/resources.py +++ b/annotation/annotation/annotations/resources.py @@ -8,42 +8,27 @@ from annotation.database import get_db from annotation.errors import NoSuchRevisionsError -from annotation.microservice_communication.assets_communication import ( - get_file_path_and_bucket, -) -from annotation.microservice_communication.search import ( - X_CURRENT_TENANT_HEADER, -) -from annotation.schemas import ( - AnnotatedDocSchema, - BadRequestErrorSchema, - ConnectionErrorSchema, - DocForSaveSchema, - JobOutSchema, - NotFoundErrorSchema, - PageOutSchema, - ParticularRevisionSchema, - ValidationSchema, -) +from 
annotation.microservice_communication.assets_communication import \ + get_file_path_and_bucket +from annotation.microservice_communication.search import \ + X_CURRENT_TENANT_HEADER +from annotation.schemas import (AnnotatedDocSchema, BadRequestErrorSchema, + ConnectionErrorSchema, DocForSaveSchema, + JobOutSchema, NotFoundErrorSchema, + PageOutSchema, ParticularRevisionSchema, + ValidationSchema) from annotation.tags import ANNOTATION_TAG, JOBS_TAG, REVISION_TAG from annotation.tasks import update_task_status from ..models import AnnotatedDoc, File, Job, ManualAnnotationTask from ..token_dependency import TOKEN -from .main import ( - LATEST, - accumulate_pages_info, - add_search_annotation_producer, - check_if_kafka_message_is_needed, - check_null_fields, - check_task_pages, - construct_annotated_doc, - construct_particular_rev_response, - find_all_revisions_pages, - find_latest_revision_pages, - load_all_revisions_pages, - load_latest_revision_pages, -) +from .main import (LATEST, accumulate_pages_info, + add_search_annotation_producer, + check_if_kafka_message_is_needed, check_null_fields, + check_task_pages, construct_annotated_doc, + construct_particular_rev_response, find_all_revisions_pages, + find_latest_revision_pages, load_all_revisions_pages, + load_latest_revision_pages) router = APIRouter( prefix="/annotation", diff --git a/annotation/annotation/categories/resources.py b/annotation/annotation/categories/resources.py index 8efeb687f..24d496c5f 100644 --- a/annotation/annotation/categories/resources.py +++ b/annotation/annotation/categories/resources.py @@ -8,30 +8,18 @@ from annotation.database import get_db from annotation.errors import NoSuchCategoryError from annotation.filters import CategoryFilter -from annotation.microservice_communication.search import ( - X_CURRENT_TENANT_HEADER, -) -from annotation.schemas import ( - BadRequestErrorSchema, - CategoryBaseSchema, - CategoryInputSchema, - CategoryResponseSchema, - ConnectionErrorSchema, - NotFoundErrorSchema, - SubCategoriesOutSchema, -) +from annotation.microservice_communication.search import \ + X_CURRENT_TENANT_HEADER +from annotation.schemas import (BadRequestErrorSchema, CategoryBaseSchema, + CategoryInputSchema, CategoryResponseSchema, + ConnectionErrorSchema, NotFoundErrorSchema, + SubCategoriesOutSchema) from annotation.tags import CATEGORIES_TAG -from .services import ( - add_category_db, - delete_category_db, - fetch_category_db, - filter_category_db, - insert_category_tree, - recursive_subcategory_search, - response_object_from_db, - update_category_db, -) +from .services import (add_category_db, delete_category_db, fetch_category_db, + filter_category_db, insert_category_tree, + recursive_subcategory_search, response_object_from_db, + update_category_db) router = APIRouter( prefix="/categories", diff --git a/annotation/annotation/categories/services.py b/annotation/annotation/categories/services.py index c81adbb2b..6e6532b4e 100644 --- a/annotation/annotation/categories/services.py +++ b/annotation/annotation/categories/services.py @@ -10,19 +10,12 @@ from sqlalchemy_utils import Ltree from annotation import logger as app_logger -from annotation.errors import ( - CheckFieldError, - ForeignKeyError, - NoSuchCategoryError, - SelfParentError, -) +from annotation.errors import (CheckFieldError, ForeignKeyError, + NoSuchCategoryError, SelfParentError) from annotation.filters import CategoryFilter from annotation.models import Category, Job -from annotation.schemas import ( - CategoryInputSchema, - 
CategoryORMSchema, - CategoryResponseSchema, -) +from annotation.schemas import (CategoryInputSchema, CategoryORMSchema, + CategoryResponseSchema) cache = TTLCache(maxsize=128, ttl=300) diff --git a/annotation/annotation/distribution/__init__.py b/annotation/annotation/distribution/__init__.py index 2807c7548..0b5971176 100644 --- a/annotation/annotation/distribution/__init__.py +++ b/annotation/annotation/distribution/__init__.py @@ -1,17 +1,14 @@ -from annotation.distribution.main import ( - add_unassigned_file, - calculate_users_load, - distribute, - distribute_annotation_partial_files, - distribute_tasks, - distribute_validation_partial_files, - distribute_whole_files, - find_annotated_pages, - find_files_for_task, - find_unassigned_files, - find_unassigned_pages, - prepare_response, -) +from annotation.distribution.main import (add_unassigned_file, + calculate_users_load, distribute, + distribute_annotation_partial_files, + distribute_tasks, + distribute_validation_partial_files, + distribute_whole_files, + find_annotated_pages, + find_files_for_task, + find_unassigned_files, + find_unassigned_pages, + prepare_response) __all__ = [ add_unassigned_file, diff --git a/annotation/annotation/distribution/main.py b/annotation/annotation/distribution/main.py index 08c47ddc0..e86d92efd 100644 --- a/annotation/annotation/distribution/main.py +++ b/annotation/annotation/distribution/main.py @@ -48,9 +48,8 @@ from sqlalchemy.orm import Session from annotation.jobs import create_user, read_user -from annotation.microservice_communication.assets_communication import ( - FilesForDistribution, -) +from annotation.microservice_communication.assets_communication import \ + FilesForDistribution from annotation.models import File, User from annotation.schemas import TaskStatusEnumSchema, ValidationSchema from annotation.tasks import create_tasks as create_db_tasks diff --git a/annotation/annotation/distribution/resources.py b/annotation/annotation/distribution/resources.py index a8ef4a6ff..516934b50 100644 --- a/annotation/annotation/distribution/resources.py +++ b/annotation/annotation/distribution/resources.py @@ -6,31 +6,18 @@ from tenant_dependency import TenantData from annotation.database import get_db -from annotation.distribution import ( - distribute, - find_unassigned_files, - prepare_response, -) +from annotation.distribution import (distribute, find_unassigned_files, + prepare_response) from annotation.errors import FieldConstraintError -from annotation.jobs import ( - check_annotators, - check_validators, - get_job_attributes_for_post, -) +from annotation.jobs import (check_annotators, check_validators, + get_job_attributes_for_post) from annotation.microservice_communication.assets_communication import ( - get_files_info, - prepare_files_for_distribution, -) -from annotation.microservice_communication.search import ( - X_CURRENT_TENANT_HEADER, -) + get_files_info, prepare_files_for_distribution) +from annotation.microservice_communication.search import \ + X_CURRENT_TENANT_HEADER from annotation.models import File, Job, User -from annotation.schemas import ( - BadRequestErrorSchema, - ConnectionErrorSchema, - ManualAnnotationTaskSchema, - TaskInfoSchema, -) +from annotation.schemas import (BadRequestErrorSchema, ConnectionErrorSchema, + ManualAnnotationTaskSchema, TaskInfoSchema) from annotation.tags import TASKS_TAG from annotation.token_dependency import TOKEN diff --git a/annotation/annotation/filters.py b/annotation/annotation/filters.py index c8f8d1448..b07788120 100644 --- 
a/annotation/annotation/filters.py +++ b/annotation/annotation/filters.py @@ -1,12 +1,7 @@ from filter_lib import create_filter_model -from annotation.models import ( - AnnotatedDoc, - Category, - Job, - ManualAnnotationTask, - User, -) +from annotation.models import (AnnotatedDoc, Category, Job, + ManualAnnotationTask, User) CategoryFilter = create_filter_model( Category, diff --git a/annotation/annotation/jobs/__init__.py b/annotation/annotation/jobs/__init__.py index 8ba05bfbd..47c191cc0 100644 --- a/annotation/annotation/jobs/__init__.py +++ b/annotation/annotation/jobs/__init__.py @@ -1,19 +1,11 @@ -from annotation.jobs.services import ( - check_annotators, - check_validators, - clean_tasks_before_jobs_update, - collect_job_names, - create_user, - delete_redundant_users, - delete_tasks, - get_job, - get_job_attributes_for_post, - read_user, - recalculate_file_pages, - update_files, - update_inner_job_status, - update_user_overall_load, -) +from annotation.jobs.services import (check_annotators, check_validators, + clean_tasks_before_jobs_update, + collect_job_names, create_user, + delete_redundant_users, delete_tasks, + get_job, get_job_attributes_for_post, + read_user, recalculate_file_pages, + update_files, update_inner_job_status, + update_user_overall_load) __all__ = [ update_inner_job_status, diff --git a/annotation/annotation/jobs/resources.py b/annotation/annotation/jobs/resources.py index 030e860fb..ffb32f38b 100644 --- a/annotation/annotation/jobs/resources.py +++ b/annotation/annotation/jobs/resources.py @@ -1,15 +1,8 @@ from typing import Dict, List, Optional, Set, Union from uuid import UUID -from fastapi import ( - APIRouter, - Depends, - HTTPException, - Path, - Query, - Response, - status, -) +from fastapi import (APIRouter, Depends, HTTPException, Path, Query, Response, + status) from filter_lib import Page from sqlalchemy import and_ from sqlalchemy.orm import Session @@ -23,57 +16,30 @@ from annotation.database import get_db from annotation.distribution import distribute from annotation.filters import CategoryFilter -from annotation.microservice_communication.assets_communication import ( - get_files_info, -) +from annotation.microservice_communication.assets_communication import \ + get_files_info from annotation.microservice_communication.jobs_communication import ( - JobUpdateException, - update_job_status, -) -from annotation.microservice_communication.search import ( - X_CURRENT_TENANT_HEADER, -) -from annotation.schemas import ( - BadRequestErrorSchema, - CategoryResponseSchema, - ConnectionErrorSchema, - FileStatusEnumSchema, - JobFilesInfoSchema, - JobInfoSchema, - JobPatchSchema, - JobProgressSchema, - JobStatusEnumSchema, - JobTypeEnumSchema, - ManualAnnotationTaskSchema, - NotFoundErrorSchema, - TaskStatusEnumSchema, - UnassignedFilesInfoSchema, - ValidationSchema, -) + JobUpdateException, update_job_status) +from annotation.microservice_communication.search import \ + X_CURRENT_TENANT_HEADER +from annotation.schemas import (BadRequestErrorSchema, CategoryResponseSchema, + ConnectionErrorSchema, FileStatusEnumSchema, + JobFilesInfoSchema, JobInfoSchema, + JobPatchSchema, JobProgressSchema, + JobStatusEnumSchema, JobTypeEnumSchema, + ManualAnnotationTaskSchema, + NotFoundErrorSchema, TaskStatusEnumSchema, + UnassignedFilesInfoSchema, ValidationSchema) from annotation.tags import FILES_TAG, JOBS_TAG from annotation.token_dependency import TOKEN -from ..models import ( - AnnotatedDoc, - Category, - File, - Job, - ManualAnnotationTask, - User, -) -from 
.services import ( - clean_tasks_before_jobs_update, - collect_job_names, - delete_redundant_users, - filter_job_categories, - find_users, - get_job, - get_jobs_by_files, - update_inner_job_status, - update_job_categories, - update_job_files, - update_jobs_users, -) +from ..models import (AnnotatedDoc, Category, File, Job, ManualAnnotationTask, + User) +from .services import (clean_tasks_before_jobs_update, collect_job_names, + delete_redundant_users, filter_job_categories, + find_users, get_job, get_jobs_by_files, + update_inner_job_status, update_job_categories, + update_job_files, update_jobs_users) logger = app_logger.Logger diff --git a/annotation/annotation/jobs/services.py b/annotation/annotation/jobs/services.py index b87486504..d3cb5428d 100644 --- a/annotation/annotation/jobs/services.py +++ b/annotation/annotation/jobs/services.py @@ -11,36 +11,20 @@ from annotation.categories import fetch_bunch_categories_db from annotation.categories.services import response_object_from_db from annotation.database import Base -from annotation.errors import ( - EnumValidationError, - FieldConstraintError, - WrongJobError, -) -from annotation.microservice_communication.assets_communication import ( - get_files_info, -) -from annotation.microservice_communication.jobs_communication import ( - get_job_names, -) -from annotation.models import ( - Category, - File, - Job, - ManualAnnotationTask, - User, - association_job_annotator, - association_job_owner, - association_job_validator, -) -from annotation.schemas import ( - CROSS_MIN_ANNOTATORS_NUMBER, - CategoryResponseSchema, - FileStatusEnumSchema, - JobInfoSchema, - JobStatusEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) +from annotation.errors import (EnumValidationError, FieldConstraintError, + WrongJobError) +from annotation.microservice_communication.assets_communication import \ + get_files_info +from annotation.microservice_communication.jobs_communication import \ + get_job_names +from annotation.models import (Category, File, Job, ManualAnnotationTask, User, + association_job_annotator, + association_job_owner, + association_job_validator) +from annotation.schemas import (CROSS_MIN_ANNOTATORS_NUMBER, + CategoryResponseSchema, FileStatusEnumSchema, + JobInfoSchema, JobStatusEnumSchema, + TaskStatusEnumSchema, ValidationSchema) def update_inner_job_status( diff --git a/annotation/annotation/main.py b/annotation/annotation/main.py index 45f4c283b..09da927eb 100644 --- a/annotation/annotation/main.py +++ b/annotation/annotation/main.py @@ -11,31 +11,23 @@ from annotation.annotations import resources as annotations_resources from annotation.categories import resources as categories_resources from annotation.distribution import resources as distribution_resources -from annotation.errors import ( - AgreementScoreServiceException, - CheckFieldError, - EnumValidationError, - FieldConstraintError, - ForeignKeyError, - NoSuchCategoryError, - NoSuchRevisionsError, - SelfParentError, - WrongJobError, - agreement_score_service_error_handler, - category_foreign_key_error_handler, - category_parent_child_error_handler, - category_unique_field_error_handler, - db_dbapi_error_handler, - db_s3_error_handler, - db_sa_error_handler, - debug_exception_handler, - enum_validation_error_handler, - field_constraint_error_handler, - minio_no_such_bucket_error_handler, - no_such_category_error_handler, - no_such_revisions_error_handler, - wrong_job_error_handler, -) +from annotation.errors import (AgreementScoreServiceException, CheckFieldError, + 
EnumValidationError, FieldConstraintError, + ForeignKeyError, NoSuchCategoryError, + NoSuchRevisionsError, SelfParentError, + WrongJobError, + agreement_score_service_error_handler, + category_foreign_key_error_handler, + category_parent_child_error_handler, + category_unique_field_error_handler, + db_dbapi_error_handler, db_s3_error_handler, + db_sa_error_handler, debug_exception_handler, + enum_validation_error_handler, + field_constraint_error_handler, + minio_no_such_bucket_error_handler, + no_such_category_error_handler, + no_such_revisions_error_handler, + wrong_job_error_handler) from annotation.jobs import resources as jobs_resources from annotation.metadata import resources as metadata_resources from annotation.revisions import resources as revision_resources diff --git a/annotation/annotation/metadata/resources.py b/annotation/annotation/metadata/resources.py index f64c2aab1..3cbaabcee 100644 --- a/annotation/annotation/metadata/resources.py +++ b/annotation/annotation/metadata/resources.py @@ -1,8 +1,7 @@ from fastapi import APIRouter, status -from annotation.microservice_communication.search import ( - X_CURRENT_TENANT_HEADER, -) +from annotation.microservice_communication.search import \ + X_CURRENT_TENANT_HEADER from annotation.schemas import EntitiesStatusesSchema from annotation.tags import METADATA_TAG, TASKS_TAG diff --git a/annotation/annotation/microservice_communication/assets_communication.py b/annotation/annotation/microservice_communication/assets_communication.py index bf0b52eb1..77e81661d 100644 --- a/annotation/annotation/microservice_communication/assets_communication.py +++ b/annotation/annotation/microservice_communication/assets_communication.py @@ -6,12 +6,8 @@ from requests import ConnectionError, RequestException, Timeout from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, - get_response, - raise_request_exception, -) + AUTHORIZATION, BEARER, HEADER_TENANT, get_response, + raise_request_exception) load_dotenv(find_dotenv()) ASSETS_FILES_URL = os.environ.get("ASSETS_FILES_URL") diff --git a/annotation/annotation/microservice_communication/jobs_communication.py b/annotation/annotation/microservice_communication/jobs_communication.py index 2c031ddb6..604d89dbf 100644 --- a/annotation/annotation/microservice_communication/jobs_communication.py +++ b/annotation/annotation/microservice_communication/jobs_communication.py @@ -5,12 +5,10 @@ from dotenv import find_dotenv, load_dotenv from requests import RequestException -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, - get_response, -) +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT, + get_response) load_dotenv(find_dotenv()) JOBS_SEARCH_URL = os.environ.get("JOBS_SEARCH_URL") diff --git a/annotation/annotation/microservice_communication/task.py b/annotation/annotation/microservice_communication/task.py index 88ec5af6b..59536c31f 100644 --- a/annotation/annotation/microservice_communication/task.py +++ b/annotation/annotation/microservice_communication/task.py @@ -6,15 +6,11 @@ from requests import RequestException from annotation.errors import AgreementScoreServiceException -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) -from annotation.schemas import ( - AgreementScoreServiceInput, - AgreementScoreServiceResponse, -) +from annotation.microservice_communication.search import (AUTHORIZATION, + 
BEARER, + HEADER_TENANT) +from annotation.schemas import (AgreementScoreServiceInput, + AgreementScoreServiceResponse) load_dotenv(find_dotenv()) AGREEMENT_SCORE_SERVICE_URL = os.environ.get("AGREEMENT_SCORE_SERVICE_URL") diff --git a/annotation/annotation/models.py b/annotation/annotation/models.py index 5f8f40536..2fd9047ba 100644 --- a/annotation/annotation/models.py +++ b/annotation/annotation/models.py @@ -1,38 +1,21 @@ from datetime import datetime from typing import Callable -from sqlalchemy import ( - BOOLEAN, - FLOAT, - INTEGER, - TIMESTAMP, - VARCHAR, - CheckConstraint, - Column, - DateTime, - ForeignKey, - ForeignKeyConstraint, - Index, - PrimaryKeyConstraint, - Table, - func, -) +from sqlalchemy import (BOOLEAN, FLOAT, INTEGER, TIMESTAMP, VARCHAR, + CheckConstraint, Column, DateTime, ForeignKey, + ForeignKeyConstraint, Index, PrimaryKeyConstraint, + Table, func) from sqlalchemy.dialects.postgresql import ARRAY, ENUM, JSON, JSONB, UUID from sqlalchemy.orm import relationship, validates from sqlalchemy_utils import Ltree, LtreeType from annotation.database import Base from annotation.errors import CheckFieldError -from annotation.schemas import ( - DEFAULT_LOAD, - AnnotationStatisticsEventEnumSchema, - CategoryTypeSchema, - FileStatusEnumSchema, - JobStatusEnumSchema, - JobTypeEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) +from annotation.schemas import (DEFAULT_LOAD, + AnnotationStatisticsEventEnumSchema, + CategoryTypeSchema, FileStatusEnumSchema, + JobStatusEnumSchema, JobTypeEnumSchema, + TaskStatusEnumSchema, ValidationSchema) association_job_annotator = Table( "association_job_annotator", diff --git a/annotation/annotation/revisions/resources.py b/annotation/annotation/revisions/resources.py index 2bc837d27..53d9ae215 100644 --- a/annotation/annotation/revisions/resources.py +++ b/annotation/annotation/revisions/resources.py @@ -5,9 +5,8 @@ from starlette import status from annotation.database import get_db -from annotation.microservice_communication.search import ( - X_CURRENT_TENANT_HEADER, -) +from annotation.microservice_communication.search import \ + X_CURRENT_TENANT_HEADER from annotation.models import AnnotatedDoc from annotation.schemas import AnnotatedDocSchema, ConnectionErrorSchema from annotation.tags import ANNOTATION_TAG, REVISION_TAG diff --git a/annotation/annotation/schemas/__init__.py b/annotation/annotation/schemas/__init__.py index db9dd1960..f2963d825 100644 --- a/annotation/annotation/schemas/__init__.py +++ b/annotation/annotation/schemas/__init__.py @@ -1,64 +1,42 @@ -from annotation.schemas.annotations import ( - AnnotatedDocSchema, - DocForSaveSchema, - PageOutSchema, - PageSchema, - ParticularRevisionSchema, - RevisionLink, -) -from annotation.schemas.categories import ( - CategoryBaseSchema, - CategoryDataAttributeNames, - CategoryInputSchema, - CategoryORMSchema, - CategoryResponseSchema, - CategoryTypeSchema, - SubCategoriesOutSchema, -) -from annotation.schemas.errors import ( - BadRequestErrorSchema, - ConnectionErrorSchema, - NotFoundErrorSchema, -) -from annotation.schemas.jobs import ( - CROSS_MIN_ANNOTATORS_NUMBER, - DEFAULT_LOAD, - FileInfoSchema, - FileStatusEnumSchema, - JobFilesInfoSchema, - JobInfoSchema, - JobOutSchema, - JobPatchSchema, - JobProgressSchema, - JobStatusEnumSchema, - JobTypeEnumSchema, - UnassignedFileSchema, - UnassignedFilesInfoSchema, - ValidationSchema, -) +from annotation.schemas.annotations import (AnnotatedDocSchema, + DocForSaveSchema, PageOutSchema, + PageSchema, + ParticularRevisionSchema, + 
RevisionLink) +from annotation.schemas.categories import (CategoryBaseSchema, + CategoryDataAttributeNames, + CategoryInputSchema, + CategoryORMSchema, + CategoryResponseSchema, + CategoryTypeSchema, + SubCategoriesOutSchema) +from annotation.schemas.errors import (BadRequestErrorSchema, + ConnectionErrorSchema, + NotFoundErrorSchema) +from annotation.schemas.jobs import (CROSS_MIN_ANNOTATORS_NUMBER, DEFAULT_LOAD, + FileInfoSchema, FileStatusEnumSchema, + JobFilesInfoSchema, JobInfoSchema, + JobOutSchema, JobPatchSchema, + JobProgressSchema, JobStatusEnumSchema, + JobTypeEnumSchema, UnassignedFileSchema, + UnassignedFilesInfoSchema, + ValidationSchema) from annotation.schemas.metadata import EntitiesStatusesSchema -from annotation.schemas.tasks import ( - AgreementScoreComparingResult, - AgreementScoreServiceInput, - AgreementScoreServiceResponse, - AnnotationAndValidationActionsSchema, - AnnotationStatisticsEventEnumSchema, - AnnotationStatisticsInputSchema, - AnnotationStatisticsResponseSchema, - ExpandedManualAnnotationTaskSchema, - ExportTaskStatsInput, - ManualAnnotationTaskInSchema, - ManualAnnotationTaskSchema, - NameSchema, - PagesInfoSchema, - ResponseScore, - TaskInfoSchema, - TaskMetric, - TaskPatchSchema, - TaskStatusEnumSchema, - TaskStatusSchema, - ValidationEndSchema, -) +from annotation.schemas.tasks import (AgreementScoreComparingResult, + AgreementScoreServiceInput, + AgreementScoreServiceResponse, + AnnotationAndValidationActionsSchema, + AnnotationStatisticsEventEnumSchema, + AnnotationStatisticsInputSchema, + AnnotationStatisticsResponseSchema, + ExpandedManualAnnotationTaskSchema, + ExportTaskStatsInput, + ManualAnnotationTaskInSchema, + ManualAnnotationTaskSchema, NameSchema, + PagesInfoSchema, ResponseScore, + TaskInfoSchema, TaskMetric, + TaskPatchSchema, TaskStatusEnumSchema, + TaskStatusSchema, ValidationEndSchema) __all__ = [ AnnotatedDocSchema, diff --git a/annotation/annotation/tasks/__init__.py b/annotation/annotation/tasks/__init__.py index a19a0c458..5dac992e7 100644 --- a/annotation/annotation/tasks/__init__.py +++ b/annotation/annotation/tasks/__init__.py @@ -1,9 +1,5 @@ -from .services import ( - add_task_stats_record, - create_tasks, - get_task_revisions, - update_task_status, -) +from .services import (add_task_stats_record, create_tasks, get_task_revisions, + update_task_status) __all__ = [ add_task_stats_record, diff --git a/annotation/annotation/tasks/resources.py b/annotation/annotation/tasks/resources.py index e82134953..120b71af1 100644 --- a/annotation/annotation/tasks/resources.py +++ b/annotation/annotation/tasks/resources.py @@ -5,17 +5,8 @@ from uuid import UUID import dotenv -from fastapi import ( - APIRouter, - Body, - Depends, - Header, - HTTPException, - Path, - Query, - Response, - status, -) +from fastapi import (APIRouter, Body, Depends, Header, HTTPException, Path, + Query, Response, status) from fastapi.responses import JSONResponse, StreamingResponse from filter_lib import Page from sqlalchemy import and_, not_ @@ -27,75 +18,44 @@ from annotation.annotations import accumulate_pages_info, row_to_dict from annotation.database import get_db from annotation.filters import TaskFilter -from annotation.jobs import ( - collect_job_names, - delete_tasks, - get_job, - get_job_attributes_for_post, - recalculate_file_pages, - update_files, - update_inner_job_status, - update_user_overall_load, -) +from annotation.jobs import (collect_job_names, delete_tasks, get_job, + get_job_attributes_for_post, + recalculate_file_pages, update_files, + 
update_inner_job_status, update_user_overall_load) from annotation.logger import Logger -from annotation.microservice_communication.assets_communication import ( - get_file_names, -) +from annotation.microservice_communication.assets_communication import \ + get_file_names from annotation.microservice_communication.jobs_communication import ( - JobUpdateException, - update_job_status, -) + JobUpdateException, update_job_status) from annotation.microservice_communication.search import ( - X_CURRENT_TENANT_HEADER, - expand_response, -) + X_CURRENT_TENANT_HEADER, expand_response) from annotation.microservice_communication.user import ( - GetUserInfoAccessDenied, - get_user_logins, -) -from annotation.schemas import ( - AnnotationStatisticsInputSchema, - AnnotationStatisticsResponseSchema, - BadRequestErrorSchema, - ConnectionErrorSchema, - ExpandedManualAnnotationTaskSchema, - ExportTaskStatsInput, - FileStatusEnumSchema, - JobStatusEnumSchema, - ManualAnnotationTaskInSchema, - ManualAnnotationTaskSchema, - NotFoundErrorSchema, - PagesInfoSchema, - TaskPatchSchema, - TaskStatusEnumSchema, - ValidationEndSchema, - ValidationSchema, -) + GetUserInfoAccessDenied, get_user_logins) +from annotation.schemas import (AnnotationStatisticsInputSchema, + AnnotationStatisticsResponseSchema, + BadRequestErrorSchema, ConnectionErrorSchema, + ExpandedManualAnnotationTaskSchema, + ExportTaskStatsInput, FileStatusEnumSchema, + JobStatusEnumSchema, + ManualAnnotationTaskInSchema, + ManualAnnotationTaskSchema, + NotFoundErrorSchema, PagesInfoSchema, + TaskPatchSchema, TaskStatusEnumSchema, + ValidationEndSchema, ValidationSchema) from annotation.tags import REVISION_TAG, TASKS_TAG -from annotation.tasks.validation import ( - create_annotation_tasks, - create_validation_tasks, -) +from annotation.tasks.validation import (create_annotation_tasks, + create_validation_tasks) from annotation.token_dependency import TOKEN from ..models import File, Job, ManualAnnotationTask -from .services import ( - add_task_stats_record, - count_annotation_tasks, - create_annotation_task, - create_export_csv, - evaluate_agreement_score, - filter_tasks_db, - finish_validation_task, - get_task_info, - get_task_revisions, - read_annotation_task, - read_annotation_tasks, - save_agreement_metrics, - unblock_validation_tasks, - validate_task_info, - validate_user_actions, -) +from .services import (add_task_stats_record, count_annotation_tasks, + create_annotation_task, create_export_csv, + evaluate_agreement_score, filter_tasks_db, + finish_validation_task, get_task_info, + get_task_revisions, read_annotation_task, + read_annotation_tasks, save_agreement_metrics, + unblock_validation_tasks, validate_task_info, + validate_user_actions) dotenv.load_dotenv(dotenv.find_dotenv()) AGREEMENT_SCORE_ENABLED = os.getenv("AGREEMENT_SCORE_ENABLED", "false") diff --git a/annotation/annotation/tasks/services.py b/annotation/annotation/tasks/services.py index 823b41c0f..67b4acc85 100644 --- a/annotation/annotation/tasks/services.py +++ b/annotation/annotation/tasks/services.py @@ -15,31 +15,21 @@ from annotation.errors import CheckFieldError, FieldConstraintError from annotation.filters import TaskFilter from annotation.jobs import update_files, update_user_overall_load -from annotation.microservice_communication.assets_communication import ( - get_file_path_and_bucket, -) +from annotation.microservice_communication.assets_communication import \ + get_file_path_and_bucket from annotation.microservice_communication.task import get_agreement_score -from 
annotation.models import ( - AgreementMetrics, - AnnotatedDoc, - AnnotationStatistics, - File, - ManualAnnotationTask, - association_job_annotator, - association_job_validator, -) -from annotation.schemas import ( - AgreementScoreComparingResult, - AgreementScoreServiceInput, - AgreementScoreServiceResponse, - AnnotationStatisticsInputSchema, - ExportTaskStatsInput, - ManualAnnotationTaskInSchema, - ResponseScore, - TaskMetric, - TaskStatusEnumSchema, - ValidationSchema, -) +from annotation.models import (AgreementMetrics, AnnotatedDoc, + AnnotationStatistics, File, + ManualAnnotationTask, association_job_annotator, + association_job_validator) +from annotation.schemas import (AgreementScoreComparingResult, + AgreementScoreServiceInput, + AgreementScoreServiceResponse, + AnnotationStatisticsInputSchema, + ExportTaskStatsInput, + ManualAnnotationTaskInSchema, ResponseScore, + TaskMetric, TaskStatusEnumSchema, + ValidationSchema) dotenv.load_dotenv(dotenv.find_dotenv()) AGREEMENT_SCORE_MIN_MATCH = float(os.getenv("AGREEMENT_SCORE_MIN_MATCH")) diff --git a/annotation/annotation/tasks/validation.py b/annotation/annotation/tasks/validation.py index c04082c53..f0724c2fe 100644 --- a/annotation/annotation/tasks/validation.py +++ b/annotation/annotation/tasks/validation.py @@ -8,15 +8,11 @@ from sqlalchemy.orm import Session from annotation.distribution import prepare_response -from annotation.microservice_communication.assets_communication import ( - FilesForDistribution, -) +from annotation.microservice_communication.assets_communication import \ + FilesForDistribution from annotation.models import AnnotatedDoc, Job, User -from annotation.schemas import ( - AnnotationAndValidationActionsSchema, - TaskStatusEnumSchema, - ValidationSchema, -) +from annotation.schemas import (AnnotationAndValidationActionsSchema, + TaskStatusEnumSchema, ValidationSchema) from .services import create_tasks diff --git a/annotation/tests/conftest.py b/annotation/tests/conftest.py index 4e171e140..61b579bf1 100644 --- a/annotation/tests/conftest.py +++ b/annotation/tests/conftest.py @@ -9,167 +9,105 @@ import pytest import sqlalchemy import sqlalchemy_utils -from moto import mock_s3 -from sqlalchemy.engine import create_engine -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.orm import Session, sessionmaker -from sqlalchemy.orm.exc import FlushError - import tests.test_get_accumulated_revisions as accumulated_revs import tests.test_get_jobs_info_by_files as jobs_info_by_files import tests.test_validation as validation from alembic import command from alembic.config import Config -from annotation.annotations import MANIFEST, S3_START_PATH -from annotation.categories import cache -from annotation.database import SQLALCHEMY_DATABASE_URL, Base -from annotation.jobs import update_user_overall_load -from annotation.models import ( - AnnotatedDoc, - Category, - DocumentLinks, - File, - Job, - ManualAnnotationTask, - User, -) -from annotation.schemas import ( - AnnotationStatisticsInputSchema, - CategoryTypeSchema, - FileStatusEnumSchema, - JobStatusEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) -from annotation.tasks import add_task_stats_record -from annotation.utils import get_test_db_url +from moto import mock_s3 +from sqlalchemy.engine import create_engine +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session, sessionmaker +from sqlalchemy.orm.exc import FlushError from tests.override_app_dependency import TEST_TENANT -from tests.test_annotators_overall_load import ( - 
OVERALL_LOAD_CREATED_TASKS, - OVERALL_LOAD_JOBS, - OVERALL_LOAD_USERS, - TASK_FILES_OVERALL_LOAD, - VALIDATED_DOC_OVERALL_LOAD, -) -from tests.test_delete_batch_tasks import ( - DELETE_BATCH_TASKS_ANNOTATOR, - DELETE_BATCH_TASKS_FILE, - DELETE_BATCH_TASKS_JOB, - DIFF_STATUSES_TASKS, -) -from tests.test_finish_task import ( - CATEGORIES, - CATEGORIES_2, - FINISH_DOCS, - FINISH_DOCS_CHECK_DELETED_ANNOTATOR, - FINISH_TASK_1, - FINISH_TASK_1_SAME_JOB, - FINISH_TASK_2, - FINISH_TASK_2_SAME_JOB, - FINISH_TASK_CHECK_DELETE_USER_ANNOTATOR_1, - FINISH_TASK_CHECK_DELETE_USER_ANNOTATOR_2, - FINISH_TASK_CHECK_DELETE_USER_VALIDATOR, - FINISH_TASK_FILE_1, - FINISH_TASK_FILE_2, - FINISH_TASK_FILE_3, - FINISH_TASK_FILE_4, - FINISH_TASK_JOB_1, - FINISH_TASK_JOB_2, - FINISH_TASK_JOB_3, - FINISH_TASK_JOB_4, - FINISH_TASK_USER_1, - FINISH_TASK_USER_2, - FINISH_TASK_USER_3, - TASK_NOT_IN_PROGRESS_STATUS, - VALIDATION_TASKS_TO_READY, -) +from tests.test_annotators_overall_load import (OVERALL_LOAD_CREATED_TASKS, + OVERALL_LOAD_JOBS, + OVERALL_LOAD_USERS, + TASK_FILES_OVERALL_LOAD, + VALIDATED_DOC_OVERALL_LOAD) +from tests.test_delete_batch_tasks import (DELETE_BATCH_TASKS_ANNOTATOR, + DELETE_BATCH_TASKS_FILE, + DELETE_BATCH_TASKS_JOB, + DIFF_STATUSES_TASKS) +from tests.test_finish_task import (CATEGORIES, CATEGORIES_2, FINISH_DOCS, + FINISH_DOCS_CHECK_DELETED_ANNOTATOR, + FINISH_TASK_1, FINISH_TASK_1_SAME_JOB, + FINISH_TASK_2, FINISH_TASK_2_SAME_JOB, + FINISH_TASK_CHECK_DELETE_USER_ANNOTATOR_1, + FINISH_TASK_CHECK_DELETE_USER_ANNOTATOR_2, + FINISH_TASK_CHECK_DELETE_USER_VALIDATOR, + FINISH_TASK_FILE_1, FINISH_TASK_FILE_2, + FINISH_TASK_FILE_3, FINISH_TASK_FILE_4, + FINISH_TASK_JOB_1, FINISH_TASK_JOB_2, + FINISH_TASK_JOB_3, FINISH_TASK_JOB_4, + FINISH_TASK_USER_1, FINISH_TASK_USER_2, + FINISH_TASK_USER_3, + TASK_NOT_IN_PROGRESS_STATUS, + VALIDATION_TASKS_TO_READY) from tests.test_get_annotation_for_particular_revision import ( - PART_REV_ANNOTATOR, - PART_REV_DOC, - PART_REV_PAGES, -) -from tests.test_get_child_categories import ( - COMMON_CHILD_CATEGORIES, - CYCLIC_TENANT_CHILD_CATEGORIES, - OTHER_TENANT_CHILD_CATEGORY, -) -from tests.test_get_job import ( - GET_FILES, - GET_JOBS, - JOB_TEST_ANNOTATORS, - JOB_TEST_REVISIONS, -) + PART_REV_ANNOTATOR, PART_REV_DOC, PART_REV_PAGES) +from tests.test_get_child_categories import (COMMON_CHILD_CATEGORIES, + CYCLIC_TENANT_CHILD_CATEGORIES, + OTHER_TENANT_CHILD_CATEGORY) +from tests.test_get_job import (GET_FILES, GET_JOBS, JOB_TEST_ANNOTATORS, + JOB_TEST_REVISIONS) from tests.test_get_job_files import GET_JOB_FILES, GET_JOB_FILES_JOBS -from tests.test_get_job_progress import ( - FILE_TEST_PROGRESS, - JOBS_TO_TEST_PROGRESS, - TASKS_TEST_PROGRESS, -) +from tests.test_get_job_progress import (FILE_TEST_PROGRESS, + JOBS_TO_TEST_PROGRESS, + TASKS_TEST_PROGRESS) from tests.test_get_pages_info import PAGES_INFO_ENTITIES -from tests.test_get_revisions import ( - JOBS_IDS, - PAGE, - PAGES_PATHS, - REVISIONS, - USERS_IDS, -) +from tests.test_get_revisions import (JOBS_IDS, PAGE, PAGES_PATHS, REVISIONS, + USERS_IDS) from tests.test_get_revisions_without_annotation import ( - REV_WITHOUT_ANNOTATION_DOC_1, - REV_WITHOUT_ANNOTATION_DOC_2, - REV_WITHOUT_ANNOTATION_DOC_3, - REV_WITHOUT_ANNOTATION_JOB, - REV_WITHOUT_ANNOTATION_TASK, -) + REV_WITHOUT_ANNOTATION_DOC_1, REV_WITHOUT_ANNOTATION_DOC_2, + REV_WITHOUT_ANNOTATION_DOC_3, REV_WITHOUT_ANNOTATION_JOB, + REV_WITHOUT_ANNOTATION_TASK) from tests.test_get_unassigned_files import UNASSIGNED_FILES_ENTITIES -from 
tests.test_get_users_for_job import ( - USERS_FOR_JOB_ANNOTATORS, - USERS_FOR_JOB_JOBS, -) +from tests.test_get_users_for_job import (USERS_FOR_JOB_ANNOTATORS, + USERS_FOR_JOB_JOBS) from tests.test_job_categories import CATEGORIES_USERS, MOCK_ID from tests.test_post import POST_JOBS, TEST_POST_USERS -from tests.test_post_annotation import ( - ANNOTATION_VALIDATION_TASKS_PG, - MANIFEST_IN_MINIO, - POST_ANNOTATION_ANNOTATOR, - POST_ANNOTATION_FILE_1, - POST_ANNOTATION_JOB_1, - POST_ANNOTATION_PG_DOC, - POST_ANNOTATION_PG_TASK_1, - POST_ANNOTATION_PG_TASK_2, - POST_ANNOTATION_VALIDATION_JOB, - POST_ANNOTATION_VALIDATOR, - S3_PATH, -) +from tests.test_post_annotation import (ANNOTATION_VALIDATION_TASKS_PG, + MANIFEST_IN_MINIO, + POST_ANNOTATION_ANNOTATOR, + POST_ANNOTATION_FILE_1, + POST_ANNOTATION_JOB_1, + POST_ANNOTATION_PG_DOC, + POST_ANNOTATION_PG_TASK_1, + POST_ANNOTATION_PG_TASK_2, + POST_ANNOTATION_VALIDATION_JOB, + POST_ANNOTATION_VALIDATOR, S3_PATH) from tests.test_post_job import POST_JOB_EXISTING_JOB from tests.test_post_next_task import NEXT_TASK_ANNOTATION_TASKS, NEXT_TASK_JOB -from tests.test_post_unassgined_files import ( - ANNOTATORS_POST_UN_FILES, - JOBS_FILES_TASKS_POST_UN_FILES, -) -from tests.test_search_kafka import ( - ANNOTATION_KAFKA_FILE, - ANNOTATION_KAFKA_JOB, - ANNOTATION_KAFKA_TASK, -) +from tests.test_post_unassgined_files import (ANNOTATORS_POST_UN_FILES, + JOBS_FILES_TASKS_POST_UN_FILES) +from tests.test_search_kafka import (ANNOTATION_KAFKA_FILE, + ANNOTATION_KAFKA_JOB, + ANNOTATION_KAFKA_TASK) from tests.test_start_job import CHANGE_STATUSES_JOBS, CHANGE_STATUSES_TASKS from tests.test_tasks_crud_cr import CRUD_CR_ANNOTATION_TASKS, CRUD_CR_JOBS from tests.test_tasks_crud_cr import FILES as CRUD_CR_FILES -from tests.test_tasks_crud_ud import ( - CRUD_UD_CONSTRAINTS_FILES, - CRUD_UD_CONSTRAINTS_JOBS, - CRUD_UD_CONSTRAINTS_TASKS, - CRUD_UD_CONSTRAINTS_USERS, - CRUD_UD_JOB_1, - CRUD_UD_JOB_2, - CRUD_UD_TASK, -) -from tests.test_update_job import ( - UPDATE_JOB_CATEGORIES, - UPDATE_JOB_FILES, - UPDATE_JOB_USERS, - UPDATE_JOBS, - UPDATE_USER_NO_JOBS, -) +from tests.test_tasks_crud_ud import (CRUD_UD_CONSTRAINTS_FILES, + CRUD_UD_CONSTRAINTS_JOBS, + CRUD_UD_CONSTRAINTS_TASKS, + CRUD_UD_CONSTRAINTS_USERS, CRUD_UD_JOB_1, + CRUD_UD_JOB_2, CRUD_UD_TASK) +from tests.test_update_job import (UPDATE_JOB_CATEGORIES, UPDATE_JOB_FILES, + UPDATE_JOB_USERS, UPDATE_JOBS, + UPDATE_USER_NO_JOBS) + +from annotation.annotations import MANIFEST, S3_START_PATH +from annotation.categories import cache +from annotation.database import SQLALCHEMY_DATABASE_URL, Base +from annotation.jobs import update_user_overall_load +from annotation.models import (AnnotatedDoc, Category, DocumentLinks, File, + Job, ManualAnnotationTask, User) +from annotation.schemas import (AnnotationStatisticsInputSchema, + CategoryTypeSchema, FileStatusEnumSchema, + JobStatusEnumSchema, TaskStatusEnumSchema, + ValidationSchema) +from annotation.tasks import add_task_stats_record +from annotation.utils import get_test_db_url DEFAULT_REGION = "us-east-1" diff --git a/annotation/tests/override_app_dependency.py b/annotation/tests/override_app_dependency.py index 0e143a6f4..20853ffaa 100644 --- a/annotation/tests/override_app_dependency.py +++ b/annotation/tests/override_app_dependency.py @@ -13,17 +13,13 @@ from sqlalchemy.orm import sessionmaker from tenant_dependency import TenantData -from annotation.database import get_db +from annotation.database import SQLALCHEMY_DATABASE_URL, get_db from annotation.main import app 
-from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) from annotation.token_dependency import TOKEN from annotation.utils import get_test_db_url -from annotation.database import SQLALCHEMY_DATABASE_URL - TEST_TOKEN = "token" TEST_TENANT = "test" diff --git a/annotation/tests/test_annotators_overall_load.py b/annotation/tests/test_annotators_overall_load.py index 972e14d77..6342205ae 100644 --- a/annotation/tests/test_annotators_overall_load.py +++ b/annotation/tests/test_annotators_overall_load.py @@ -5,31 +5,20 @@ from pytest import mark, raises from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session - -from annotation.jobs import update_user_overall_load -from annotation.main import app -from annotation.microservice_communication.assets_communication import ( - ASSETS_FILES_URL, -) -from annotation.models import ( - AnnotatedDoc, - Category, - File, - Job, - ManualAnnotationTask, - User, -) -from annotation.schemas import ( - CategoryTypeSchema, - FileStatusEnumSchema, - JobStatusEnumSchema, - JobTypeEnumSchema, - ValidationSchema, -) from tests.consts import CRUD_TASKS_PATH, FINISH_TASK_PATH from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT from tests.test_tasks_crud_ud import construct_path +from annotation.jobs import update_user_overall_load +from annotation.main import app +from annotation.microservice_communication.assets_communication import \ + ASSETS_FILES_URL +from annotation.models import (AnnotatedDoc, Category, File, Job, + ManualAnnotationTask, User) +from annotation.schemas import (CategoryTypeSchema, FileStatusEnumSchema, + JobStatusEnumSchema, JobTypeEnumSchema, + ValidationSchema) + client = TestClient(app) OVERALL_LOAD_USERS = [ diff --git a/annotation/tests/test_assets_communication.py b/annotation/tests/test_assets_communication.py index fa51d52c9..1052b95ed 100644 --- a/annotation/tests/test_assets_communication.py +++ b/annotation/tests/test_assets_communication.py @@ -4,16 +4,11 @@ import responses from fastapi import HTTPException from requests import ConnectionError, RequestException, Timeout +from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, TEST_TOKEN from annotation.microservice_communication.assets_communication import ( - ASSETS_FILES_URL, - ASSETS_URL, - get_dataset_info, - get_file_names, - get_file_path_and_bucket, - get_files_info, -) -from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, TEST_TOKEN + ASSETS_FILES_URL, ASSETS_URL, get_dataset_info, get_file_names, + get_file_path_and_bucket, get_files_info) FILES = [ { diff --git a/annotation/tests/test_category_crud.py b/annotation/tests/test_category_crud.py index 0815dd18d..0cd6d455e 100644 --- a/annotation/tests/test_category_crud.py +++ b/annotation/tests/test_category_crud.py @@ -8,11 +8,11 @@ from fastapi.testclient import TestClient from pytest import fixture, mark from sqlalchemy.exc import IntegrityError, SQLAlchemyError - -from annotation.models import Category from tests.consts import CATEGORIES_PATH from tests.override_app_dependency import TEST_HEADERS, app +from annotation.models import Category + client = TestClient(app) ATTRIBUTES_NOT_IN_CATEGORY_MODEL = ("is_leaf",) diff --git a/annotation/tests/test_compare_scores.py b/annotation/tests/test_compare_scores.py index 11a0642a5..1723e861b 100644 --- a/annotation/tests/test_compare_scores.py +++ 
b/annotation/tests/test_compare_scores.py @@ -2,11 +2,9 @@ import pytest -from annotation.schemas.tasks import ( - AgreementScoreComparingResult, - AgreementScoreServiceResponse, - TaskMetric, -) +from annotation.schemas.tasks import (AgreementScoreComparingResult, + AgreementScoreServiceResponse, + TaskMetric) from annotation.tasks.services import compare_agreement_scores min_match_1 = 0.8 diff --git a/annotation/tests/test_cross_validation.py b/annotation/tests/test_cross_validation.py index e52b85cce..793a36406 100644 --- a/annotation/tests/test_cross_validation.py +++ b/annotation/tests/test_cross_validation.py @@ -2,15 +2,13 @@ from uuid import UUID import pytest +from tests.test_distribution import JOB_ID -from annotation.distribution import ( - distribute_validation_partial_files, - distribute_whole_files, -) +from annotation.distribution import (distribute_validation_partial_files, + distribute_whole_files) from annotation.errors import FieldConstraintError from annotation.jobs import check_annotators, check_validators from annotation.schemas import TaskStatusEnumSchema, ValidationSchema -from tests.test_distribution import JOB_ID TASKS_STATUS = TaskStatusEnumSchema.pending VALIDATION_TYPE = ValidationSchema.cross diff --git a/annotation/tests/test_delete_batch_tasks.py b/annotation/tests/test_delete_batch_tasks.py index 3a321ab10..7f0b231b5 100644 --- a/annotation/tests/test_delete_batch_tasks.py +++ b/annotation/tests/test_delete_batch_tasks.py @@ -3,19 +3,16 @@ import pytest from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError - -from annotation.annotations import row_to_dict -from annotation.models import Category, File, Job, ManualAnnotationTask, User -from annotation.schemas import ( - CategoryTypeSchema, - TaskStatusEnumSchema, - ValidationSchema, -) from tests.consts import CRUD_TASKS_PATH from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app from tests.test_post import check_files_distributed_pages from tests.test_tasks_crud_ud import BAD_ID, NOT_EXISTING_ID +from annotation.annotations import row_to_dict +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import (CategoryTypeSchema, TaskStatusEnumSchema, + ValidationSchema) + client = TestClient(app) DELETE_BATCH_TASKS_ANNOTATOR = User( diff --git a/annotation/tests/test_distribution.py b/annotation/tests/test_distribution.py index 1043f6d87..fa8cead4b 100644 --- a/annotation/tests/test_distribution.py +++ b/annotation/tests/test_distribution.py @@ -2,25 +2,19 @@ from copy import copy import pytest +from tests.override_app_dependency import TEST_TENANT -from annotation.distribution import ( - add_unassigned_file, - calculate_users_load, - distribute_annotation_partial_files, - distribute_tasks, - distribute_whole_files, - find_annotated_pages, - find_files_for_task, - find_unassigned_files, - find_unassigned_pages, -) +from annotation.distribution import (add_unassigned_file, calculate_users_load, + distribute_annotation_partial_files, + distribute_tasks, distribute_whole_files, + find_annotated_pages, find_files_for_task, + find_unassigned_files, + find_unassigned_pages) from annotation.distribution.main import distribute_tasks_extensively -from annotation.microservice_communication.assets_communication import ( - prepare_files_for_distribution, -) +from annotation.microservice_communication.assets_communication import \ + prepare_files_for_distribution from annotation.models import File from 
annotation.schemas import FileStatusEnumSchema, TaskStatusEnumSchema -from tests.override_app_dependency import TEST_TENANT JOB_ID = 1 ANNOTATORS = [ diff --git a/annotation/tests/test_finish_task.py b/annotation/tests/test_finish_task.py index e3074af82..46e2be9ac 100644 --- a/annotation/tests/test_finish_task.py +++ b/annotation/tests/test_finish_task.py @@ -10,28 +10,17 @@ from sqlalchemy import asc, not_ from sqlalchemy.exc import DBAPIError, SQLAlchemyError from sqlalchemy.orm import Session +from tests.consts import FINISH_TASK_PATH +from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app from annotation.annotations import accumulate_pages_info, row_to_dict -from annotation.models import ( - AgreementMetrics, - AnnotatedDoc, - Category, - File, - Job, - ManualAnnotationTask, - User, -) -from annotation.schemas import ( - AgreementScoreServiceResponse, - CategoryTypeSchema, - FileStatusEnumSchema, - JobStatusEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) +from annotation.models import (AgreementMetrics, AnnotatedDoc, Category, File, + Job, ManualAnnotationTask, User) +from annotation.schemas import (AgreementScoreServiceResponse, + CategoryTypeSchema, FileStatusEnumSchema, + JobStatusEnumSchema, TaskStatusEnumSchema, + ValidationSchema) from annotation.tasks import get_task_revisions -from tests.consts import FINISH_TASK_PATH -from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app client = TestClient(app) diff --git a/annotation/tests/test_get_accumulated_revisions.py b/annotation/tests/test_get_accumulated_revisions.py index c5ee0869e..48676a1ee 100644 --- a/annotation/tests/test_get_accumulated_revisions.py +++ b/annotation/tests/test_get_accumulated_revisions.py @@ -3,18 +3,16 @@ import pytest from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError - -from annotation.annotations import LATEST -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) -from annotation.models import AnnotatedDoc, User from tests.consts import ANNOTATION_PATH from tests.override_app_dependency import TEST_TOKEN, app from tests.test_post_annotation import POST_ANNOTATION_PG_DOC +from annotation.annotations import LATEST +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) +from annotation.models import AnnotatedDoc, User + client = TestClient(app) diff --git a/annotation/tests/test_get_annotation_for_particular_revision.py b/annotation/tests/test_get_annotation_for_particular_revision.py index 85017a026..34a5c0803 100644 --- a/annotation/tests/test_get_annotation_for_particular_revision.py +++ b/annotation/tests/test_get_annotation_for_particular_revision.py @@ -3,16 +3,14 @@ import pytest from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError - -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) -from annotation.models import AnnotatedDoc, User from tests.consts import ANNOTATION_PATH from tests.override_app_dependency import TEST_TENANT, TEST_TOKEN, app +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) +from annotation.models import AnnotatedDoc, User + client = TestClient(app) NOT_EXISTING_TENANT = "not-exist" diff --git a/annotation/tests/test_get_child_categories.py b/annotation/tests/test_get_child_categories.py index f8da46ccc..27505e7a0 100644 --- 
a/annotation/tests/test_get_child_categories.py +++ b/annotation/tests/test_get_child_categories.py @@ -6,27 +6,20 @@ from pytest import mark from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session - -from annotation.microservice_communication.assets_communication import ( - ASSETS_FILES_URL, -) -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) -from annotation.models import Category -from annotation.schemas import CategoryTypeSchema from tests.consts import CATEGORIES_PATH, POST_JOBS_PATH -from tests.override_app_dependency import ( - TEST_HEADERS, - TEST_TENANT, - TEST_TOKEN, - app, -) +from tests.override_app_dependency import (TEST_HEADERS, TEST_TENANT, + TEST_TOKEN, app) from tests.test_job_categories import prepare_job_body from tests.test_post_next_task import ASSETS_RESPONSE +from annotation.microservice_communication.assets_communication import \ + ASSETS_FILES_URL +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) +from annotation.models import Category +from annotation.schemas import CategoryTypeSchema + # Cyclic categories have tree hierarchical structure of ids: # "1" -> "2" -> "4" -> "1" # '- -> "3" diff --git a/annotation/tests/test_get_entities_status.py b/annotation/tests/test_get_entities_status.py index dd96fd74e..0040fce04 100644 --- a/annotation/tests/test_get_entities_status.py +++ b/annotation/tests/test_get_entities_status.py @@ -1,8 +1,8 @@ import pytest from fastapi.testclient import TestClient +from tests.override_app_dependency import TEST_HEADERS, app from annotation.schemas import EntitiesStatusesSchema, TaskStatusEnumSchema -from tests.override_app_dependency import TEST_HEADERS, app client = TestClient(app) diff --git a/annotation/tests/test_get_job.py b/annotation/tests/test_get_job.py index 785ab0c80..ebecbbdd6 100644 --- a/annotation/tests/test_get_job.py +++ b/annotation/tests/test_get_job.py @@ -4,17 +4,15 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from tests.consts import ANNOTATION_PATH +from tests.override_app_dependency import TEST_TOKEN, app from annotation.jobs import collect_job_names -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) from annotation.models import Category, File, Job, User from annotation.schemas import FileStatusEnumSchema, ValidationSchema -from tests.consts import ANNOTATION_PATH -from tests.override_app_dependency import TEST_TOKEN, app client = TestClient(app) diff --git a/annotation/tests/test_get_job_files.py b/annotation/tests/test_get_job_files.py index ceb4788bc..c2f72d7f9 100644 --- a/annotation/tests/test_get_job_files.py +++ b/annotation/tests/test_get_job_files.py @@ -4,19 +4,14 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from tests.override_app_dependency import TEST_TOKEN, app -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) from annotation.models import Category, File, Job, User -from annotation.schemas import ( - CategoryTypeSchema, - FileStatusEnumSchema, - ValidationSchema, -) -from tests.override_app_dependency 
import TEST_TOKEN, app +from annotation.schemas import (CategoryTypeSchema, FileStatusEnumSchema, + ValidationSchema) client = TestClient(app) diff --git a/annotation/tests/test_get_job_progress.py b/annotation/tests/test_get_job_progress.py index 6a8cd27da..9ec67141b 100644 --- a/annotation/tests/test_get_job_progress.py +++ b/annotation/tests/test_get_job_progress.py @@ -1,21 +1,15 @@ import pytest from fastapi.testclient import TestClient - -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) -from annotation.models import Category, File, Job, User -from annotation.schemas import ( - CategoryTypeSchema, - FileStatusEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) from tests.consts import POST_JOBS_PATH from tests.override_app_dependency import TEST_TOKEN, app +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) +from annotation.models import Category, File, Job, User +from annotation.schemas import (CategoryTypeSchema, FileStatusEnumSchema, + TaskStatusEnumSchema, ValidationSchema) + client = TestClient(app) JOB_TENANT = "test" diff --git a/annotation/tests/test_get_jobs_info_by_files.py b/annotation/tests/test_get_jobs_info_by_files.py index 791f1dd97..68f86b924 100644 --- a/annotation/tests/test_get_jobs_info_by_files.py +++ b/annotation/tests/test_get_jobs_info_by_files.py @@ -3,12 +3,12 @@ import pytest from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError +from tests.consts import POST_JOBS_PATH +from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app from annotation.jobs.services import get_jobs_by_files from annotation.models import File, Job, User from annotation.schemas import JobStatusEnumSchema, ValidationSchema -from tests.consts import POST_JOBS_PATH -from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app client = TestClient(app) diff --git a/annotation/tests/test_get_pages_info.py b/annotation/tests/test_get_pages_info.py index ea5e0b91d..897b12748 100644 --- a/annotation/tests/test_get_pages_info.py +++ b/annotation/tests/test_get_pages_info.py @@ -4,24 +4,17 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from tests.consts import CRUD_TASKS_PATH +from tests.override_app_dependency import TEST_TENANT, TEST_TOKEN, app from annotation.annotations import accumulate_pages_info -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) -from annotation.models import ( - AnnotatedDoc, - File, - Job, - ManualAnnotationTask, - User, -) +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) +from annotation.models import (AnnotatedDoc, File, Job, ManualAnnotationTask, + User) from annotation.schemas import TaskStatusEnumSchema, ValidationSchema from annotation.tasks import get_task_revisions -from tests.consts import CRUD_TASKS_PATH -from tests.override_app_dependency import TEST_TENANT, TEST_TOKEN, app client = TestClient(app) diff --git a/annotation/tests/test_get_revisions.py b/annotation/tests/test_get_revisions.py index d9c99cfbe..d8c6307e4 100644 --- a/annotation/tests/test_get_revisions.py +++ b/annotation/tests/test_get_revisions.py @@ -6,11 +6,11 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from tests.consts import ANNOTATION_PATH +from 
tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app from annotation.annotations import S3_START_PATH from annotation.models import DocumentLinks -from tests.consts import ANNOTATION_PATH -from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app JOBS_IDS = ( 10, diff --git a/annotation/tests/test_get_revisions_without_annotation.py b/annotation/tests/test_get_revisions_without_annotation.py index c246f0c44..16ed98f5d 100644 --- a/annotation/tests/test_get_revisions_without_annotation.py +++ b/annotation/tests/test_get_revisions_without_annotation.py @@ -1,21 +1,16 @@ import pytest from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError - -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) -from annotation.models import Category, File, Job, ManualAnnotationTask, User -from annotation.schemas import ( - CategoryTypeSchema, - TaskStatusEnumSchema, - ValidationSchema, -) from tests.consts import ANNOTATION_PATH from tests.override_app_dependency import TEST_TENANT, TEST_TOKEN, app +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import (CategoryTypeSchema, TaskStatusEnumSchema, + ValidationSchema) + client = TestClient(app) DIFF_TENANT = "diff-tenant" diff --git a/annotation/tests/test_get_unassigned_files.py b/annotation/tests/test_get_unassigned_files.py index 0ee707bf3..90d2adb95 100644 --- a/annotation/tests/test_get_unassigned_files.py +++ b/annotation/tests/test_get_unassigned_files.py @@ -4,10 +4,10 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app from annotation.models import Category, File, Job, User from annotation.schemas import CategoryTypeSchema, ValidationSchema -from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app client = TestClient(app) diff --git a/annotation/tests/test_get_users_for_job.py b/annotation/tests/test_get_users_for_job.py index e44473316..24d517f14 100644 --- a/annotation/tests/test_get_users_for_job.py +++ b/annotation/tests/test_get_users_for_job.py @@ -1,10 +1,10 @@ import pytest from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError +from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app from annotation.models import Job, User from annotation.schemas import ValidationSchema -from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app client = TestClient(app) diff --git a/annotation/tests/test_job_categories.py b/annotation/tests/test_job_categories.py index d7522ccda..7a8804756 100644 --- a/annotation/tests/test_job_categories.py +++ b/annotation/tests/test_job_categories.py @@ -5,22 +5,16 @@ from fastapi.testclient import TestClient from pytest import mark from sqlalchemy.orm import Session +from tests.consts import POST_JOBS_PATH +from tests.override_app_dependency import (TEST_HEADERS, TEST_TENANT, + TEST_TOKEN, app) +from tests.test_category_crud import prepare_category_body -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) from annotation.models import Category, Job from 
annotation.schemas import JobTypeEnumSchema, ValidationSchema -from tests.consts import POST_JOBS_PATH -from tests.override_app_dependency import ( - TEST_HEADERS, - TEST_TENANT, - TEST_TOKEN, - app, -) -from tests.test_category_crud import prepare_category_body JOBS_PATH = "/jobs" MOCK_ID = 1 diff --git a/annotation/tests/test_microservices_search.py b/annotation/tests/test_microservices_search.py index e00795510..d74eebba5 100644 --- a/annotation/tests/test_microservices_search.py +++ b/annotation/tests/test_microservices_search.py @@ -1,28 +1,20 @@ import pytest import responses -from annotation.microservice_communication.assets_communication import ( - ASSETS_FILES_URL, -) -from annotation.microservice_communication.jobs_communication import ( - JOBS_SEARCH_URL, -) -from annotation.microservice_communication.search import ( - PAGE_SIZE, - calculate_amount_of_pagination_pages, - construct_search_params, - expand_response, - get_response, -) -from annotation.models import ManualAnnotationTask -from annotation.schemas import ( - ExpandedManualAnnotationTaskSchema, - TaskStatusEnumSchema, -) from fastapi import HTTPException from requests import ConnectionError, RequestException, Timeout - from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, TEST_TOKEN +from annotation.microservice_communication.assets_communication import \ + ASSETS_FILES_URL +from annotation.microservice_communication.jobs_communication import \ + JOBS_SEARCH_URL +from annotation.microservice_communication.search import ( + PAGE_SIZE, calculate_amount_of_pagination_pages, construct_search_params, + expand_response, get_response) +from annotation.models import ManualAnnotationTask +from annotation.schemas import (ExpandedManualAnnotationTaskSchema, + TaskStatusEnumSchema) + AMOUNT_OF_ELEMENTS = 150 IDS = [entity_id for entity_id in range(1, AMOUNT_OF_ELEMENTS)] diff --git a/annotation/tests/test_post.py b/annotation/tests/test_post.py index ded16b5ae..258cfbcce 100644 --- a/annotation/tests/test_post.py +++ b/annotation/tests/test_post.py @@ -6,13 +6,12 @@ from sqlalchemy import not_ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app -from annotation.microservice_communication.assets_communication import ( - ASSETS_URL, -) +from annotation.microservice_communication.assets_communication import \ + ASSETS_URL from annotation.models import Category, File, Job, ManualAnnotationTask, User from annotation.schemas import CategoryTypeSchema, ValidationSchema -from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app client = TestClient(app) diff --git a/annotation/tests/test_post_annotation.py b/annotation/tests/test_post_annotation.py index 2b067617e..2c58f8149 100644 --- a/annotation/tests/test_post_annotation.py +++ b/annotation/tests/test_post_annotation.py @@ -12,54 +12,30 @@ from requests import RequestException from sqlalchemy.exc import DBAPIError, SQLAlchemyError from sqlalchemy.orm import Session - -from annotation.annotations import ( - MANIFEST, - check_task_pages, - construct_annotated_doc, - create_manifest_json, - get_pages_sha, - row_to_dict, -) -from annotation.annotations.main import ( - check_docs_identity, - upload_json_to_minio, - upload_pages_to_minio, -) -from annotation.kafka_client import producers -from annotation.microservice_communication.assets_communication import ( - ASSETS_FILES_URL, -) -from annotation.microservice_communication.search import ( - AUTHORIZATION, - 
BEARER, - HEADER_TENANT, -) -from annotation.models import ( - AnnotatedDoc, - Category, - File, - Job, - ManualAnnotationTask, - User, -) -from annotation.schemas import ( - CategoryTypeSchema, - DocForSaveSchema, - JobTypeEnumSchema, - PageSchema, - TaskStatusEnumSchema, - ValidationSchema, -) from tests.consts import ANNOTATION_PATH -from tests.override_app_dependency import ( - TEST_HEADERS, - TEST_TENANT, - TEST_TOKEN, - app, -) +from tests.override_app_dependency import (TEST_HEADERS, TEST_TENANT, + TEST_TOKEN, app) from tests.test_tasks_crud_ud import construct_path +from annotation.annotations import (MANIFEST, check_task_pages, + construct_annotated_doc, + create_manifest_json, get_pages_sha, + row_to_dict) +from annotation.annotations.main import (check_docs_identity, + upload_json_to_minio, + upload_pages_to_minio) +from annotation.kafka_client import producers +from annotation.microservice_communication.assets_communication import \ + ASSETS_FILES_URL +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) +from annotation.models import (AnnotatedDoc, Category, File, Job, + ManualAnnotationTask, User) +from annotation.schemas import (CategoryTypeSchema, DocForSaveSchema, + JobTypeEnumSchema, PageSchema, + TaskStatusEnumSchema, ValidationSchema) + client = TestClient(app) CATEGORIES = [ diff --git a/annotation/tests/test_post_job.py b/annotation/tests/test_post_job.py index 812824d2c..3d6f2ac14 100644 --- a/annotation/tests/test_post_job.py +++ b/annotation/tests/test_post_job.py @@ -6,30 +6,18 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.declarative.api import DeclarativeMeta from sqlalchemy.orm import Session +from tests.consts import POST_JOBS_PATH +from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app +from tests.test_post import check_files_distributed_pages from annotation.annotations import row_to_dict from annotation.jobs import get_job_attributes_for_post from annotation.microservice_communication.assets_communication import ( - ASSETS_FILES_URL, - ASSETS_URL, -) -from annotation.models import ( - Category, - File, - Job, - ManualAnnotationTask, - User, - association_job_annotator, -) -from annotation.schemas import ( - CategoryTypeSchema, - JobStatusEnumSchema, - JobTypeEnumSchema, - ValidationSchema, -) -from tests.consts import POST_JOBS_PATH -from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app -from tests.test_post import check_files_distributed_pages + ASSETS_FILES_URL, ASSETS_URL) +from annotation.models import (Category, File, Job, ManualAnnotationTask, User, + association_job_annotator) +from annotation.schemas import (CategoryTypeSchema, JobStatusEnumSchema, + JobTypeEnumSchema, ValidationSchema) client = TestClient(app) diff --git a/annotation/tests/test_post_next_task.py b/annotation/tests/test_post_next_task.py index 6592d5244..4cae1315e 100644 --- a/annotation/tests/test_post_next_task.py +++ b/annotation/tests/test_post_next_task.py @@ -3,34 +3,24 @@ import pytest import responses -from annotation.microservice_communication.assets_communication import ( - ASSETS_FILES_URL, -) -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) -from annotation.microservice_communication.user import USERS_SEARCH_URL -from annotation.models import Category, File, Job, ManualAnnotationTask, User -from annotation.schemas import ( - CategoryTypeSchema, - TaskStatusEnumSchema, - ValidationSchema, -) from 
fastapi.testclient import TestClient from requests import ConnectionError, RequestException, Timeout from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session - -from tests.override_app_dependency import ( - TEST_HEADERS, - TEST_TENANT, - TEST_TOKEN, - app, -) +from tests.override_app_dependency import (TEST_HEADERS, TEST_TENANT, + TEST_TOKEN, app) from tests.test_tasks_crud_cr import USERS_SEARCH_RESPONSE +from annotation.microservice_communication.assets_communication import \ + ASSETS_FILES_URL +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) +from annotation.microservice_communication.user import USERS_SEARCH_URL +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import (CategoryTypeSchema, TaskStatusEnumSchema, + ValidationSchema) + client = TestClient(app) POST_NEXT_TASK_PATH = "/tasks/next" diff --git a/annotation/tests/test_post_unassgined_files.py b/annotation/tests/test_post_unassgined_files.py index 01c15e343..96fa7526a 100644 --- a/annotation/tests/test_post_unassgined_files.py +++ b/annotation/tests/test_post_unassgined_files.py @@ -2,26 +2,17 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError from sqlalchemy.sql.elements import not_ +from tests.override_app_dependency import (TEST_HEADERS, TEST_TENANT, + TEST_TOKEN, app) +from tests.test_post import check_files_distributed_pages from annotation.annotations import row_to_dict -from annotation.microservice_communication.search import ( - AUTHORIZATION, - BEARER, - HEADER_TENANT, -) +from annotation.microservice_communication.search import (AUTHORIZATION, + BEARER, + HEADER_TENANT) from annotation.models import File, Job, ManualAnnotationTask, User -from annotation.schemas import ( - FileStatusEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) -from tests.override_app_dependency import ( - TEST_HEADERS, - TEST_TENANT, - TEST_TOKEN, - app, -) -from tests.test_post import check_files_distributed_pages +from annotation.schemas import (FileStatusEnumSchema, TaskStatusEnumSchema, + ValidationSchema) client = TestClient(app) diff --git a/annotation/tests/test_search_kafka.py b/annotation/tests/test_search_kafka.py index e2af4b537..1817e628b 100644 --- a/annotation/tests/test_search_kafka.py +++ b/annotation/tests/test_search_kafka.py @@ -1,24 +1,19 @@ from unittest import mock import responses -from annotation.annotations import add_search_annotation_producer -from annotation.kafka_client import producers -from annotation.microservice_communication.assets_communication import ( - ASSETS_FILES_URL, -) -from annotation.models import Category, File, Job, ManualAnnotationTask, User -from annotation.schemas import ( - CategoryTypeSchema, - JobStatusEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) from fastapi.testclient import TestClient from kafka.errors import NoBrokersAvailable from pytest import mark - from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app +from annotation.annotations import add_search_annotation_producer +from annotation.kafka_client import producers +from annotation.microservice_communication.assets_communication import \ + ASSETS_FILES_URL +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import (CategoryTypeSchema, JobStatusEnumSchema, + TaskStatusEnumSchema, ValidationSchema) + from .consts import ANNOTATION_PATH client = TestClient(app) diff --git 
a/annotation/tests/test_start_job.py b/annotation/tests/test_start_job.py index 3da8691bb..3acbc5999 100644 --- a/annotation/tests/test_start_job.py +++ b/annotation/tests/test_start_job.py @@ -6,17 +6,13 @@ from requests.exceptions import RequestException from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app from annotation.annotations import row_to_dict from annotation.jobs import update_inner_job_status from annotation.models import Category, File, Job, ManualAnnotationTask, User -from annotation.schemas import ( - CategoryTypeSchema, - JobStatusEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) -from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app +from annotation.schemas import (CategoryTypeSchema, JobStatusEnumSchema, + TaskStatusEnumSchema, ValidationSchema) client = TestClient(app) diff --git a/annotation/tests/test_tasks_crud_cr.py b/annotation/tests/test_tasks_crud_cr.py index 0dc3e9d11..8d0334428 100644 --- a/annotation/tests/test_tasks_crud_cr.py +++ b/annotation/tests/test_tasks_crud_cr.py @@ -9,19 +9,17 @@ from fastapi.testclient import TestClient from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from tests.consts import CRUD_TASKS_PATH +from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app +from tests.test_post import check_files_distributed_pages -from annotation.microservice_communication.assets_communication import ( - ASSETS_FILES_URL, -) -from annotation.microservice_communication.jobs_communication import ( - JOBS_SEARCH_URL, -) +from annotation.microservice_communication.assets_communication import \ + ASSETS_FILES_URL +from annotation.microservice_communication.jobs_communication import \ + JOBS_SEARCH_URL from annotation.microservice_communication.user import USERS_SEARCH_URL from annotation.models import Category, File, Job, ManualAnnotationTask, User from annotation.schemas import CategoryTypeSchema, ValidationSchema -from tests.consts import CRUD_TASKS_PATH -from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app -from tests.test_post import check_files_distributed_pages client = TestClient(app) diff --git a/annotation/tests/test_tasks_crud_ud.py b/annotation/tests/test_tasks_crud_ud.py index e94c40465..7bf53949b 100644 --- a/annotation/tests/test_tasks_crud_ud.py +++ b/annotation/tests/test_tasks_crud_ud.py @@ -1,18 +1,15 @@ import pytest from fastapi.testclient import TestClient from sqlalchemy.exc import DBAPIError, SQLAlchemyError - -from annotation.annotations import row_to_dict -from annotation.models import Category, File, Job, ManualAnnotationTask, User -from annotation.schemas import ( - CategoryTypeSchema, - TaskStatusEnumSchema, - ValidationSchema, -) from tests.consts import CRUD_TASKS_PATH from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app from tests.test_post import check_files_distributed_pages +from annotation.annotations import row_to_dict +from annotation.models import Category, File, Job, ManualAnnotationTask, User +from annotation.schemas import (CategoryTypeSchema, TaskStatusEnumSchema, + ValidationSchema) + client = TestClient(app) CRUD_UD_USER = User(user_id="1a8b0bd3-9159-4078-a60a-1d652b61c944") diff --git a/annotation/tests/test_update_job.py b/annotation/tests/test_update_job.py index 2761f3284..5260eae14 100644 --- a/annotation/tests/test_update_job.py +++ b/annotation/tests/test_update_job.py @@ -6,32 +6,18 @@ from pytest 
import mark from sqlalchemy import asc from sqlalchemy.exc import SQLAlchemyError +from tests.consts import POST_JOBS_PATH +from tests.override_app_dependency import (TEST_HEADERS, TEST_TENANT, + TEST_TOKEN, app) from annotation.annotations import row_to_dict -from annotation.models import ( - Category, - File, - Job, - User, - association_job_annotator, - association_job_category, - association_job_owner, - association_job_validator, -) -from annotation.schemas import ( - CategoryTypeSchema, - FileStatusEnumSchema, - JobStatusEnumSchema, - JobTypeEnumSchema, - ValidationSchema, -) -from tests.consts import POST_JOBS_PATH -from tests.override_app_dependency import ( - TEST_HEADERS, - TEST_TENANT, - TEST_TOKEN, - app, -) +from annotation.models import (Category, File, Job, User, + association_job_annotator, + association_job_category, association_job_owner, + association_job_validator) +from annotation.schemas import (CategoryTypeSchema, FileStatusEnumSchema, + JobStatusEnumSchema, JobTypeEnumSchema, + ValidationSchema) JOBS_SEARCH_URL = os.environ.get("JOBS_SEARCH_URL") diff --git a/annotation/tests/test_validation.py b/annotation/tests/test_validation.py index de03b8bdf..ea095c767 100644 --- a/annotation/tests/test_validation.py +++ b/annotation/tests/test_validation.py @@ -5,37 +5,26 @@ from fastapi import HTTPException from fastapi.testclient import TestClient from sqlalchemy import or_ - -from annotation.annotations import row_to_dict -from annotation.models import ( - AnnotatedDoc, - File, - Job, - ManualAnnotationTask, - User, -) -from annotation.schemas import ( - AnnotationAndValidationActionsSchema, - FileStatusEnumSchema, - TaskStatusEnumSchema, - ValidationSchema, -) -from annotation.tasks.validation import ( - _find_annotators_for_failed_pages, - check_user_job_action, - check_user_job_belonging, - check_uuid, - construct_tasks, - create_annotation_tasks, - create_validation_tasks, - find_initial_annotators, - get_annotators_revisions, -) from tests.consts import FINISH_TASK_PATH from tests.override_app_dependency import TEST_HEADERS, TEST_TENANT, app from tests.test_finish_task import check_files_finished_pages from tests.test_post import check_files_distributed_pages +from annotation.annotations import row_to_dict +from annotation.models import (AnnotatedDoc, File, Job, ManualAnnotationTask, + User) +from annotation.schemas import (AnnotationAndValidationActionsSchema, + FileStatusEnumSchema, TaskStatusEnumSchema, + ValidationSchema) +from annotation.tasks.validation import (_find_annotators_for_failed_pages, + check_user_job_action, + check_user_job_belonging, check_uuid, + construct_tasks, + create_annotation_tasks, + create_validation_tasks, + find_initial_annotators, + get_annotators_revisions) + client = TestClient(app) BAD_UUID = "bad_uuid" diff --git a/assets/alembic/env.py b/assets/alembic/env.py index 46caae68b..dd26bbaa8 100644 --- a/assets/alembic/env.py +++ b/assets/alembic/env.py @@ -1,11 +1,11 @@ import os from logging.config import fileConfig -from assets.config import settings -from assets.db.utils import get_test_db_url +from alembic import context from sqlalchemy import engine_from_config, pool -from alembic import context +from assets.config import settings +from assets.db.utils import get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. 
diff --git a/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py b/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py index e9dd71ea0..3042acf87 100644 --- a/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py +++ b/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py @@ -7,11 +7,10 @@ """ import sqlalchemy as sa - +from alembic import op # revision identifiers, used by Alembic. from sqlalchemy.orm import Session # noqa -from alembic import op from assets.db.models import FileObject # noqa revision = "0f6c859c1d1c" diff --git a/assets/alembic/versions/9e837ea0c11d_image_pages.py b/assets/alembic/versions/9e837ea0c11d_image_pages.py index 9dd24bc92..b2dff0e96 100644 --- a/assets/alembic/versions/9e837ea0c11d_image_pages.py +++ b/assets/alembic/versions/9e837ea0c11d_image_pages.py @@ -5,9 +5,9 @@ Create Date: 2022-02-14 17:36:57.252191 """ +from alembic import op from sqlalchemy.orm import Session -from alembic import op from assets.db.models import FileObject # revision identifiers, used by Alembic. diff --git a/assets/alembic/versions/afa33cc83d57_new_fields.py b/assets/alembic/versions/afa33cc83d57_new_fields.py index 6d7aa33a6..b017a10c5 100644 --- a/assets/alembic/versions/afa33cc83d57_new_fields.py +++ b/assets/alembic/versions/afa33cc83d57_new_fields.py @@ -6,10 +6,10 @@ """ import sqlalchemy as sa -from assets.db.models import TSVector - from alembic import op +from assets.db.models import TSVector + # revision identifiers, used by Alembic. revision = "afa33cc83d57" down_revision = None diff --git a/assets/alembic/versions/fe5926249504_count_datasets.py b/assets/alembic/versions/fe5926249504_count_datasets.py index e1124f2f4..d5b2a4b46 100644 --- a/assets/alembic/versions/fe5926249504_count_datasets.py +++ b/assets/alembic/versions/fe5926249504_count_datasets.py @@ -6,9 +6,9 @@ """ import sqlalchemy as sa +from alembic import op from sqlalchemy.orm import Session -from alembic import op from assets.db.models import Association, Datasets, FileObject # revision identifiers, used by Alembic. 
diff --git a/assets/assets/db/models.py b/assets/assets/db/models.py index 119cb17e3..30edb04ad 100644 --- a/assets/assets/db/models.py +++ b/assets/assets/db/models.py @@ -2,7 +2,6 @@ from typing import Any, Dict, Optional import sqlalchemy as sa -from assets.config import settings from filter_lib import create_filter_model from sqlalchemy.dialects.postgresql import TSVECTOR from sqlalchemy.engine.default import DefaultExecutionContext @@ -10,6 +9,8 @@ from sqlalchemy.orm import relationship, sessionmaker from sqlalchemy.types import TypeDecorator +from assets.config import settings + Base = declarative_base() engine = sa.create_engine( settings.database_url, diff --git a/assets/assets/db/service.py b/assets/assets/db/service.py index 4492c48d7..a6620896d 100644 --- a/assets/assets/db/service.py +++ b/assets/assets/db/service.py @@ -1,12 +1,13 @@ from typing import Any, Dict, Optional, Tuple -from assets.db.models import Association, Datasets, FileObject, SessionLocal -from assets.logger import get_logger -from assets.schemas import FileProcessingStatusForUpdate from filter_lib import PaginationParams, form_query, map_request_to_filter from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Query, Session, load_only, selectinload +from assets.db.models import Association, Datasets, FileObject, SessionLocal +from assets.logger import get_logger +from assets.schemas import FileProcessingStatusForUpdate + logger = get_logger(__name__) diff --git a/assets/assets/routers/bonds_router.py b/assets/assets/routers/bonds_router.py index b8f3e6cad..38bfb6c0c 100644 --- a/assets/assets/routers/bonds_router.py +++ b/assets/assets/routers/bonds_router.py @@ -4,6 +4,7 @@ import fastapi import filter_lib import sqlalchemy.orm + from assets import db, schemas, utils router = fastapi.APIRouter(prefix="/datasets/bonds", tags=["bonds"]) diff --git a/assets/assets/routers/datasets_router.py b/assets/assets/routers/datasets_router.py index 2094cdf45..f6ffa3e89 100644 --- a/assets/assets/routers/datasets_router.py +++ b/assets/assets/routers/datasets_router.py @@ -6,6 +6,7 @@ import sqlalchemy.exc import sqlalchemy.orm import sqlalchemy_filters.exceptions + from assets import db, schemas router = fastapi.APIRouter(prefix="/datasets", tags=["datasets"]) diff --git a/assets/assets/routers/files_router.py b/assets/assets/routers/files_router.py index 1a8a70894..67ab315cf 100644 --- a/assets/assets/routers/files_router.py +++ b/assets/assets/routers/files_router.py @@ -6,6 +6,7 @@ import minio import sqlalchemy.orm import sqlalchemy_filters.exceptions + from assets import db, exceptions, schemas, utils router = fastapi.APIRouter(prefix="/files", tags=["files"]) diff --git a/assets/assets/routers/minio_router.py b/assets/assets/routers/minio_router.py index 38e36f086..9822a0fe5 100644 --- a/assets/assets/routers/minio_router.py +++ b/assets/assets/routers/minio_router.py @@ -4,6 +4,7 @@ import minio import sqlalchemy.orm import urllib3.exceptions + from assets import db, schemas, utils from assets.config import settings diff --git a/assets/assets/routers/s3_router.py b/assets/assets/routers/s3_router.py index 704c1433b..b145cff21 100644 --- a/assets/assets/routers/s3_router.py +++ b/assets/assets/routers/s3_router.py @@ -4,6 +4,7 @@ import minio import sqlalchemy.orm import urllib3.exceptions + from assets import db, exceptions, schemas, utils router = fastapi.APIRouter(prefix="/s3_upload", tags=["s_3"]) diff --git a/assets/assets/utils/common_utils.py b/assets/assets/utils/common_utils.py index 
f69734ceb..64cda6323 100644 --- a/assets/assets/utils/common_utils.py +++ b/assets/assets/utils/common_utils.py @@ -9,6 +9,7 @@ import requests import sqlalchemy.orm import starlette.datastructures + from assets import db, exceptions, logger, schemas from assets.config import settings from assets.utils import minio_utils diff --git a/assets/assets/utils/convert_service_utils.py b/assets/assets/utils/convert_service_utils.py index 757109fc8..69b0af269 100644 --- a/assets/assets/utils/convert_service_utils.py +++ b/assets/assets/utils/convert_service_utils.py @@ -1,4 +1,5 @@ import requests + from assets import logger from assets.config import settings diff --git a/assets/assets/utils/minio_utils.py b/assets/assets/utils/minio_utils.py index 856461911..a75a54291 100644 --- a/assets/assets/utils/minio_utils.py +++ b/assets/assets/utils/minio_utils.py @@ -6,9 +6,10 @@ import pdf2image.exceptions import PIL.Image import urllib3.exceptions +from minio.credentials import AWSConfigProvider, EnvAWSProvider, IamAwsProvider + from assets import db, logger from assets.config import settings -from minio.credentials import AWSConfigProvider, EnvAWSProvider, IamAwsProvider logger_ = logger.get_logger(__name__) diff --git a/assets/assets/utils/s3_utils.py b/assets/assets/utils/s3_utils.py index dbe1d7f05..df5a514ef 100644 --- a/assets/assets/utils/s3_utils.py +++ b/assets/assets/utils/s3_utils.py @@ -3,6 +3,7 @@ import boto3 import urllib3.exceptions + from assets import exceptions, logger from assets.config import settings diff --git a/assets/tests/conftest.py b/assets/tests/conftest.py index 8e82fd48d..45bca41c2 100644 --- a/assets/tests/conftest.py +++ b/assets/tests/conftest.py @@ -9,6 +9,8 @@ import pytest import urllib3 +from alembic import command +from alembic.config import Config from fastapi.testclient import TestClient from minio import Minio from sqlalchemy.engine import create_engine @@ -17,14 +19,12 @@ from sqlalchemy_utils import create_database, database_exists import assets.utils.minio_utils as minio_utils -from alembic import command -from alembic.config import Config from assets.config import settings from assets.db.models import Base from assets.db.service import session_scope_for_dependency +from assets.db.utils import get_test_db_url from assets.main import app, tenant from assets.utils.minio_utils import get_storage -from assets.db.utils import get_test_db_url BUCKET_TESTS = "tests" + uuid.uuid4().hex diff --git a/assets/tests/test_helpers.py b/assets/tests/test_helpers.py index e4a14473b..7bb28c998 100644 --- a/assets/tests/test_helpers.py +++ b/assets/tests/test_helpers.py @@ -5,11 +5,8 @@ from fastapi import HTTPException from assets.db.models import FileObject -from assets.db.service import ( - delete_file_from_db, - insert_file, - update_file_status, -) +from assets.db.service import (delete_file_from_db, insert_file, + update_file_status) from assets.schemas import FileProcessingStatus from assets.utils.minio_utils import check_bucket, delete_one_from_minio diff --git a/assets/tests/test_utils.py b/assets/tests/test_utils.py index 0864d79e1..4782213d1 100644 --- a/assets/tests/test_utils.py +++ b/assets/tests/test_utils.py @@ -12,19 +12,11 @@ import assets.utils.minio_utils as minio_utils from assets.config import settings from assets.db.models import FileObject -from assets.exceptions import ( - BucketError, - FileConversionError, - FileKeyError, - UploadLimitExceedError, -) +from assets.exceptions import (BucketError, FileConversionError, FileKeyError, + 
UploadLimitExceedError) from assets.schemas import ActionResponse -from assets.utils.common_utils import ( - FileConverter, - FileProcessor, - check_uploading_limit, - to_obj, -) +from assets.utils.common_utils import (FileConverter, FileProcessor, + check_uploading_limit, to_obj) from assets.utils.s3_utils import S3Manager ID_ = 12 diff --git a/common/model_api/model_api/pipeline.py b/common/model_api/model_api/pipeline.py index fe3bfdd81..6c4ac53b9 100644 --- a/common/model_api/model_api/pipeline.py +++ b/common/model_api/model_api/pipeline.py @@ -10,11 +10,8 @@ from .common import models as m from .common.minio_utils import MinioCommunicator from .storage_exchange import get_annotation, get_document, put_annotation -from .utils import ( - form_response, - get_needs_from_request_and_annotation, - update_annotation_categories, -) +from .utils import (form_response, get_needs_from_request_and_annotation, + update_annotation_categories) logger = logging.getLogger(__name__) diff --git a/common/model_api/tests/test_api.py b/common/model_api/tests/test_api.py index 212a7c529..a97093362 100644 --- a/common/model_api/tests/test_api.py +++ b/common/model_api/tests/test_api.py @@ -1,15 +1,12 @@ -import pytest - from pathlib import Path from unittest.mock import MagicMock import model_api.pipeline +import pytest from model_api.common import models as m -from model_api.utils import ( - update_annotation_categories, - form_response, - get_needs_from_request_and_annotation, -) +from model_api.utils import (form_response, + get_needs_from_request_and_annotation, + update_annotation_categories) # from model_api.inference import inference diff --git a/common/model_api/tests/test_preprocessing.py b/common/model_api/tests/test_preprocessing.py index 86514aa6d..aa7458c71 100644 --- a/common/model_api/tests/test_preprocessing.py +++ b/common/model_api/tests/test_preprocessing.py @@ -2,16 +2,13 @@ from pathlib import Path from unittest.mock import MagicMock, call -import pytest - import model_api.preprocessing -from model_api.config import settings +import pytest from model_api.common.models import GeometryObject, PageDOD, Size -from model_api.preprocessing import ( - calculate_dpi, - convert_figure_bbox_in_points, - crop_page_images, -) +from model_api.config import settings +from model_api.preprocessing import (calculate_dpi, + convert_figure_bbox_in_points, + crop_page_images) TEST_PDF = Path(__file__).parent / "test_files" / "test_pdf.pdf" diff --git a/common/model_api/tests/test_smoke.py b/common/model_api/tests/test_smoke.py index a2cffa4d7..f2843b7a2 100644 --- a/common/model_api/tests/test_smoke.py +++ b/common/model_api/tests/test_smoke.py @@ -1,9 +1,9 @@ from pathlib import Path -from unittest.mock import MagicMock from pprint import pprint -import pytest +from unittest.mock import MagicMock import model_api +import pytest from model_api.common import models as m from model_api.pipeline import pipeline diff --git a/convert/convert/coco_export/convert.py b/convert/convert/coco_export/convert.py index 2420e7f05..c356bf12a 100644 --- a/convert/convert/coco_export/convert.py +++ b/convert/convert/coco_export/convert.py @@ -9,6 +9,7 @@ import requests from botocore.exceptions import ClientError + from convert.config import minio_client, minio_resource, settings from convert.logger import get_logger from convert.models.coco import Annotation, Category, CocoDataset, Image diff --git a/convert/convert/coco_import/import_service.py b/convert/convert/coco_import/import_service.py index 4292fbda7..891c46012 
100644 --- a/convert/convert/coco_import/import_service.py +++ b/convert/convert/coco_import/import_service.py @@ -4,6 +4,8 @@ from urllib.parse import urljoin import requests +from fastapi import HTTPException, status + from convert.coco_import.convert import ConvertToBadgerdoc from convert.config import settings from convert.exceptions import UploadLimitExceedError @@ -11,7 +13,6 @@ from convert.models import coco from convert.utils.common_utils import check_uploading_limit from convert.utils.s3_utils import S3Manager, s3_download_files -from fastapi import HTTPException, status LOGGER = get_logger(__file__) diff --git a/convert/convert/config.py b/convert/convert/config.py index 1a3a070e7..70fa9d24d 100644 --- a/convert/convert/config.py +++ b/convert/convert/config.py @@ -4,7 +4,6 @@ import boto3 from botocore.client import BaseClient -from convert import logger from dotenv import load_dotenv from mypy_extensions import KwArg, VarArg from pydantic import BaseSettings, Field @@ -12,6 +11,8 @@ from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry +from convert import logger + load_dotenv() diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py index dd0d1148c..a06248619 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter.py @@ -1,19 +1,11 @@ from typing import Any, List, Optional, Tuple from ..models import bd_annotation_model_practic -from ..models.bd_annotation_model import ( - AnnotationLink, - BadgerdocAnnotation, - Obj, - Page, - Size, -) +from ..models.bd_annotation_model import (AnnotationLink, BadgerdocAnnotation, + Obj, Page, Size) from ..models.bd_tokens_model import Page as BadgerdocTokensPage -from ..models.label_studio_models import ( - LabelStudioModel, - ModelItem, - ResultItem, -) +from ..models.label_studio_models import (LabelStudioModel, ModelItem, + ResultItem) from .annotation_converter_practic import AnnotationConverterPractic diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py index 0178a8dd3..f85f84527 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/annotation_converter_practic.py @@ -1,13 +1,8 @@ from typing import List from ..models import bd_annotation_model_practic -from ..models.bd_annotation_model import ( - AnnotationLink, - BadgerdocAnnotation, - Obj, - Page, - Size, -) +from ..models.bd_annotation_model import (AnnotationLink, BadgerdocAnnotation, + Obj, Page, Size) from ..models.bd_tokens_model import Page as BadgerdocTokensPage FIRST_PAGE = 0 diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/badgerdoc_format.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/badgerdoc_format.py index bc03e4d5e..43778c2aa 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/badgerdoc_format.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/badgerdoc_format.py @@ -1,13 +1,9 @@ from pathlib import Path from typing import Optional -from ...config import ( - DEFAULT_PAGE_BORDER_OFFSET, - DEFAULT_PDF_FONT_HEIGHT, - DEFAULT_PDF_FONT_WIDTH, - 
DEFAULT_PDF_LINE_SPACING, - DEFAULT_PDF_PAGE_WIDTH, -) +from ...config import (DEFAULT_PAGE_BORDER_OFFSET, DEFAULT_PDF_FONT_HEIGHT, + DEFAULT_PDF_FONT_WIDTH, DEFAULT_PDF_LINE_SPACING, + DEFAULT_PDF_PAGE_WIDTH) from ..models.bd_annotation_model_practic import BadgerdocAnnotation from ..models.bd_tokens_model import Page from ..models.label_studio_models import LabelStudioModel diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py index 14c72b9c7..681535316 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_format/plain_text_converter.py @@ -2,13 +2,9 @@ import string from typing import Deque, List -from ...config import ( - DEFAULT_PAGE_BORDER_OFFSET, - DEFAULT_PDF_FONT_HEIGHT, - DEFAULT_PDF_FONT_WIDTH, - DEFAULT_PDF_LINE_SPACING, - DEFAULT_PDF_PAGE_WIDTH, -) +from ...config import (DEFAULT_PAGE_BORDER_OFFSET, DEFAULT_PDF_FONT_HEIGHT, + DEFAULT_PDF_FONT_WIDTH, DEFAULT_PDF_LINE_SPACING, + DEFAULT_PDF_PAGE_WIDTH) from ..models import BadgerdocToken, Offset, Page, PageSize diff --git a/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py b/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py index c956f2162..08f07c955 100644 --- a/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py +++ b/convert/convert/label_studio_to_badgerdoc/badgerdoc_to_label_studio_use_case.py @@ -3,15 +3,14 @@ from typing import NamedTuple from botocore.client import BaseClient -from convert.label_studio_to_badgerdoc.badgerdoc_format.annotation_converter_practic import ( - AnnotationConverterToTheory, -) -from convert.label_studio_to_badgerdoc.labelstudio_format import ( - LabelStudioFormat, -) -from convert.logger import get_logger from tenant_dependency import TenantData +from convert.label_studio_to_badgerdoc.badgerdoc_format.annotation_converter_practic import \ + AnnotationConverterToTheory +from convert.label_studio_to_badgerdoc.labelstudio_format import \ + LabelStudioFormat +from convert.logger import get_logger + from .models import S3Path, bd_annotation_model_practic from .models.bd_annotation_model import BadgerdocAnnotation from .models.bd_manifest_model_practic import Manifest diff --git a/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py b/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py index 11e90cb5b..496728042 100644 --- a/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py +++ b/convert/convert/label_studio_to_badgerdoc/label_studio_to_badgerdoc_use_case.py @@ -7,33 +7,25 @@ import requests from botocore.client import BaseClient from botocore.exceptions import ClientError -from convert.config import DEFAULT_PAGE_BORDER_OFFSET, settings -from convert.label_studio_to_badgerdoc.badgerdoc_format.annotation_converter import ( - AnnotationConverter, -) -from convert.label_studio_to_badgerdoc.badgerdoc_format.badgerdoc_format import ( - BadgerdocFormat, -) -from convert.label_studio_to_badgerdoc.badgerdoc_format.pdf_renderer import ( - PDFRenderer, -) -from convert.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import ( - TextToBadgerdocTokensConverter, -) -from convert.label_studio_to_badgerdoc.models import ( - BadgerdocToken, - DocumentLink, -) -from 
convert.label_studio_to_badgerdoc.models.label_studio_models import ( - LabelStudioModel, - S3Path, - ValidationType, -) -from convert.logger import get_logger from fastapi import HTTPException, status from fastapi.encoders import jsonable_encoder from tenant_dependency import TenantData +from convert.config import DEFAULT_PAGE_BORDER_OFFSET, settings +from convert.label_studio_to_badgerdoc.badgerdoc_format.annotation_converter import \ + AnnotationConverter +from convert.label_studio_to_badgerdoc.badgerdoc_format.badgerdoc_format import \ + BadgerdocFormat +from convert.label_studio_to_badgerdoc.badgerdoc_format.pdf_renderer import \ + PDFRenderer +from convert.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import \ + TextToBadgerdocTokensConverter +from convert.label_studio_to_badgerdoc.models import (BadgerdocToken, + DocumentLink) +from convert.label_studio_to_badgerdoc.models.label_studio_models import ( + LabelStudioModel, S3Path, ValidationType) +from convert.logger import get_logger + LOGGER = get_logger(__file__) LOGGER.setLevel("DEBUG") diff --git a/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py b/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py index a7987b42f..1d90f9aaa 100644 --- a/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py +++ b/convert/convert/label_studio_to_badgerdoc/labelstudio_format/label_studio_format.py @@ -2,23 +2,17 @@ from typing import Any, Dict, List, Optional import requests +from fastapi import HTTPException, status + from convert.config import settings from convert.logger import get_logger -from fastapi import HTTPException, status from ..models.bd_annotation_model import AnnotationLink, BadgerdocAnnotation from ..models.bd_manifest_model_practic import Manifest from ..models.bd_tokens_model import BadgerdocToken, Page -from ..models.label_studio_models import ( - Annotation, - Data, - DocumentRelation, - LabelStudioModel, - Meta, - ModelItem, - ResultItem, - Value, -) +from ..models.label_studio_models import (Annotation, Data, DocumentRelation, + LabelStudioModel, Meta, ModelItem, + ResultItem, Value) LOGGER = get_logger(__file__) LOGGER.setLevel("DEBUG") diff --git a/convert/convert/label_studio_to_badgerdoc/models/__init__.py b/convert/convert/label_studio_to_badgerdoc/models/__init__.py index 01d1e8f04..648f82ad2 100644 --- a/convert/convert/label_studio_to_badgerdoc/models/__init__.py +++ b/convert/convert/label_studio_to_badgerdoc/models/__init__.py @@ -2,12 +2,6 @@ from .bd_annotation_model_practic import DocumentLink from .bd_tokens_model import BadgerdocToken, Offset, Page, PageSize from .common import S3Path -from .label_studio_models import ( - Annotation, - BadgerdocToLabelStudioRequest, - LabelStudioModel, - LabelStudioRequest, - ModelItem, - Prediction, - ResultItem, -) +from .label_studio_models import (Annotation, BadgerdocToLabelStudioRequest, + LabelStudioModel, LabelStudioRequest, + ModelItem, Prediction, ResultItem) diff --git a/convert/convert/label_studio_to_badgerdoc/text_to_badgerdoc_use_case.py b/convert/convert/label_studio_to_badgerdoc/text_to_badgerdoc_use_case.py index 54d130221..97c8629c3 100644 --- a/convert/convert/label_studio_to_badgerdoc/text_to_badgerdoc_use_case.py +++ b/convert/convert/label_studio_to_badgerdoc/text_to_badgerdoc_use_case.py @@ -4,9 +4,8 @@ from ..config import DEFAULT_PAGE_BORDER_OFFSET from .badgerdoc_format.badgerdoc_format import BadgerdocFormat from 
.badgerdoc_format.pdf_renderer import PDFRenderer -from .badgerdoc_format.plain_text_converter import ( - TextToBadgerdocTokensConverter, -) +from .badgerdoc_format.plain_text_converter import \ + TextToBadgerdocTokensConverter from .models.common import S3Path diff --git a/convert/convert/main.py b/convert/convert/main.py index 281d03761..83b3403b5 100644 --- a/convert/convert/main.py +++ b/convert/convert/main.py @@ -2,7 +2,7 @@ from convert.config import API_NAME, API_VERSION, settings from convert.logger import get_logger -from convert.routers import coco, text, label_studio +from convert.routers import coco, label_studio, text LOGGER = get_logger(__file__) diff --git a/convert/convert/routers/coco.py b/convert/convert/routers/coco.py index 200ae48df..c440091c3 100644 --- a/convert/convert/routers/coco.py +++ b/convert/convert/routers/coco.py @@ -2,21 +2,20 @@ from urllib.parse import urlparse import requests +from fastapi import APIRouter, BackgroundTasks, Depends, Header, status +from fastapi.responses import Response, StreamingResponse +from requests import HTTPError +from tenant_dependency import TenantData, get_tenant_info + from convert.coco_export.convert import ConvertToCoco, ExportBadgerdoc -from convert.coco_export.export_service import ( - export_run, - export_run_and_return_url, -) +from convert.coco_export.export_service import (export_run, + export_run_and_return_url) from convert.coco_import.convert import ConvertToBadgerdoc from convert.coco_import.import_job import create_import_job from convert.config import minio_client, settings from convert.logger import get_logger from convert.models import coco from convert.utils.s3_utils import get_bucket_path -from fastapi import APIRouter, BackgroundTasks, Depends, Header, status -from fastapi.responses import Response, StreamingResponse -from requests import HTTPError -from tenant_dependency import TenantData, get_tenant_info router = APIRouter(prefix="/coco", tags=["coco"]) LOGGER = get_logger(__file__) diff --git a/convert/convert/routers/label_studio.py b/convert/convert/routers/label_studio.py index 57628ceba..fa240dcee 100644 --- a/convert/convert/routers/label_studio.py +++ b/convert/convert/routers/label_studio.py @@ -1,19 +1,17 @@ from typing import Optional -from convert.config import minio_client, settings -from convert.label_studio_to_badgerdoc.badgerdoc_to_label_studio_use_case import ( - BDToLabelStudioConvertUseCase, -) -from convert.label_studio_to_badgerdoc.label_studio_to_badgerdoc_use_case import ( - LabelStudioToBDConvertUseCase, -) -from convert.label_studio_to_badgerdoc.models import LabelStudioRequest -from convert.label_studio_to_badgerdoc.models.label_studio_models import ( - BadgerdocToLabelStudioRequest, -) from fastapi import APIRouter, Depends, Header, status from tenant_dependency import TenantData, get_tenant_info +from convert.config import minio_client, settings +from convert.label_studio_to_badgerdoc.badgerdoc_to_label_studio_use_case import \ + BDToLabelStudioConvertUseCase +from convert.label_studio_to_badgerdoc.label_studio_to_badgerdoc_use_case import \ + LabelStudioToBDConvertUseCase +from convert.label_studio_to_badgerdoc.models import LabelStudioRequest +from convert.label_studio_to_badgerdoc.models.label_studio_models import \ + BadgerdocToLabelStudioRequest + router = APIRouter(prefix="/label_studio", tags=["label_studio"]) tenant = get_tenant_info( url=settings.keycloak_url, algorithm="RS256", debug=True diff --git a/convert/convert/routers/text.py b/convert/convert/routers/text.py 
index fd0b413e4..9f12863f9 100644 --- a/convert/convert/routers/text.py +++ b/convert/convert/routers/text.py @@ -1,9 +1,9 @@ +from fastapi import APIRouter, status + from convert.config import minio_client from convert.label_studio_to_badgerdoc.models.text_model import TextRequest -from convert.label_studio_to_badgerdoc.text_to_badgerdoc_use_case import ( - TextToBDConvertUseCase, -) -from fastapi import APIRouter, status +from convert.label_studio_to_badgerdoc.text_to_badgerdoc_use_case import \ + TextToBDConvertUseCase router = APIRouter(prefix="/text", tags=["text"]) diff --git a/convert/convert/utils/render_pdf_page.py b/convert/convert/utils/render_pdf_page.py index 4e653cb89..d17d8101f 100644 --- a/convert/convert/utils/render_pdf_page.py +++ b/convert/convert/utils/render_pdf_page.py @@ -3,6 +3,7 @@ from zipfile import ZipFile import pdfplumber + from convert.config import settings from convert.logger import get_logger from convert.utils.common_utils import add_to_zip_and_local_remove diff --git a/convert/convert/utils/s3_utils.py b/convert/convert/utils/s3_utils.py index 12a119b05..16efd9755 100644 --- a/convert/convert/utils/s3_utils.py +++ b/convert/convert/utils/s3_utils.py @@ -3,16 +3,14 @@ import boto3 import urllib3 +from fastapi import HTTPException, status + from convert.config import settings -from convert.exceptions import ( - BucketError, - FileKeyError, - UploadLimitExceedError, -) +from convert.exceptions import (BucketError, FileKeyError, + UploadLimitExceedError) from convert.logger import get_logger from convert.models import coco from convert.utils.common_utils import check_uploading_limit -from fastapi import HTTPException, status logger = get_logger(__name__) diff --git a/convert/tests/test_label_studio/test_export.py b/convert/tests/test_label_studio/test_export.py index e3a30b379..01ca384bb 100644 --- a/convert/tests/test_label_studio/test_export.py +++ b/convert/tests/test_label_studio/test_export.py @@ -1,19 +1,14 @@ from pathlib import Path -from convert.label_studio_to_badgerdoc.badgerdoc_format import ( - annotation_converter_practic, -) -from convert.label_studio_to_badgerdoc.labelstudio_format.label_studio_format import ( - LabelStudioFormat, -) +from convert.label_studio_to_badgerdoc.badgerdoc_format import \ + annotation_converter_practic +from convert.label_studio_to_badgerdoc.labelstudio_format.label_studio_format import \ + LabelStudioFormat from convert.label_studio_to_badgerdoc.models import ( - bd_annotation_model_practic, - bd_manifest_model_practic, -) + bd_annotation_model_practic, bd_manifest_model_practic) from convert.label_studio_to_badgerdoc.models.bd_tokens_model import Page -from convert.label_studio_to_badgerdoc.models.label_studio_models import ( - LabelStudioModel, -) +from convert.label_studio_to_badgerdoc.models.label_studio_models import \ + LabelStudioModel TEST_FILES_DIR = Path(__file__).parent / "test_data" diff --git a/convert/tests/test_label_studio/test_import.py b/convert/tests/test_label_studio/test_import.py index 048bbe6e0..c1a53782c 100644 --- a/convert/tests/test_label_studio/test_import.py +++ b/convert/tests/test_label_studio/test_import.py @@ -2,22 +2,15 @@ from pathlib import Path from tempfile import TemporaryDirectory -from convert.config import ( - DEFAULT_PAGE_BORDER_OFFSET, - DEFAULT_PDF_FONT_HEIGHT, - DEFAULT_PDF_FONT_WIDTH, - DEFAULT_PDF_LINE_SPACING, - DEFAULT_PDF_PAGE_WIDTH, -) -from convert.label_studio_to_badgerdoc.badgerdoc_format.badgerdoc_format import ( - BadgerdocFormat, -) -from 
convert.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import ( - TextToBadgerdocTokensConverter, -) -from convert.label_studio_to_badgerdoc.models.label_studio_models import ( - LabelStudioModel, -) +from convert.config import (DEFAULT_PAGE_BORDER_OFFSET, + DEFAULT_PDF_FONT_HEIGHT, DEFAULT_PDF_FONT_WIDTH, + DEFAULT_PDF_LINE_SPACING, DEFAULT_PDF_PAGE_WIDTH) +from convert.label_studio_to_badgerdoc.badgerdoc_format.badgerdoc_format import \ + BadgerdocFormat +from convert.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import \ + TextToBadgerdocTokensConverter +from convert.label_studio_to_badgerdoc.models.label_studio_models import \ + LabelStudioModel TEST_FILES_DIR = Path(__file__).parent / "test_data" diff --git a/convert/tests/test_label_studio/test_text_wrapper.py b/convert/tests/test_label_studio/test_text_wrapper.py index aef00398f..27b3f3d3f 100644 --- a/convert/tests/test_label_studio/test_text_wrapper.py +++ b/convert/tests/test_label_studio/test_text_wrapper.py @@ -1,9 +1,8 @@ import collections import string -from convert.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import ( # noqa: E501 - TextWrapper, -) +from convert.label_studio_to_badgerdoc.badgerdoc_format.plain_text_converter import \ + TextWrapper # noqa: E501 def test_pop_beginning_whitespaces_text_begin_with_whitespaces(): diff --git a/dev_runner/conf/shared.env b/dev_runner/conf/shared.env index 1a0e58726..1d420eba2 100644 --- a/dev_runner/conf/shared.env +++ b/dev_runner/conf/shared.env @@ -85,7 +85,7 @@ GOTENBERG_LIBRE_OFFICE_ENDPOINT="http://${GOTENBERG}/forms/libreoffice/convert" GOTENBERG_FORMATS=[".txt",".docx",".doc",".bib",".xml",".fodt",".html",".ltx",".odt",".ott",".pdb",".psw",".rtf",".sdw",".stw",".sxw",".uot",".vor",".wps",".epub",".emf",".fodg",".met",".odd",".otg",".std",".svg",".svm",".swf",".sxd",".sxw",".tiff",".xhtml",".xpm",".fodp",".potm",".pot",".pptx",".pps",".ppt",".pwp",".sda",".sdd",".sti",".sxi",".uop",".wmf",".odp"] IMAGE_FORMATS=[".png",".bmp",".pbm",".pct",".pgm",".ppm",".ras",".tiff"] -ROOT_PATH= +ROOT_PATH=/ LOG_LEVEL=DEBUG ES_HOST_TEST=localhost diff --git a/dev_runner/dev_runner/conf.py b/dev_runner/dev_runner/conf.py index 7db17fc22..c7050ffda 100644 --- a/dev_runner/dev_runner/conf.py +++ b/dev_runner/dev_runner/conf.py @@ -1,6 +1,6 @@ import os -from pydantic import BaseSettings +from pydantic import BaseSettings BASE_PORT = os.environ.get("BD_BASE_PORT", 8000) diff --git a/dev_runner/dev_runner/runners/jobs_runner.py b/dev_runner/dev_runner/runners/jobs_runner.py index d1a845cfd..a61d3b652 100644 --- a/dev_runner/dev_runner/runners/jobs_runner.py +++ b/dev_runner/dev_runner/runners/jobs_runner.py @@ -1,6 +1,7 @@ -from .base_runner import BaseRunner from dev_runner.conf import settings +from .base_runner import BaseRunner + class JobsRunner(BaseRunner): PACKAGE_NAME = "jobs" diff --git a/dev_runner/dev_runner/runners/models_runner.py b/dev_runner/dev_runner/runners/models_runner.py index 9ba6ca029..004c9a886 100644 --- a/dev_runner/dev_runner/runners/models_runner.py +++ b/dev_runner/dev_runner/runners/models_runner.py @@ -1,6 +1,7 @@ -from .base_runner import BaseRunner from dev_runner.conf import settings +from .base_runner import BaseRunner + class ModelsRunner(BaseRunner): PACKAGE_NAME = "models" diff --git a/dev_runner/dev_runner/runners/pipelines_runner.py b/dev_runner/dev_runner/runners/pipelines_runner.py index 73a29a32d..caff6c5aa 100644 --- a/dev_runner/dev_runner/runners/pipelines_runner.py +++ 
b/dev_runner/dev_runner/runners/pipelines_runner.py @@ -1,6 +1,7 @@ -from .base_runner import BaseRunner from dev_runner.conf import settings +from .base_runner import BaseRunner + class PipelinesRunner(BaseRunner): PACKAGE_NAME = "pipelines" diff --git a/dev_runner/dev_runner/runners/processing_runner.py b/dev_runner/dev_runner/runners/processing_runner.py index 498f6432b..669ed4ce7 100644 --- a/dev_runner/dev_runner/runners/processing_runner.py +++ b/dev_runner/dev_runner/runners/processing_runner.py @@ -1,7 +1,9 @@ -from .base_runner import BaseRunner -from dev_runner.conf import settings import logging +from dev_runner.conf import settings + +from .base_runner import BaseRunner + class ProcessingRunner(BaseRunner): PACKAGE_NAME = "processing" diff --git a/dev_runner/dev_runner/runners/search_runner.py b/dev_runner/dev_runner/runners/search_runner.py index adabe94ca..75dbc336c 100644 --- a/dev_runner/dev_runner/runners/search_runner.py +++ b/dev_runner/dev_runner/runners/search_runner.py @@ -1,6 +1,7 @@ -from .base_runner import BaseRunner from dev_runner.conf import settings +from .base_runner import BaseRunner + class SearchRunner(BaseRunner): PACKAGE_NAME = "search" diff --git a/dev_runner/dev_runner/runners/taxonomy_runner.py b/dev_runner/dev_runner/runners/taxonomy_runner.py index 2de80283a..c73f2f011 100644 --- a/dev_runner/dev_runner/runners/taxonomy_runner.py +++ b/dev_runner/dev_runner/runners/taxonomy_runner.py @@ -1,6 +1,7 @@ -from .base_runner import BaseRunner from dev_runner.conf import settings +from .base_runner import BaseRunner + class TaxonomyRunner(BaseRunner): PACKAGE_NAME = "taxonomy" diff --git a/dev_runner/goten.env b/dev_runner/goten.env index 70bc115b2..10d351790 100644 --- a/dev_runner/goten.env +++ b/dev_runner/goten.env @@ -16,7 +16,7 @@ DATABASE_URL="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POST S3_PREFIX= S3_ENDPOINT=minio:9000 S3_ACCESS_KEY=minioadmin -S3_SECRET_KEY =minioadmin +S3_SECRET_KEY=minioadmin TEST_REGION=us-west-2 MINIO_SECURE_CONNECTION=False diff --git a/dev_runner/migration.sh b/dev_runner/migration.sh index f682da2c4..2d279fbc6 100755 --- a/dev_runner/migration.sh +++ b/dev_runner/migration.sh @@ -1,6 +1,8 @@ SHARED_PATH=$(realpath "./conf/shared.env") ASSETS_PATH="./conf/assets.env" +set -e + for service in "assets" "annotation" "jobs" "models" "pipelines" "processing" "scheduler" "taxonomy" do diff --git a/dev_runner/start.py b/dev_runner/start.py index f4e9d0ab8..5bfe67fed 100644 --- a/dev_runner/start.py +++ b/dev_runner/start.py @@ -2,6 +2,8 @@ from pathlib import Path import click +from dotenv import load_dotenv + from dev_runner.runners.annotation_runner import AnnotationRunner from dev_runner.runners.assets_runner import AssetsRunner from dev_runner.runners.base_runner import RunnerRegistry @@ -14,7 +16,6 @@ from dev_runner.runners.search_runner import SearchRunner from dev_runner.runners.taxonomy_runner import TaxonomyRunner from dev_runner.runners.users_runner import UsersRunner -from dotenv import load_dotenv ROOT_DIR = Path(__file__).parent SHARED_DOT_ENV = ROOT_DIR / "conf" / "shared.env" diff --git a/jobs/alembic/env.py b/jobs/alembic/env.py index adc3a929f..461221e00 100644 --- a/jobs/alembic/env.py +++ b/jobs/alembic/env.py @@ -2,10 +2,10 @@ import os from logging.config import fileConfig -from jobs.utils import get_test_db_url +from alembic import context from sqlalchemy import engine_from_config, pool -from alembic import context +from jobs.utils import get_test_db_url # this is the Alembic Config 
object, which provides # access to the values within the .ini file in use. diff --git a/jobs/alembic/versions/13ac4bb3abd2_.py b/jobs/alembic/versions/13ac4bb3abd2_.py index 8db2dcf3d..557d9c2a5 100644 --- a/jobs/alembic/versions/13ac4bb3abd2_.py +++ b/jobs/alembic/versions/13ac4bb3abd2_.py @@ -7,9 +7,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "13ac4bb3abd2" diff --git a/jobs/alembic/versions/3f5b2d199d38_.py b/jobs/alembic/versions/3f5b2d199d38_.py index 8b1872cd8..e7d0884d9 100644 --- a/jobs/alembic/versions/3f5b2d199d38_.py +++ b/jobs/alembic/versions/3f5b2d199d38_.py @@ -7,7 +7,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/jobs/alembic/versions/7511c6790067_.py b/jobs/alembic/versions/7511c6790067_.py index 2857a099a..d17007933 100644 --- a/jobs/alembic/versions/7511c6790067_.py +++ b/jobs/alembic/versions/7511c6790067_.py @@ -7,9 +7,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "7511c6790067" diff --git a/jobs/alembic/versions/83694c0b2df6_.py b/jobs/alembic/versions/83694c0b2df6_.py index 64f769bc2..1398e1840 100644 --- a/jobs/alembic/versions/83694c0b2df6_.py +++ b/jobs/alembic/versions/83694c0b2df6_.py @@ -7,7 +7,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/jobs/alembic/versions/86f432539475_.py b/jobs/alembic/versions/86f432539475_.py index ec341ec58..18e95a46d 100644 --- a/jobs/alembic/versions/86f432539475_.py +++ b/jobs/alembic/versions/86f432539475_.py @@ -7,7 +7,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/jobs/alembic/versions/9229e70d2791_.py b/jobs/alembic/versions/9229e70d2791_.py index 19b0c9c87..d9223fd51 100644 --- a/jobs/alembic/versions/9229e70d2791_.py +++ b/jobs/alembic/versions/9229e70d2791_.py @@ -7,9 +7,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "9229e70d2791" diff --git a/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py b/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py index fd8323931..80f51df0a 100644 --- a/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py +++ b/jobs/alembic/versions/b4afb5ae8923_add_start_manual_job_automatically_flag.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/jobs/alembic/versions/d1ddce2d5352_.py b/jobs/alembic/versions/d1ddce2d5352_.py index 1e1ea086b..2b2c28312 100644 --- a/jobs/alembic/versions/d1ddce2d5352_.py +++ b/jobs/alembic/versions/d1ddce2d5352_.py @@ -7,7 +7,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. 
diff --git a/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py b/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py index 2ae6058a5..2dab1ada2 100644 --- a/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py +++ b/jobs/alembic/versions/f60dd492b17f_add_extensive_coverage_param.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/jobs/jobs/create_job_funcs.py b/jobs/jobs/create_job_funcs.py index 25550c1a4..da528d783 100644 --- a/jobs/jobs/create_job_funcs.py +++ b/jobs/jobs/create_job_funcs.py @@ -1,13 +1,14 @@ import itertools from typing import Any, Dict, List, Tuple +from fastapi import Depends, HTTPException, status +from sqlalchemy.orm import Session + import jobs.db_service as db_service import jobs.models as dbm import jobs.schemas as schemas import jobs.utils as utils -from fastapi import Depends, HTTPException, status from jobs.schemas import ExtractionJobParams -from sqlalchemy.orm import Session async def get_all_datasets_and_files_data( diff --git a/jobs/jobs/db_service.py b/jobs/jobs/db_service.py index 7955eac2e..7d9b2f3e8 100644 --- a/jobs/jobs/db_service.py +++ b/jobs/jobs/db_service.py @@ -1,11 +1,12 @@ from datetime import datetime from typing import Any, Dict, Generator, List, Union +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker + import jobs.config as config import jobs.models as dbm import jobs.schemas as schemas -from sqlalchemy import create_engine -from sqlalchemy.orm import Session, sessionmaker engine = create_engine(config.POSTGRESQL_JOBMANAGER_DATABASE_URI) LocalSession = sessionmaker(autocommit=False, autoflush=False, bind=engine) diff --git a/jobs/jobs/main.py b/jobs/jobs/main.py index 1840355f0..57a8939f7 100644 --- a/jobs/jobs/main.py +++ b/jobs/jobs/main.py @@ -1,18 +1,19 @@ import asyncio from typing import Any, Dict, List, Optional, Union +from fastapi import Depends, FastAPI, Header, HTTPException, status +from filter_lib import Page, form_query, map_request_to_filter, paginate +from sqlalchemy.orm import Session +from sqlalchemy_filters.exceptions import BadFilterFormat +from tenant_dependency import TenantData, get_tenant_info + import jobs.create_job_funcs as create_job_funcs import jobs.db_service as db_service import jobs.models as dbm import jobs.run_job_funcs as run_job_funcs import jobs.schemas as schemas import jobs.utils as utils -from fastapi import Depends, FastAPI, Header, HTTPException, status -from filter_lib import Page, form_query, map_request_to_filter, paginate from jobs.config import KEYCLOAK_HOST, ROOT_PATH, API_current_version -from sqlalchemy.orm import Session -from sqlalchemy_filters.exceptions import BadFilterFormat -from tenant_dependency import TenantData, get_tenant_info tenant = get_tenant_info(url=KEYCLOAK_HOST, algorithm="RS256", debug=True) app = FastAPI( diff --git a/jobs/jobs/utils.py b/jobs/jobs/utils.py index 358d26687..522725c0f 100644 --- a/jobs/jobs/utils.py +++ b/jobs/jobs/utils.py @@ -2,26 +2,17 @@ import aiohttp.client_exceptions import fastapi.encoders +from sqlalchemy.orm import Session + from jobs import db_service -from jobs.config import ( - HOST_ANNOTATION, - HOST_ASSETS, - HOST_PIPELINES, - HOST_TAXONOMY, - JOBS_HOST, - PAGINATION_THRESHOLD, - ROOT_PATH, -) +from jobs.config import (HOST_ANNOTATION, HOST_ASSETS, HOST_PIPELINES, + HOST_TAXONOMY, JOBS_HOST, PAGINATION_THRESHOLD, + ROOT_PATH) from jobs.logger import logger from jobs.models import 
CombinedJob -from jobs.schemas import ( - AnnotationJobUpdateParamsInAnnotation, - CategoryLinkInput, - CategoryLinkParams, - JobMode, - JobParamsToChange, -) -from sqlalchemy.orm import Session +from jobs.schemas import (AnnotationJobUpdateParamsInAnnotation, + CategoryLinkInput, CategoryLinkParams, JobMode, + JobParamsToChange) async def get_files_data_from_datasets( diff --git a/jobs/tests/conftest.py b/jobs/tests/conftest.py index 667f04a4d..fc3ae807f 100644 --- a/jobs/tests/conftest.py +++ b/jobs/tests/conftest.py @@ -4,24 +4,21 @@ from typing import List from unittest.mock import patch -import jobs.db_service as service -import jobs.main as main -import jobs.schemas as schemas import pytest +from alembic import command +from alembic.config import Config from fastapi.testclient import TestClient -from jobs.utils import get_test_db_url from pydantic import BaseModel from sqlalchemy import create_engine # type: ignore from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import sessionmaker # type: ignore -from sqlalchemy_utils import ( # type: ignore - create_database, - database_exists, - drop_database, -) +from sqlalchemy_utils import (create_database, database_exists, # type: ignore + drop_database) -from alembic import command -from alembic.config import Config +import jobs.db_service as service +import jobs.main as main +import jobs.schemas as schemas +from jobs.utils import get_test_db_url main_database_url = os.environ.get("POSTGRESQL_JOBMANAGER_DATABASE_URI") test_db_url = get_test_db_url(main_database_url) diff --git a/jobs/tests/test_API_functions/test_change_job.py b/jobs/tests/test_API_functions/test_change_job.py index dff718b0c..ef423d1b2 100644 --- a/jobs/tests/test_API_functions/test_change_job.py +++ b/jobs/tests/test_API_functions/test_change_job.py @@ -1,12 +1,11 @@ import asyncio -import pytest from unittest.mock import patch +import pytest +from tests.test_db import (create_mock_annotation_job_in_db, + create_mock_extraction_job_in_db) + import jobs.schemas as schemas -from tests.test_db import ( - create_mock_annotation_job_in_db, - create_mock_extraction_job_in_db, -) def test_change_job_status_with_validation_correct_jwt_provided( diff --git a/jobs/tests/test_API_functions/test_create_job.py b/jobs/tests/test_API_functions/test_create_job.py index 31a91435e..47b4d2bbc 100644 --- a/jobs/tests/test_API_functions/test_create_job.py +++ b/jobs/tests/test_API_functions/test_create_job.py @@ -4,9 +4,10 @@ import aiohttp.client_exceptions import freezegun +import pytest + import jobs.create_job_funcs as create_job_funcs import jobs.schemas as schemas -import pytest # ----------- Create Job Drafts ------------- # diff --git a/jobs/tests/test_API_functions/test_other_API_functions.py b/jobs/tests/test_API_functions/test_other_API_functions.py index 8447713ed..1f6471ece 100644 --- a/jobs/tests/test_API_functions/test_other_API_functions.py +++ b/jobs/tests/test_API_functions/test_other_API_functions.py @@ -1,10 +1,10 @@ import asyncio from unittest.mock import patch + +from tests.test_db import (create_mock_annotation_job_in_db, + create_mock_extraction_job_in_db) + import jobs.schemas as schemas -from tests.test_db import ( - create_mock_annotation_job_in_db, - create_mock_extraction_job_in_db, -) def test_get_all_jobs_endpoint( diff --git a/jobs/tests/test_API_functions/test_search_jobs.py b/jobs/tests/test_API_functions/test_search_jobs.py index e53234843..84695aecb 100644 --- a/jobs/tests/test_API_functions/test_search_jobs.py +++ 
b/jobs/tests/test_API_functions/test_search_jobs.py @@ -1,7 +1,5 @@ -from tests.test_db import ( - create_mock_annotation_job_in_db, - create_mock_extraction_job_in_db, -) +from tests.test_db import (create_mock_annotation_job_in_db, + create_mock_extraction_job_in_db) def test_search_job_positive(testing_app, testing_session): diff --git a/jobs/tests/test_utils.py b/jobs/tests/test_utils.py index ec68c45a2..641f21caa 100644 --- a/jobs/tests/test_utils.py +++ b/jobs/tests/test_utils.py @@ -1,10 +1,11 @@ from unittest.mock import patch import aiohttp.client_exceptions -import jobs.utils as utils import pytest from fastapi import HTTPException +import jobs.utils as utils + # --------------TEST get_files_data_from_datasets--------------- diff --git a/lib/filter_lib/tests/test_dict_parser.py b/lib/filter_lib/tests/test_dict_parser.py index 5228d14b8..29173a00e 100644 --- a/lib/filter_lib/tests/test_dict_parser.py +++ b/lib/filter_lib/tests/test_dict_parser.py @@ -1,6 +1,5 @@ from ..src.dict_parser import map_request_to_filter - example_1 = { "pagination": {"page_num": 1, "page_size": 50}, "filters": [ diff --git a/lib/filter_lib/tests/test_enum_generator.py b/lib/filter_lib/tests/test_enum_generator.py index a064f6414..c8985c624 100644 --- a/lib/filter_lib/tests/test_enum_generator.py +++ b/lib/filter_lib/tests/test_enum_generator.py @@ -1,10 +1,6 @@ -from .conftest import User, Address -from ..src.enum_generator import ( - _get_model_fields, - _exclude_fields, - _get_table_name, - _create_enum_model, -) +from ..src.enum_generator import (_create_enum_model, _exclude_fields, + _get_model_fields, _get_table_name) +from .conftest import Address, User def test_get_model_fields(): diff --git a/lib/filter_lib/tests/test_pagination.py b/lib/filter_lib/tests/test_pagination.py index 638faf66b..4d535fe5c 100644 --- a/lib/filter_lib/tests/test_pagination.py +++ b/lib/filter_lib/tests/test_pagination.py @@ -1,12 +1,9 @@ -from ..src.pagination import ( - paginate, - PaginationParams, - make_pagination, - _calculate_num_pages, -) -from .conftest import User -from pydantic import ValidationError import pytest +from pydantic import ValidationError + +from ..src.pagination import (PaginationParams, _calculate_num_pages, + make_pagination, paginate) +from .conftest import User @pytest.mark.parametrize( diff --git a/lib/filter_lib/tests/test_query_modifier.py b/lib/filter_lib/tests/test_query_modifier.py index 2a6f36b75..182e4a683 100644 --- a/lib/filter_lib/tests/test_query_modifier.py +++ b/lib/filter_lib/tests/test_query_modifier.py @@ -1,12 +1,7 @@ from ..src.enum_generator import get_enum_from_orm -from ..src.query_modificator import ( - _create_filter, - _create_or_condition, - _get_column, - _get_entity, - _op_is_not, - form_query, -) +from ..src.query_modificator import (_create_filter, _create_or_condition, + _get_column, _get_entity, _op_is_not, + form_query) from .conftest import Address, Category, User diff --git a/lib/filter_lib/tests/test_schema_generator.py b/lib/filter_lib/tests/test_schema_generator.py index ce842929a..a7310cc0c 100644 --- a/lib/filter_lib/tests/test_schema_generator.py +++ b/lib/filter_lib/tests/test_schema_generator.py @@ -1,9 +1,5 @@ -from .conftest import User, Address -from ..src.schema_generator import ( - create_filter_model, - Page, - PaginationOut, -) +from ..src.schema_generator import Page, PaginationOut, create_filter_model +from .conftest import Address, User def test_search_class_creating(): diff --git a/lib/filter_lib/usage_example/app.py 
b/lib/filter_lib/usage_example/app.py index e3f18d07e..05e3643ae 100644 --- a/lib/filter_lib/usage_example/app.py +++ b/lib/filter_lib/usage_example/app.py @@ -2,13 +2,8 @@ from db_example import Address, User, get_db from fastapi import Depends, FastAPI -from filter_lib import ( # type: ignore - Page, - create_filter_model, - form_query, - map_request_to_filter, - paginate, -) +from filter_lib import (Page, create_filter_model, form_query, # type: ignore + map_request_to_filter, paginate) from pydantic import BaseModel from sqlalchemy.orm import Session diff --git a/lib/tenants/tests/conftest.py b/lib/tenants/tests/conftest.py index 4132b7027..69df9f771 100644 --- a/lib/tenants/tests/conftest.py +++ b/lib/tenants/tests/conftest.py @@ -4,10 +4,9 @@ import pytest from fastapi import Depends, FastAPI from fastapi.testclient import TestClient -from usage_example.jwt_generator import create_access_token - from src import TenantData from src.dependency import get_tenant_info +from usage_example.jwt_generator import create_access_token SECRET_KEY = "test_secret_key" diff --git a/lib/tenants/tests/test_dependency_hs256.py b/lib/tenants/tests/test_dependency_hs256.py index 8b71bc0b5..ef441cad5 100644 --- a/lib/tenants/tests/test_dependency_hs256.py +++ b/lib/tenants/tests/test_dependency_hs256.py @@ -1,8 +1,5 @@ -from src.dependency import ( - TenantDependencyBase, - TenantDependencyDocs, - get_tenant_info, -) +from src.dependency import (TenantDependencyBase, TenantDependencyDocs, + get_tenant_info) CURRENT_TENANT = "tenant1" diff --git a/lib/tenants/tests/test_dependency_rs256.py b/lib/tenants/tests/test_dependency_rs256.py index b7f8280dc..4e0198e05 100644 --- a/lib/tenants/tests/test_dependency_rs256.py +++ b/lib/tenants/tests/test_dependency_rs256.py @@ -1,8 +1,5 @@ -from src.dependency import ( - TenantDependencyBase, - TenantDependencyDocs, - get_tenant_info, -) +from src.dependency import (TenantDependencyBase, TenantDependencyDocs, + get_tenant_info) CURRENT_TENANT = "tenant1" diff --git a/lib/tenants/tests/test_schema.py b/lib/tenants/tests/test_schema.py index c79257cec..d2e89148d 100644 --- a/lib/tenants/tests/test_schema.py +++ b/lib/tenants/tests/test_schema.py @@ -1,6 +1,5 @@ import pytest from pydantic import ValidationError - from src.schema import SupportedAlgorithms, TenantData diff --git a/models/alembic/env.py b/models/alembic/env.py index 040ba1808..c3d0384de 100644 --- a/models/alembic/env.py +++ b/models/alembic/env.py @@ -1,12 +1,12 @@ import os from logging.config import fileConfig +from alembic import context +from sqlalchemy import engine_from_config, pool + from models.constants import DATABASE_URL from models.db import Base from models.utils import get_test_db_url -from sqlalchemy import engine_from_config, pool - -from alembic import context # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py b/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py index a3e04f982..92751944d 100644 --- a/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py +++ b/models/alembic/versions/0c3e4fd362de_add_description_field_to_model.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. 
diff --git a/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py b/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py index 79a891592..79146f5aa 100644 --- a/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py +++ b/models/alembic/versions/5c3092bc3517_add_columns_to_basement.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py b/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py index f4f40b469..26d9c8b94 100644 --- a/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py +++ b/models/alembic/versions/61787083221a_added_archive_field_to_training_model.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/models/alembic/versions/683f401ed33e_create_tables.py b/models/alembic/versions/683f401ed33e_create_tables.py index a1829eb95..dba89c8e7 100644 --- a/models/alembic/versions/683f401ed33e_create_tables.py +++ b/models/alembic/versions/683f401ed33e_create_tables.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "683f401ed33e" diff --git a/models/alembic/versions/6dc508050bca_add_annotation_dataset_to_training.py b/models/alembic/versions/6dc508050bca_add_annotation_dataset_to_training.py index 9907bd9df..f2717a143 100644 --- a/models/alembic/versions/6dc508050bca_add_annotation_dataset_to_training.py +++ b/models/alembic/versions/6dc508050bca_add_annotation_dataset_to_training.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/models/alembic/versions/826680104247_pod_limits_column.py b/models/alembic/versions/826680104247_pod_limits_column.py index 770f778d9..6050b8d29 100644 --- a/models/alembic/versions/826680104247_pod_limits_column.py +++ b/models/alembic/versions/826680104247_pod_limits_column.py @@ -8,9 +8,8 @@ from json import dumps import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "826680104247" diff --git a/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py b/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py index 3f83181a2..9ec0895b8 100644 --- a/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py +++ b/models/alembic/versions/abeff4c79fd3_modify_basement_and_training.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "abeff4c79fd3" diff --git a/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py b/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py index f6d970c46..3bef41178 100644 --- a/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py +++ b/models/alembic/versions/b4c5225515f1_add_latest_and_version_columns_to_model.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. 
diff --git a/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py b/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py index a826bbd03..d3025066f 100644 --- a/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py +++ b/models/alembic/versions/c769f68f00d4_add_field_type_to_table_model.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/models/models/colab_ssh_utils.py b/models/models/colab_ssh_utils.py index 587df88bf..55c40d843 100644 --- a/models/models/colab_ssh_utils.py +++ b/models/models/colab_ssh_utils.py @@ -5,11 +5,12 @@ from typing import BinaryIO, Iterator, Union from botocore.response import StreamingBody +from paramiko import AutoAddPolicy, SSHClient +from paramiko.ssh_exception import SSHException + from models.constants import MINIO_ACCESS_KEY, MINIO_HOST, MINIO_SECRET_KEY from models.errors import ColabFileUploadError from models.schemas import TrainingCredentials -from paramiko import AutoAddPolicy, SSHClient -from paramiko.ssh_exception import SSHException LOGGER = logging.getLogger(name="models") COLAB_TRAINING_DIRECTORY = "/content/training/" diff --git a/models/models/crud.py b/models/models/crud.py index 6b00da2a0..5dc1ea0bb 100644 --- a/models/models/crud.py +++ b/models/models/crud.py @@ -1,15 +1,12 @@ from typing import Dict, Optional, Tuple, Union -from models.db import Basement, Model, Training -from models.schemas import ( - BasementBase, - ModelBase, - TrainingBase, - TrainingUpdate, -) from sqlalchemy import desc from sqlalchemy.orm import Session +from models.db import Basement, Model, Training +from models.schemas import (BasementBase, ModelBase, TrainingBase, + TrainingUpdate) + def is_id_existing( session: Session, diff --git a/models/models/db.py b/models/models/db.py index 854e6b735..a59105b7e 100644 --- a/models/models/db.py +++ b/models/models/db.py @@ -1,17 +1,8 @@ import datetime import enum -from sqlalchemy import ( - VARCHAR, - Boolean, - Column, - DateTime, - Enum, - ForeignKey, - Integer, - String, - create_engine, -) +from sqlalchemy import (VARCHAR, Boolean, Column, DateTime, Enum, ForeignKey, + Integer, String, create_engine) from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION, JSON from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.mutable import MutableDict, MutableList diff --git a/models/models/main.py b/models/models/main.py index 7f2525886..7d7d1230b 100644 --- a/models/models/main.py +++ b/models/models/main.py @@ -9,23 +9,16 @@ from sqlalchemy.exc import SQLAlchemyError from models.constants import API_NAME, API_VERSION, ROOT_PATH -from models.errors import ( - ColabFileUploadError, - NoSuchTenant, - botocore_error_handler, - colab_execution_error_handler, - minio_client_error_handler, - minio_no_such_bucket_error_handler, - sqlalchemy_db_error_handler, - ssh_connection_error_handler, - subprocess_called_error_handler, -) -from models.routers import ( - basements_routers, - deployed_models_routers, - models_routers, - training_routers, -) +from models.errors import (ColabFileUploadError, NoSuchTenant, + botocore_error_handler, + colab_execution_error_handler, + minio_client_error_handler, + minio_no_such_bucket_error_handler, + sqlalchemy_db_error_handler, + ssh_connection_error_handler, + subprocess_called_error_handler) +from models.routers import (basements_routers, deployed_models_routers, + models_routers, training_routers) LOGGER = logging.getLogger(name=API_NAME) 
LOGGING_FORMAT = "[%(asctime)s] - [%(name)s] - [%(levelname)s] - [%(message)s]" diff --git a/models/models/routers/basements_routers.py b/models/models/routers/basements_routers.py index aa13294f3..6dc9f4202 100644 --- a/models/models/routers/basements_routers.py +++ b/models/models/routers/basements_routers.py @@ -2,24 +2,16 @@ from typing import Any, Dict, Optional, Union from fastapi import APIRouter, Depends, File, Header, HTTPException, UploadFile -from filter_lib import ( - Page, - create_filter_model, - form_query, - map_request_to_filter, - paginate, -) +from filter_lib import (Page, create_filter_model, form_query, + map_request_to_filter, paginate) +from sqlalchemy.orm import Session +from tenant_dependency import TenantData + from models import crud, schemas from models.db import Basement, get_db from models.routers import tenant -from models.utils import ( - NoSuchTenant, - convert_bucket_name_if_s3prefix, - get_minio_resource, - upload_to_object_storage, -) -from sqlalchemy.orm import Session -from tenant_dependency import TenantData +from models.utils import (NoSuchTenant, convert_bucket_name_if_s3prefix, + get_minio_resource, upload_to_object_storage) LOGGER = logging.getLogger(name="models") diff --git a/models/models/routers/deployed_models_routers.py b/models/models/routers/deployed_models_routers.py index 22b0c4f34..8ca563f03 100644 --- a/models/models/routers/deployed_models_routers.py +++ b/models/models/routers/deployed_models_routers.py @@ -5,6 +5,7 @@ from fastapi import APIRouter, HTTPException from kubernetes import client, config from kubernetes.client.exceptions import ApiException + from models import schemas, utils from models.constants import MODELS_NAMESPACE diff --git a/models/models/routers/models_routers.py b/models/models/routers/models_routers.py index 7058dfa41..2e73d294e 100644 --- a/models/models/routers/models_routers.py +++ b/models/models/routers/models_routers.py @@ -2,19 +2,15 @@ from typing import Any, Dict, Union from fastapi import APIRouter, Depends, Header, HTTPException, Path -from filter_lib import ( - Page, - create_filter_model, - form_query, - map_request_to_filter, - paginate, -) +from filter_lib import (Page, create_filter_model, form_query, + map_request_to_filter, paginate) +from sqlalchemy.orm import Session +from tenant_dependency import TenantData + from models import crud, schemas, utils from models.crud import get_latest_model, get_second_latest_model from models.db import Basement, Model, Training, get_db from models.routers import tenant -from sqlalchemy.orm import Session -from tenant_dependency import TenantData LOGGER = logging.getLogger(name="models") diff --git a/models/models/routers/training_routers.py b/models/models/routers/training_routers.py index 3a1c19135..9ba7484d7 100644 --- a/models/models/routers/training_routers.py +++ b/models/models/routers/training_routers.py @@ -3,44 +3,24 @@ import tempfile from typing import Any, Dict, Union -from fastapi import ( - APIRouter, - Depends, - File, - Header, - HTTPException, - Path, - Response, - UploadFile, - status, -) -from filter_lib import ( - Page, - create_filter_model, - form_query, - map_request_to_filter, - paginate, -) +from fastapi import (APIRouter, Depends, File, Header, HTTPException, Path, + Response, UploadFile, status) +from filter_lib import (Page, create_filter_model, form_query, + map_request_to_filter, paginate) +from sqlalchemy.orm import Session +from tenant_dependency import TenantData + from models import crud, schemas, utils -from 
models.colab_ssh_utils import ( - COLAB_TRAINING_DIRECTORY, - check_aws_credentials_file, - connect_colab, - local_mount_colab_drive, - sync_colab_with_minio, - upload_file_to_colab, -) +from models.colab_ssh_utils import (COLAB_TRAINING_DIRECTORY, + check_aws_credentials_file, connect_colab, + local_mount_colab_drive, + sync_colab_with_minio, + upload_file_to_colab) from models.convert_utils import prepare_dataset_info from models.db import Basement, Training, get_db from models.routers import tenant -from models.utils import ( - NoSuchTenant, - convert_bucket_name_if_s3prefix, - get_minio_object, - get_minio_resource, -) -from sqlalchemy.orm import Session -from tenant_dependency import TenantData +from models.utils import (NoSuchTenant, convert_bucket_name_if_s3prefix, + get_minio_object, get_minio_resource) LOGGER = logging.getLogger(name="models") TRAINING_SCRIPT_NAME = "training_script.py" diff --git a/models/models/schemas.py b/models/models/schemas.py index 1fd2e62e1..71f5e13f6 100644 --- a/models/models/schemas.py +++ b/models/models/schemas.py @@ -3,9 +3,10 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union -from models.db import StatusEnum from pydantic import BaseModel, ConstrainedStr, Field, PositiveInt, validator +from models.db import StatusEnum + class AtLeastOneChar(ConstrainedStr): min_length = 1 diff --git a/models/models/utils.py b/models/models/utils.py index 2148227a4..7c2977d88 100644 --- a/models/models/utils.py +++ b/models/models/utils.py @@ -1,32 +1,24 @@ from typing import Dict, List, Optional, Tuple import boto3 -import models.logger as logger from botocore.client import Config from botocore.exceptions import BotoCoreError, ClientError from botocore.response import StreamingBody from kubernetes import client, config from kubernetes.client.rest import ApiException from kubernetes.config import ConfigException -from models.constants import ( - CONTAINER_NAME, - DOCKER_REGISTRY_URL, - DOMAIN_NAME, - INFERENCE_HOST, - INFERENCE_PORT, - MINIO_ACCESS_KEY, - MINIO_HOST, - MINIO_PUBLIC_HOST, - MINIO_SECRET_KEY, - MODELS_NAMESPACE, - S3_CREDENTIALS_PROVIDER, - S3_PREFIX, -) +from sqlalchemy.orm import Session +from starlette.datastructures import UploadFile + +import models.logger as logger +from models.constants import (CONTAINER_NAME, DOCKER_REGISTRY_URL, DOMAIN_NAME, + INFERENCE_HOST, INFERENCE_PORT, MINIO_ACCESS_KEY, + MINIO_HOST, MINIO_PUBLIC_HOST, MINIO_SECRET_KEY, + MODELS_NAMESPACE, S3_CREDENTIALS_PROVIDER, + S3_PREFIX) from models.db import Basement, Model from models.errors import NoSuchTenant from models.schemas import DeployedModelPod, MinioHTTPMethod -from sqlalchemy.orm import Session -from starlette.datastructures import UploadFile logger_ = logger.get_logger(__name__) diff --git a/models/tests/conftest.py b/models/tests/conftest.py index 35f041828..287dade2e 100644 --- a/models/tests/conftest.py +++ b/models/tests/conftest.py @@ -6,6 +6,7 @@ import boto3 import pytest +from alembic import command from botocore.config import Config from botocore.exceptions import ClientError from fastapi.testclient import TestClient @@ -15,27 +16,19 @@ from sqlalchemy.orm import Session, sessionmaker from sqlalchemy_utils import create_database, database_exists -from alembic import command -from models.constants import ( - DATABASE_URL, - MINIO_ACCESS_KEY, - MINIO_HOST, - MINIO_SECRET_KEY, -) +from models.constants import (DATABASE_URL, MINIO_ACCESS_KEY, MINIO_HOST, + MINIO_SECRET_KEY) from models.db import Base, Basement, Training, get_db from 
models.main import app from models.routers import tenant from models.utils import get_test_db_url from .override_app_dependency import override -from .test_colab_start_training import ( - BASEMENT_ID, - EXIST_TRAINING_ID, - TRAINING_ARCHIVE_DATA, - TRAINING_ARCHIVE_KEY, - TRAINING_SCRIPT_DATA, - TRAINING_SCRIPT_KEY, -) +from .test_colab_start_training import (BASEMENT_ID, EXIST_TRAINING_ID, + TRAINING_ARCHIVE_DATA, + TRAINING_ARCHIVE_KEY, + TRAINING_SCRIPT_DATA, + TRAINING_SCRIPT_KEY) from .test_crud import GET_BASEMENT, GET_LATEST_MODELS, GET_TRAINING from .test_utils import TEST_LIMITS, TEST_TENANT diff --git a/models/tests/test_basement_routers.py b/models/tests/test_basement_routers.py index 6756b340f..6d4ec46f4 100644 --- a/models/tests/test_basement_routers.py +++ b/models/tests/test_basement_routers.py @@ -4,11 +4,11 @@ import pytest from fastapi.exceptions import HTTPException from fastapi.testclient import TestClient +from tests.test_utils import TEST_LIMITS from models.db import Basement from models.main import app from models.routers import basements_routers -from tests.test_utils import TEST_LIMITS @pytest.fixture(scope="function") diff --git a/models/tests/test_colab_interactions.py b/models/tests/test_colab_interactions.py index 17b16b454..8722d5c74 100644 --- a/models/tests/test_colab_interactions.py +++ b/models/tests/test_colab_interactions.py @@ -3,11 +3,8 @@ import pytest -from models.colab_ssh_utils import ( - COLAB_TRAINING_DIRECTORY, - connect_colab, - upload_file_to_colab, -) +from models.colab_ssh_utils import (COLAB_TRAINING_DIRECTORY, connect_colab, + upload_file_to_colab) from models.errors import ColabFileUploadError TEST_FILE_NAME = "test_file.py" diff --git a/models/tests/test_colab_start_training.py b/models/tests/test_colab_start_training.py index 0e02fde52..a359f97b0 100644 --- a/models/tests/test_colab_start_training.py +++ b/models/tests/test_colab_start_training.py @@ -5,11 +5,8 @@ from paramiko.ssh_exception import SSHException from sqlalchemy.exc import SQLAlchemyError -from .override_app_dependency import ( - OTHER_TENANT_HEADER, - TEST_HEADER, - TEST_TENANTS, -) +from .override_app_dependency import (OTHER_TENANT_HEADER, TEST_HEADER, + TEST_TENANTS) TEST_CREDENTIALS = { "host": "test_host", diff --git a/models/tests/test_crud.py b/models/tests/test_crud.py index 8a097ad01..68ab0b252 100644 --- a/models/tests/test_crud.py +++ b/models/tests/test_crud.py @@ -1,13 +1,13 @@ from unittest.mock import Mock from pytest import mark +from tests.test_utils import TEST_LIMITS +from tests.utils import create_expected_models, delete_date_field, row_to_dict from models import crud from models.crud import get_instance, get_latest_model from models.db import Basement, Model, StatusEnum, Training from models.schemas import BasementBase -from tests.test_utils import TEST_LIMITS -from tests.utils import create_expected_models, delete_date_field, row_to_dict GET_BASEMENT = Basement( id="base_id", name="basement_name", gpu_support=True, limits=TEST_LIMITS diff --git a/models/tests/test_models_routers.py b/models/tests/test_models_routers.py index 893f273c3..2055b2246 100644 --- a/models/tests/test_models_routers.py +++ b/models/tests/test_models_routers.py @@ -4,13 +4,13 @@ import pytest from fastapi.exceptions import HTTPException from fastapi.testclient import TestClient +from tests.override_app_dependency import TEST_HEADER, TEST_TENANTS +from tests.test_crud import GET_BASEMENT +from tests.utils import create_expected_models, delete_date_field, row_to_dict from 
models.db import Basement, Model, StatusEnum from models.main import app from models.routers import models_routers -from tests.override_app_dependency import TEST_HEADER, TEST_TENANTS -from tests.test_crud import GET_BASEMENT -from tests.utils import create_expected_models, delete_date_field, row_to_dict @pytest.fixture(scope="function") diff --git a/models/tests/test_schemas.py b/models/tests/test_schemas.py index 5bd2e7cd7..a9fb17ab1 100644 --- a/models/tests/test_schemas.py +++ b/models/tests/test_schemas.py @@ -1,8 +1,8 @@ import pytest from pydantic import ValidationError +from tests.test_utils import TEST_LIMITS from models import schemas -from tests.test_utils import TEST_LIMITS def test_empty_id_in_modelbase_raises_error(): diff --git a/models/tests/test_utils.py b/models/tests/test_utils.py index 187efbe12..e488234ee 100644 --- a/models/tests/test_utils.py +++ b/models/tests/test_utils.py @@ -11,13 +11,8 @@ from models import utils from models.constants import MINIO_HOST from models.errors import NoSuchTenant -from models.schemas import ( - BasementBase, - DeployedModelPod, - MinioHTTPMethod, - MinioPath, - Model, -) +from models.schemas import (BasementBase, DeployedModelPod, MinioHTTPMethod, + MinioPath, Model) TEST_TENANT = "test" TEST_LIMITS = { diff --git a/pipelines/alembic/env.py b/pipelines/alembic/env.py index 3754b3c1c..2baf268d7 100644 --- a/pipelines/alembic/env.py +++ b/pipelines/alembic/env.py @@ -1,10 +1,10 @@ import os from logging.config import fileConfig +from alembic import context from sqlalchemy import engine_from_config, pool import pipelines.config as settings -from alembic import context from pipelines.db.models import Base from pipelines.db.service import get_test_db_url diff --git a/pipelines/alembic/versions/08ad5deb23eb_remove_token_column.py b/pipelines/alembic/versions/08ad5deb23eb_remove_token_column.py index 6d2be02fa..0f34c6d7e 100644 --- a/pipelines/alembic/versions/08ad5deb23eb_remove_token_column.py +++ b/pipelines/alembic/versions/08ad5deb23eb_remove_token_column.py @@ -8,7 +8,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/pipelines/alembic/versions/0a53c56436d0_change_task_webhook_to_string.py b/pipelines/alembic/versions/0a53c56436d0_change_task_webhook_to_string.py index 9a472edb7..13564231d 100644 --- a/pipelines/alembic/versions/0a53c56436d0_change_task_webhook_to_string.py +++ b/pipelines/alembic/versions/0a53c56436d0_change_task_webhook_to_string.py @@ -6,9 +6,9 @@ """ import sqlalchemy as sa +from alembic import op from sqlalchemy import orm -from alembic import op from pipelines.db import models # revision identifiers, used by Alembic. diff --git a/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py b/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py index a1dbc20c6..9218792ee 100644 --- a/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py +++ b/pipelines/alembic/versions/0ab5e65cf34b_fix_default_type_to_inference.py @@ -5,9 +5,9 @@ Create Date: 2022-04-26 19:37:27.263471 """ +from alembic import op from sqlalchemy import orm -from alembic import op from pipelines.db import models # revision identifiers, used by Alembic. 
diff --git a/pipelines/alembic/versions/29f072fb5c9c_.py b/pipelines/alembic/versions/29f072fb5c9c_.py index 88b3be178..269820a10 100644 --- a/pipelines/alembic/versions/29f072fb5c9c_.py +++ b/pipelines/alembic/versions/29f072fb5c9c_.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/pipelines/alembic/versions/58fa5399caa9.py b/pipelines/alembic/versions/58fa5399caa9.py index 0f45f2909..cec33d4a5 100644 --- a/pipelines/alembic/versions/58fa5399caa9.py +++ b/pipelines/alembic/versions/58fa5399caa9.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/pipelines/alembic/versions/5fd9d1fdcf5b_init.py b/pipelines/alembic/versions/5fd9d1fdcf5b_init.py index 2b62f9e81..b1f8db5bf 100644 --- a/pipelines/alembic/versions/5fd9d1fdcf5b_init.py +++ b/pipelines/alembic/versions/5fd9d1fdcf5b_init.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. diff --git a/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py b/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py index 46a937c4c..895bdd4e5 100644 --- a/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py +++ b/pipelines/alembic/versions/764961499e2b_add_original_pipeline_id_and_is_latest.py @@ -6,9 +6,9 @@ """ import sqlalchemy as sa +from alembic import op from sqlalchemy import orm -from alembic import op from pipelines.db import models # revision identifiers, used by Alembic. diff --git a/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py b/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py index abefe8942..a82ef9847 100644 --- a/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py +++ b/pipelines/alembic/versions/8a589dda3869_add_type_description_and_summary_to_.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py b/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py index cf22b1af8..74512697f 100644 --- a/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py +++ b/pipelines/alembic/versions/b0cbaebbddd8_change_pipeline_version_to_int.py @@ -6,9 +6,9 @@ """ import sqlalchemy as sa +from alembic import op from sqlalchemy import orm -from alembic import op from pipelines.db import models # revision identifiers, used by Alembic. diff --git a/pipelines/alembic/versions/c26caf5e8a19_add_webhook_column.py b/pipelines/alembic/versions/c26caf5e8a19_add_webhook_column.py index d107fab9d..fc3b4d583 100644 --- a/pipelines/alembic/versions/c26caf5e8a19_add_webhook_column.py +++ b/pipelines/alembic/versions/c26caf5e8a19_add_webhook_column.py @@ -7,7 +7,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. 
diff --git a/pipelines/alembic/versions/cd396f8a2df1_change_token_column_type.py b/pipelines/alembic/versions/cd396f8a2df1_change_token_column_type.py index 62760093d..1f8ffdc80 100644 --- a/pipelines/alembic/versions/cd396f8a2df1_change_token_column_type.py +++ b/pipelines/alembic/versions/cd396f8a2df1_change_token_column_type.py @@ -7,7 +7,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py b/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py index bdeecba1b..0ca774fdb 100644 --- a/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py +++ b/pipelines/alembic/versions/df42f45f4ddf_add_parent_step_and_tenant_to_execution_.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "df42f45f4ddf" diff --git a/pipelines/pipelines/execution.py b/pipelines/pipelines/execution.py index e46ee8e40..f7db094b0 100644 --- a/pipelines/pipelines/execution.py +++ b/pipelines/pipelines/execution.py @@ -17,15 +17,8 @@ import pipelines.db.models as dbm import pipelines.db.service as service import pipelines.result_processing as postprocessing -from pipelines import ( - config, - http_utils, - log, - s3, - schemas, - service_token, - webhooks, -) +from pipelines import (config, http_utils, log, s3, schemas, service_token, + webhooks) logger = log.get_logger(__file__) minio_client = s3.get_minio_client() diff --git a/pipelines/tests/conftest.py b/pipelines/tests/conftest.py index 663ae2db3..f80333aa1 100644 --- a/pipelines/tests/conftest.py +++ b/pipelines/tests/conftest.py @@ -2,6 +2,9 @@ from unittest.mock import patch import pytest +import tests.testing_data as td +from alembic import command +from alembic.config import Config from fastapi.testclient import TestClient from pydantic import BaseModel from sqlalchemy import create_engine @@ -13,9 +16,6 @@ import pipelines.db.models as dbm import pipelines.db.service as service import pipelines.execution as execution -import tests.testing_data as td -from alembic import command -from alembic.config import Config from pipelines.config import DB_URI test_db_url = service.get_test_db_url(DB_URI) diff --git a/pipelines/tests/db/test_service.py b/pipelines/tests/db/test_service.py index 8fb89409a..d7bb08ab3 100644 --- a/pipelines/tests/db/test_service.py +++ b/pipelines/tests/db/test_service.py @@ -6,6 +6,7 @@ from unittest.mock import patch import pytest +import tests.testing_data as td from aiokafka import AIOKafkaProducer from freezegun import freeze_time @@ -13,7 +14,6 @@ import pipelines.db.service as service import pipelines.execution as execution import pipelines.schemas as schemas -import tests.testing_data as td pytest_plugins = ("pytest_asyncio",) diff --git a/pipelines/tests/test_app.py b/pipelines/tests/test_app.py index 28a8affbb..8ba390cf8 100644 --- a/pipelines/tests/test_app.py +++ b/pipelines/tests/test_app.py @@ -4,13 +4,13 @@ from typing import Dict import pytest +import tests.testing_data as td import pipelines.app as app import pipelines.db.models as dbm import pipelines.db.service as service import pipelines.execution as execution import pipelines.schemas as schemas -import tests.testing_data as td def test_add_pipeline(testing_app, adjust_mock): diff --git 
a/pipelines/tests/test_execution.py b/pipelines/tests/test_execution.py index 57f01988b..254fbb695 100644 --- a/pipelines/tests/test_execution.py +++ b/pipelines/tests/test_execution.py @@ -5,6 +5,7 @@ from unittest.mock import PropertyMock, patch import pytest +import tests.testing_data as td from aiokafka import AIOKafkaProducer from fastapi import HTTPException from pydantic import BaseModel @@ -12,7 +13,6 @@ import pipelines.db.models as dbm import pipelines.execution as execution import pipelines.schemas as schemas -import tests.testing_data as td LOGGER = logging.getLogger(__name__) diff --git a/pipelines/tests/test_schemas.py b/pipelines/tests/test_schemas.py index df147e83f..9888fcdb0 100644 --- a/pipelines/tests/test_schemas.py +++ b/pipelines/tests/test_schemas.py @@ -1,10 +1,10 @@ """Testing pipelines/schemas.py.""" import pytest +import tests.testing_data as td import pipelines.db.models as dbm import pipelines.schemas as schemas -import tests.testing_data as td def test_init_input_args(): diff --git a/processing/alembic/env.py b/processing/alembic/env.py index fd5924480..f943afabc 100644 --- a/processing/alembic/env.py +++ b/processing/alembic/env.py @@ -1,11 +1,11 @@ import os from logging.config import fileConfig -from processing.config import settings -from processing.db.service import get_test_db_url +from alembic import context from sqlalchemy import engine_from_config, pool -from alembic import context +from processing.config import settings +from processing.db.service import get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/processing/alembic/versions/52af1473946f_init.py b/processing/alembic/versions/52af1473946f_init.py index 36ccdb9cd..98083fada 100644 --- a/processing/alembic/versions/52af1473946f_init.py +++ b/processing/alembic/versions/52af1473946f_init.py @@ -5,9 +5,8 @@ Create Date: 2022-05-18 14:46:07.845635 """ -from alembic import op import sqlalchemy as sa - +from alembic import op # revision identifiers, used by Alembic. revision = "52af1473946f" diff --git a/processing/alembic/versions/8e973b70b26f_noneasnull.py b/processing/alembic/versions/8e973b70b26f_noneasnull.py index 29c46cba0..bdd24a8f2 100644 --- a/processing/alembic/versions/8e973b70b26f_noneasnull.py +++ b/processing/alembic/versions/8e973b70b26f_noneasnull.py @@ -5,8 +5,8 @@ Create Date: 2022-05-18 21:42:03.973476 """ -from alembic import op import sqlalchemy as sa +from alembic import op # revision identifiers, used by Alembic. revision = "8e973b70b26f" diff --git a/processing/alembic/versions/f637b13c744d_renamed_column.py b/processing/alembic/versions/f637b13c744d_renamed_column.py index 268f4ccfa..546d6510b 100644 --- a/processing/alembic/versions/f637b13c744d_renamed_column.py +++ b/processing/alembic/versions/f637b13c744d_renamed_column.py @@ -5,9 +5,8 @@ Create Date: 2022-05-19 12:43:52.309487 """ -from alembic import op import sqlalchemy as sa - +from alembic import op # revision identifiers, used by Alembic. 
revision = "f637b13c744d" diff --git a/processing/processing/health_check_easy_ocr.py b/processing/processing/health_check_easy_ocr.py index 13baebcf0..1c8a8ad28 100644 --- a/processing/processing/health_check_easy_ocr.py +++ b/processing/processing/health_check_easy_ocr.py @@ -3,12 +3,11 @@ from fastapi import HTTPException from minio.error import MinioException + from processing.utils.aiohttp_utils import send_request from processing.utils.logger import get_logger -from processing.utils.minio_utils import ( - MinioCommunicator, - convert_bucket_name_if_s3prefix, -) +from processing.utils.minio_utils import (MinioCommunicator, + convert_bucket_name_if_s3prefix) logger = get_logger(__name__) minio_client = MinioCommunicator().client diff --git a/processing/processing/main.py b/processing/processing/main.py index bfd7447d7..98946b7d4 100644 --- a/processing/processing/main.py +++ b/processing/processing/main.py @@ -1,16 +1,10 @@ from typing import Dict, List, Optional, Set -from fastapi import ( - Body, - Depends, - FastAPI, - Header, - HTTPException, - Path, - Query, - Response, - status, -) +from fastapi import (Body, Depends, FastAPI, Header, HTTPException, Path, + Query, Response, status) +from sqlalchemy.orm import Session +from tenant_dependency import TenantData, get_tenant_info + from processing import db, schema from processing.config import settings from processing.health_check_easy_ocr import health_check_preprocessing @@ -20,8 +14,6 @@ from processing.utils.logger import get_logger from processing.utils.minio_utils import convert_bucket_name_if_s3prefix from processing.utils.utils import map_finish_status_for_assets -from sqlalchemy.orm import Session -from tenant_dependency import TenantData, get_tenant_info logger = get_logger(__name__) app = FastAPI( diff --git a/processing/processing/send_preprocess_results.py b/processing/processing/send_preprocess_results.py index a36e0caf8..955807047 100644 --- a/processing/processing/send_preprocess_results.py +++ b/processing/processing/send_preprocess_results.py @@ -4,6 +4,7 @@ from fastapi import HTTPException from minio.error import MinioException + from processing.utils.logger import get_logger from processing.utils.minio_utils import MinioCommunicator diff --git a/processing/processing/tasks.py b/processing/processing/tasks.py index 5aeb4117b..db20fd90b 100644 --- a/processing/processing/tasks.py +++ b/processing/processing/tasks.py @@ -8,17 +8,14 @@ from cache import AsyncTTL from fastapi import HTTPException, status +from sqlalchemy.orm import Session + from processing.config import settings from processing.schema import PreprocessingStatus, Status from processing.utils.aiohttp_utils import send_request from processing.utils.logger import get_log_exception_msg, get_logger -from processing.utils.utils import ( - execute_pipeline, - get_files_data, - get_model_url, - split_iterable, -) -from sqlalchemy.orm import Session +from processing.utils.utils import (execute_pipeline, get_files_data, + get_model_url, split_iterable) logger = get_logger(__name__) diff --git a/processing/processing/text_merge.py b/processing/processing/text_merge.py index 300e45b1b..2858d9fb5 100644 --- a/processing/processing/text_merge.py +++ b/processing/processing/text_merge.py @@ -11,6 +11,7 @@ from fastapi import HTTPException from minio.error import MinioException + from processing import schema from processing.schema import AnnotationData, MatchedPage, Page, ParagraphBbox from processing.third_party_code.box_util import stitch_boxes_into_lines diff 
--git a/processing/processing/utils/aiohttp_utils.py b/processing/processing/utils/aiohttp_utils.py index 7c0318df3..9f7abec55 100644 --- a/processing/processing/utils/aiohttp_utils.py +++ b/processing/processing/utils/aiohttp_utils.py @@ -4,6 +4,7 @@ import aiohttp from aiohttp import ContentTypeError from fastapi import HTTPException + from processing.config import settings from processing.utils.logger import get_logger diff --git a/processing/processing/utils/minio_utils.py b/processing/processing/utils/minio_utils.py index 19a0c96a4..166fcf5e1 100644 --- a/processing/processing/utils/minio_utils.py +++ b/processing/processing/utils/minio_utils.py @@ -1,5 +1,6 @@ from minio import Minio from minio.credentials import AWSConfigProvider, EnvAWSProvider, IamAwsProvider + from processing.config import settings from processing.utils.logger import get_logger diff --git a/processing/processing/utils/utils.py b/processing/processing/utils/utils.py index 40b927595..b9753cd50 100644 --- a/processing/processing/utils/utils.py +++ b/processing/processing/utils/utils.py @@ -2,11 +2,12 @@ from urllib.parse import urljoin from cache import AsyncTTL +from sqlalchemy.orm import Session + from processing import db, schema from processing.config import settings from processing.utils.aiohttp_utils import send_request from processing.utils.logger import get_log_exception_msg, get_logger -from sqlalchemy.orm import Session logger = get_logger(__name__) T = TypeVar("T") diff --git a/processing/tests/conftest.py b/processing/tests/conftest.py index 578bf6709..138182aed 100644 --- a/processing/tests/conftest.py +++ b/processing/tests/conftest.py @@ -3,13 +3,13 @@ from unittest.mock import patch import pytest +from alembic import command +from alembic.config import Config from sqlalchemy import create_engine from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import sessionmaker from sqlalchemy_utils import create_database, database_exists, drop_database -from alembic import command -from alembic.config import Config from processing.config import settings from processing.db.service import get_test_db_url diff --git a/processing/tests/integration/test_integration.py b/processing/tests/integration/test_integration.py index 834ea24b9..9cd6ae1ec 100644 --- a/processing/tests/integration/test_integration.py +++ b/processing/tests/integration/test_integration.py @@ -10,7 +10,6 @@ from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry - pytestmark = pytest.mark.integration load_dotenv("./.env") diff --git a/processing/tests/test_text_merge.py b/processing/tests/test_text_merge.py index 512bd6c9e..9a1e63cbf 100644 --- a/processing/tests/test_text_merge.py +++ b/processing/tests/test_text_merge.py @@ -1,19 +1,9 @@ from unittest.mock import patch -from processing.schema import ( - AnnotationData, - MatchedPage, - Page, - ParagraphBbox, - PageSize, - Input, -) -from processing.text_merge import ( - convert_points_to_pixels, - match_page, - download_files, - stitch_boxes, -) +from processing.schema import (AnnotationData, Input, MatchedPage, Page, + PageSize, ParagraphBbox) +from processing.text_merge import (convert_points_to_pixels, download_files, + match_page, stitch_boxes) class ClientObj: diff --git a/scheduler/alembic/env.py b/scheduler/alembic/env.py index f21ea5240..6f106ff76 100644 --- a/scheduler/alembic/env.py +++ b/scheduler/alembic/env.py @@ -2,11 +2,11 @@ from logging import config as logging_config import sqlalchemy -from scheduler import config as scheduler_config +from 
alembic import context from scheduler.db import models from scheduler.db.service import get_test_db_url -from alembic import context +from scheduler import config as scheduler_config # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/scheduler/alembic/versions/0cadbdb7f0ea_.py b/scheduler/alembic/versions/0cadbdb7f0ea_.py index f13053fec..9cfa6eac5 100644 --- a/scheduler/alembic/versions/0cadbdb7f0ea_.py +++ b/scheduler/alembic/versions/0cadbdb7f0ea_.py @@ -6,9 +6,8 @@ """ import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "0cadbdb7f0ea" diff --git a/scheduler/alembic/versions/449be82736bd_.py b/scheduler/alembic/versions/449be82736bd_.py index ff603864d..baddc1db8 100644 --- a/scheduler/alembic/versions/449be82736bd_.py +++ b/scheduler/alembic/versions/449be82736bd_.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/scheduler/scheduler/app.py b/scheduler/scheduler/app.py index db894e53d..ee1f93ba3 100644 --- a/scheduler/scheduler/app.py +++ b/scheduler/scheduler/app.py @@ -3,9 +3,10 @@ import tenant_dependency from fastapi import Depends, FastAPI, Header, HTTPException, status -from scheduler import config, heartbeat, kafka_utils, log, runner, schemas from scheduler.db import service +from scheduler import config, heartbeat, kafka_utils, log, runner, schemas + logger = log.get_logger(__name__) tenant = tenant_dependency.get_tenant_info( diff --git a/scheduler/scheduler/db/service.py b/scheduler/scheduler/db/service.py index 3acc6388b..a51684b6b 100644 --- a/scheduler/scheduler/db/service.py +++ b/scheduler/scheduler/db/service.py @@ -2,10 +2,11 @@ from typing import Any, Dict, List, Union import sqlalchemy -from scheduler import config, unit from scheduler.db import models from sqlalchemy import orm +from scheduler import config, unit + engine = sqlalchemy.create_engine( config.DB_URL, pool_size=int(config.POOL_SIZE) ) diff --git a/scheduler/scheduler/heartbeat.py b/scheduler/scheduler/heartbeat.py index effdc86e8..a96374b1c 100644 --- a/scheduler/scheduler/heartbeat.py +++ b/scheduler/scheduler/heartbeat.py @@ -3,10 +3,11 @@ import random from aiokafka import AIOKafkaProducer -from scheduler import config, log, runner from scheduler.db import models, service from sqlalchemy import orm +from scheduler import config, log, runner + logger = log.get_logger(__name__) diff --git a/scheduler/scheduler/runner.py b/scheduler/scheduler/runner.py index e8cc25e2e..c7a33bacd 100644 --- a/scheduler/scheduler/runner.py +++ b/scheduler/scheduler/runner.py @@ -2,9 +2,10 @@ import uuid import aiokafka -from scheduler import exceptions, log, unit from scheduler.db import models +from scheduler import exceptions, log, unit + logger = log.get_logger(__name__) runner_id: str = str(uuid.uuid4()) diff --git a/scheduler/scheduler/unit.py b/scheduler/scheduler/unit.py index f3c021323..6e6424761 100644 --- a/scheduler/scheduler/unit.py +++ b/scheduler/scheduler/unit.py @@ -6,10 +6,10 @@ import aiohttp import aiokafka +from scheduler.db import models, service from sqlalchemy import exc from scheduler import config, exceptions, log -from scheduler.db import models, service logger = log.get_logger(__name__) diff --git a/scheduler/tests/test_heartbeat.py b/scheduler/tests/test_heartbeat.py index b035c7f06..f63a62614 100644 --- 
a/scheduler/tests/test_heartbeat.py +++ b/scheduler/tests/test_heartbeat.py @@ -5,9 +5,9 @@ import pytest from freezegun import freeze_time +from scheduler.db import models from scheduler import heartbeat, unit -from scheduler.db import models @freeze_time("2020-01-01") diff --git a/scheduler/tests/test_service.py b/scheduler/tests/test_service.py index 31a32cf22..07837cbd3 100644 --- a/scheduler/tests/test_service.py +++ b/scheduler/tests/test_service.py @@ -1,6 +1,7 @@ -from scheduler.db import models, service from unittest import mock +from scheduler.db import models, service + def test_add_into_db(testing_session, testing_unit_instance): service.add_into_db(testing_session, testing_unit_instance) diff --git a/scheduler/tests/test_unit.py b/scheduler/tests/test_unit.py index d14efdfc6..cf2b9b18c 100644 --- a/scheduler/tests/test_unit.py +++ b/scheduler/tests/test_unit.py @@ -1,6 +1,6 @@ +import pytest from tests import testing_data -import pytest from scheduler import exceptions, unit diff --git a/search/search/es.py b/search/search/es.py index 4ce5efda5..1905b07f3 100644 --- a/search/search/es.py +++ b/search/search/es.py @@ -3,6 +3,7 @@ import aiohttp from elasticsearch import AsyncElasticsearch from elasticsearch.exceptions import NotFoundError, RequestError + from search.config import settings INDEX_SETTINGS = { diff --git a/search/search/harvester.py b/search/search/harvester.py index 289261d04..df95b1291 100644 --- a/search/search/harvester.py +++ b/search/search/harvester.py @@ -2,10 +2,11 @@ from typing import Iterator, Optional import boto3 -import search.es as es -import search.schemas as schemas from botocore.errorfactory import ClientError from elasticsearch import helpers + +import search.es as es +import search.schemas as schemas from search.config import settings from search.logger import logger diff --git a/search/search/main.py b/search/search/main.py index dbe293ea9..86185784f 100644 --- a/search/search/main.py +++ b/search/search/main.py @@ -2,14 +2,15 @@ from typing import Optional import fastapi +from botocore.exceptions import BotoCoreError +from elasticsearch.exceptions import ElasticsearchException +from tenant_dependency import TenantData, get_tenant_info + import search.es as es import search.harvester as harvester import search.kafka_listener as kafka_listener import search.schemas as schemas -from botocore.exceptions import BotoCoreError -from elasticsearch.exceptions import ElasticsearchException from search.config import settings -from tenant_dependency import TenantData, get_tenant_info tags = [ { diff --git a/search/search/schemas/facets.py b/search/search/schemas/facets.py index 242f4d35a..f6c16d8e3 100644 --- a/search/search/schemas/facets.py +++ b/search/search/schemas/facets.py @@ -2,8 +2,9 @@ from typing import Any, Dict, List, Optional, Tuple, Union import aiocache.serializers -import search.common_utils as utils from pydantic import BaseModel, Field + +import search.common_utils as utils from search.config import settings from search.es import INDEX_SETTINGS, fetch diff --git a/search/search/schemas/pieces.py b/search/search/schemas/pieces.py index a5c4ec48e..638257f07 100644 --- a/search/search/schemas/pieces.py +++ b/search/search/schemas/pieces.py @@ -6,6 +6,7 @@ from typing import Any, Dict, List, Optional, Union import pydantic + import search.common_utils as utils import search.es as es diff --git a/search/tests/conftest.py b/search/tests/conftest.py index 7a1d2436c..e5c88003b 100644 --- a/search/tests/conftest.py +++ 
b/search/tests/conftest.py @@ -6,17 +6,12 @@ from elasticsearch import AsyncElasticsearch from kafka.errors import TopicAlreadyExistsError from moto import mock_s3 +from tests.test_get import CHILD_CATEGORIES_DATA, TEST_DATA +from tests.test_harvester import (DOCS_IN_ES, INDEX_NAME, MANIFESTS, + S3_FAIL_PAGES, S3_PAGES) from search.config import settings from search.es import INDEX_SETTINGS -from tests.test_get import CHILD_CATEGORIES_DATA, TEST_DATA -from tests.test_harvester import ( - DOCS_IN_ES, - INDEX_NAME, - MANIFESTS, - S3_FAIL_PAGES, - S3_PAGES, -) BUCKET_NAME = INDEX_NAME diff --git a/search/tests/test_harvester.py b/search/tests/test_harvester.py index 029d486c9..44395b7aa 100644 --- a/search/tests/test_harvester.py +++ b/search/tests/test_harvester.py @@ -2,6 +2,7 @@ from unittest.mock import Mock import pytest + from search.harvester import parse_json, start_harvester from .override_app_dependency import TEST_TENANT diff --git a/taxonomy/alembic/env.py b/taxonomy/alembic/env.py index 663ed2f08..4cef73004 100644 --- a/taxonomy/alembic/env.py +++ b/taxonomy/alembic/env.py @@ -1,10 +1,10 @@ import os from logging.config import fileConfig +from alembic import context # type: ignore from sqlalchemy import engine_from_config, pool -from taxonomy.database import SQLALCHEMY_DATABASE_URL, get_test_db_url -from alembic import context # type: ignore +from taxonomy.database import SQLALCHEMY_DATABASE_URL, get_test_db_url # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py b/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py index 0a5a8062f..25bc3778c 100644 --- a/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py +++ b/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. diff --git a/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py b/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py index 3adec516a..5ece0c493 100644 --- a/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py +++ b/taxonomy/alembic/versions/bdea8a93cafe_first_revision.py @@ -7,7 +7,6 @@ """ import sqlalchemy as sa import sqlalchemy_utils - from alembic import op # revision identifiers, used by Alembic. diff --git a/taxonomy/alembic/versions/d3ba69ca9d97_change_category_linking.py b/taxonomy/alembic/versions/d3ba69ca9d97_change_category_linking.py index 1dafe6073..8e37244bf 100644 --- a/taxonomy/alembic/versions/d3ba69ca9d97_change_category_linking.py +++ b/taxonomy/alembic/versions/d3ba69ca9d97_change_category_linking.py @@ -6,7 +6,6 @@ """ import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. 
diff --git a/taxonomy/documentation/update_docs.py b/taxonomy/documentation/update_docs.py index 5b2d93f41..8e10cf8ab 100644 --- a/taxonomy/documentation/update_docs.py +++ b/taxonomy/documentation/update_docs.py @@ -1,4 +1,5 @@ import yaml + from taxonomy.main import app diff --git a/taxonomy/taxonomy/main.py b/taxonomy/taxonomy/main.py index a52726e81..af43d8406 100644 --- a/taxonomy/taxonomy/main.py +++ b/taxonomy/taxonomy/main.py @@ -5,22 +5,14 @@ from fastapi import Depends, FastAPI from sqlalchemy.exc import DBAPIError, SQLAlchemyError -from taxonomy.errors import ( - CheckFieldError, - FieldConstraintError, - ForeignKeyError, - NoTaxonError, - NoTaxonomyError, - SelfParentError, - check_field_error_handler, - db_dbapi_error_handler, - db_sa_error_handler, - field_constraint_error_handler, - foreign_key_error_handler, - no_taxon_error_handler, - no_taxonomy_error_handler, - taxon_parent_child_error_handler, -) +from taxonomy.errors import (CheckFieldError, FieldConstraintError, + ForeignKeyError, NoTaxonError, NoTaxonomyError, + SelfParentError, check_field_error_handler, + db_dbapi_error_handler, db_sa_error_handler, + field_constraint_error_handler, + foreign_key_error_handler, no_taxon_error_handler, + no_taxonomy_error_handler, + taxon_parent_child_error_handler) from taxonomy.tags import TAGS from taxonomy.taxon import resources as taxon_resources from taxonomy.taxonomy import resources as taxonomy_resources diff --git a/taxonomy/taxonomy/models.py b/taxonomy/taxonomy/models.py index 29b70fb01..8e6072959 100644 --- a/taxonomy/taxonomy/models.py +++ b/taxonomy/taxonomy/models.py @@ -1,16 +1,8 @@ from typing import Callable from uuid import uuid4 -from sqlalchemy import ( - VARCHAR, - Boolean, - CheckConstraint, - Column, - ForeignKey, - ForeignKeyConstraint, - Index, - Integer, -) +from sqlalchemy import (VARCHAR, Boolean, CheckConstraint, Column, ForeignKey, + ForeignKeyConstraint, Index, Integer) from sqlalchemy.orm import relationship, validates from sqlalchemy_utils import Ltree, LtreeType diff --git a/taxonomy/taxonomy/schemas/__init__.py b/taxonomy/taxonomy/schemas/__init__.py index 818b3be2f..0a7fd55ca 100644 --- a/taxonomy/taxonomy/schemas/__init__.py +++ b/taxonomy/taxonomy/schemas/__init__.py @@ -1,21 +1,12 @@ -from taxonomy.schemas.errors import ( - BadRequestErrorSchema, - ConnectionErrorSchema, - NotFoundErrorSchema, -) -from taxonomy.schemas.taxon import ( - ParentsConcatenateResponseSchema, - TaxonBaseSchema, - TaxonInputSchema, - TaxonResponseSchema, -) -from taxonomy.schemas.taxonomy import ( - CategoryLinkSchema, - JobTaxonomySchema, - TaxonomyBaseSchema, - TaxonomyInputSchema, - TaxonomyResponseSchema, -) +from taxonomy.schemas.errors import (BadRequestErrorSchema, + ConnectionErrorSchema, + NotFoundErrorSchema) +from taxonomy.schemas.taxon import (ParentsConcatenateResponseSchema, + TaxonBaseSchema, TaxonInputSchema, + TaxonResponseSchema) +from taxonomy.schemas.taxonomy import (CategoryLinkSchema, JobTaxonomySchema, + TaxonomyBaseSchema, TaxonomyInputSchema, + TaxonomyResponseSchema) __all__ = [ BadRequestErrorSchema, diff --git a/taxonomy/taxonomy/schemas/taxon.py b/taxonomy/taxonomy/schemas/taxon.py index c55d6dd1f..b5d352e09 100644 --- a/taxonomy/taxonomy/schemas/taxon.py +++ b/taxonomy/taxonomy/schemas/taxon.py @@ -1,6 +1,7 @@ from typing import List, Optional from pydantic import BaseModel, Field, validator + from taxonomy.errors import CheckFieldError diff --git a/taxonomy/taxonomy/taxon/resources.py b/taxonomy/taxonomy/taxon/resources.py index 
2868993fc..2b6c45ec0 100644 --- a/taxonomy/taxonomy/taxon/resources.py +++ b/taxonomy/taxonomy/taxon/resources.py @@ -9,26 +9,16 @@ from taxonomy.errors import NoTaxonError from taxonomy.filters import TaxonFilter from taxonomy.microservice_communication.search import X_CURRENT_TENANT_HEADER -from taxonomy.schemas import ( - BadRequestErrorSchema, - ConnectionErrorSchema, - NotFoundErrorSchema, - ParentsConcatenateResponseSchema, - TaxonBaseSchema, - TaxonInputSchema, - TaxonResponseSchema, -) +from taxonomy.schemas import (BadRequestErrorSchema, ConnectionErrorSchema, + NotFoundErrorSchema, + ParentsConcatenateResponseSchema, + TaxonBaseSchema, TaxonInputSchema, + TaxonResponseSchema) from taxonomy.tags import TAXON_TAG -from taxonomy.taxon.services import ( - add_taxon_db, - concatenated_parents_list, - delete_taxon_db, - fetch_bunch_taxons_db, - fetch_taxon_db, - filter_taxons, - insert_taxon_tree, - update_taxon_db, -) +from taxonomy.taxon.services import (add_taxon_db, concatenated_parents_list, + delete_taxon_db, fetch_bunch_taxons_db, + fetch_taxon_db, filter_taxons, + insert_taxon_tree, update_taxon_db) router = APIRouter( prefix="/taxons", diff --git a/taxonomy/taxonomy/taxon/services.py b/taxonomy/taxonomy/taxon/services.py index bc663f00b..8fd0b5159 100644 --- a/taxonomy/taxonomy/taxon/services.py +++ b/taxonomy/taxonomy/taxon/services.py @@ -6,14 +6,12 @@ from sqlalchemy.orm import Session from sqlalchemy.orm.query import Query from sqlalchemy_utils import Ltree + from taxonomy.errors import CheckFieldError, NoTaxonError, SelfParentError from taxonomy.filters import TaxonFilter from taxonomy.models import Taxon -from taxonomy.schemas import ( - ParentsConcatenateResponseSchema, - TaxonInputSchema, - TaxonResponseSchema, -) +from taxonomy.schemas import (ParentsConcatenateResponseSchema, + TaxonInputSchema, TaxonResponseSchema) from taxonomy.taxonomy.services import get_latest_taxonomy, get_taxonomy TaxonIdT = str diff --git a/taxonomy/taxonomy/taxonomy/resources.py b/taxonomy/taxonomy/taxonomy/resources.py index 71e9eb0c7..decab25e5 100644 --- a/taxonomy/taxonomy/taxonomy/resources.py +++ b/taxonomy/taxonomy/taxonomy/resources.py @@ -4,36 +4,27 @@ from filter_lib import Page from sqlalchemy.orm import Session from sqlalchemy_filters.exceptions import BadFilterFormat + from taxonomy.database import get_db from taxonomy.filters import TaxonomyFilter from taxonomy.logging_setup import LOGGER from taxonomy.microservice_communication.search import X_CURRENT_TENANT_HEADER -from taxonomy.schemas import ( - BadRequestErrorSchema, - CategoryLinkSchema, - ConnectionErrorSchema, - JobTaxonomySchema, - NotFoundErrorSchema, - TaxonomyBaseSchema, - TaxonomyInputSchema, - TaxonomyResponseSchema, -) +from taxonomy.schemas import (BadRequestErrorSchema, CategoryLinkSchema, + ConnectionErrorSchema, JobTaxonomySchema, + NotFoundErrorSchema, TaxonomyBaseSchema, + TaxonomyInputSchema, TaxonomyResponseSchema) from taxonomy.tags import TAXONOMY_TAG -from taxonomy.taxonomy.services import ( - batch_latest_taxonomies, - batch_versioned_taxonomies, - bulk_create_relations_with_categories, - bulk_delete_category_association, - create_taxonomy_instance, - delete_taxonomy_instance, - filter_taxonomies, - get_latest_taxonomy, - get_linked_taxonomies, - get_second_latest_taxonomy, - get_taxonomies_by_job_id, - get_taxonomy, - update_taxonomy_instance, -) +from taxonomy.taxonomy.services import (batch_latest_taxonomies, + batch_versioned_taxonomies, + bulk_create_relations_with_categories, + 
bulk_delete_category_association, + create_taxonomy_instance, + delete_taxonomy_instance, + filter_taxonomies, get_latest_taxonomy, + get_linked_taxonomies, + get_second_latest_taxonomy, + get_taxonomies_by_job_id, get_taxonomy, + update_taxonomy_instance) router = APIRouter( prefix="/taxonomy", diff --git a/taxonomy/taxonomy/taxonomy/services.py b/taxonomy/taxonomy/taxonomy/services.py index 2d793e190..6cb6e9a66 100644 --- a/taxonomy/taxonomy/taxonomy/services.py +++ b/taxonomy/taxonomy/taxonomy/services.py @@ -3,16 +3,13 @@ from filter_lib import Page, form_query, map_request_to_filter, paginate from sqlalchemy import and_, desc, null, or_ from sqlalchemy.orm import Query, Session + from taxonomy.errors import CheckFieldError from taxonomy.filters import TaxonomyFilter from taxonomy.models import AssociationTaxonomyCategory, Taxonomy -from taxonomy.schemas import ( - CategoryLinkSchema, - JobTaxonomySchema, - TaxonomyBaseSchema, - TaxonomyInputSchema, - TaxonomyResponseSchema, -) +from taxonomy.schemas import (CategoryLinkSchema, JobTaxonomySchema, + TaxonomyBaseSchema, TaxonomyInputSchema, + TaxonomyResponseSchema) def create_taxonomy_instance( diff --git a/taxonomy/tests/conftest.py b/taxonomy/tests/conftest.py index c177ccb6d..3d6b13f68 100644 --- a/taxonomy/tests/conftest.py +++ b/taxonomy/tests/conftest.py @@ -7,31 +7,24 @@ import pytest import sqlalchemy +from alembic import command +from alembic.config import Config from fastapi.testclient import TestClient from sqlalchemy import create_engine from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session, sessionmaker from sqlalchemy_utils import create_database, database_exists, drop_database +from tests.override_app_dependency import TEST_TENANTS, override -from alembic import command -from alembic.config import Config -from taxonomy.database import ( - SQLALCHEMY_DATABASE_URL, - Base, - get_db, - get_test_db_url, -) +from taxonomy.database import (SQLALCHEMY_DATABASE_URL, Base, get_db, + get_test_db_url) from taxonomy.main import app from taxonomy.models import Taxon, Taxonomy -from taxonomy.schemas import ( - CategoryLinkSchema, - TaxonInputSchema, - TaxonomyInputSchema, -) +from taxonomy.schemas import (CategoryLinkSchema, TaxonInputSchema, + TaxonomyInputSchema) from taxonomy.taxon import services as taxon_services from taxonomy.taxonomy import services as taxonomy_services from taxonomy.token_dependency import TOKEN -from tests.override_app_dependency import TEST_TENANTS, override @pytest.fixture diff --git a/taxonomy/tests/test_taxon_crud.py b/taxonomy/tests/test_taxon_crud.py index c5b4d4271..13cb107dd 100644 --- a/taxonomy/tests/test_taxon_crud.py +++ b/taxonomy/tests/test_taxon_crud.py @@ -3,9 +3,9 @@ from typing import Any, List, Optional import pytest +from tests.override_app_dependency import TEST_HEADER from taxonomy.models import Taxon -from tests.override_app_dependency import TEST_HEADER TAXON_PATH = "/taxons" diff --git a/taxonomy/tests/test_taxonomy_router.py b/taxonomy/tests/test_taxonomy_router.py index 6d6ce325a..6db0551d9 100644 --- a/taxonomy/tests/test_taxonomy_router.py +++ b/taxonomy/tests/test_taxonomy_router.py @@ -1,11 +1,11 @@ from typing import Tuple import pytest +from tests.override_app_dependency import TEST_HEADER, TEST_TENANTS from taxonomy.models import Taxonomy from taxonomy.schemas import CategoryLinkSchema from taxonomy.taxonomy import services -from tests.override_app_dependency import TEST_HEADER, TEST_TENANTS @pytest.mark.integration diff --git 
a/users/tests/keycloak/test_query.py b/users/tests/keycloak/test_query.py index bf9084421..13c127229 100644 --- a/users/tests/keycloak/test_query.py +++ b/users/tests/keycloak/test_query.py @@ -1,9 +1,8 @@ """Testing users/keycloak/query.py.""" import json -from unittest.mock import patch, create_autospec +from unittest.mock import create_autospec, patch import pytest - import users.keycloak.query as query import users.keycloak.schemas as schemas diff --git a/users/tests/keycloak/test_schemas.py b/users/tests/keycloak/test_schemas.py index 87026a1d1..b1d542415 100644 --- a/users/tests/keycloak/test_schemas.py +++ b/users/tests/keycloak/test_schemas.py @@ -1,6 +1,6 @@ -import users.keycloak.utils as kc_utils -import users.keycloak.schemas as kc_schemas import pytest +import users.keycloak.schemas as kc_schemas +import users.keycloak.utils as kc_utils user_1 = kc_schemas.User(username="user", id="1") user_2 = kc_schemas.User(username="u__r", id="2") diff --git a/users/tests/keycloak/test_utils.py b/users/tests/keycloak/test_utils.py index ae1f60428..070e271e1 100644 --- a/users/tests/keycloak/test_utils.py +++ b/users/tests/keycloak/test_utils.py @@ -1,6 +1,6 @@ +import pytest import users.keycloak.utils as kc_utils from users.schemas import Users -import pytest @pytest.fixture diff --git a/users/tests/test_main.py b/users/tests/test_main.py index 42c64e9ad..48ef902df 100644 --- a/users/tests/test_main.py +++ b/users/tests/test_main.py @@ -2,11 +2,10 @@ from unittest.mock import patch import pytest +import users.keycloak.schemas as kc_schemas from fastapi import HTTPException from fastapi.testclient import TestClient from tenant_dependency import TenantData - -import users.keycloak.schemas as kc_schemas from users.main import app, check_authorization, tenant client = TestClient(app) diff --git a/users/tests/test_utils.py b/users/tests/test_utils.py index 2c13f4cb8..c3426d602 100644 --- a/users/tests/test_utils.py +++ b/users/tests/test_utils.py @@ -1,6 +1,7 @@ from unittest.mock import patch import pytest + from users import utils diff --git a/users/users/keycloak/query.py b/users/users/keycloak/query.py index 4953687cb..2bc920b6f 100644 --- a/users/users/keycloak/query.py +++ b/users/users/keycloak/query.py @@ -5,6 +5,7 @@ import users.keycloak.resources as resources import users.keycloak.schemas as schemas from fastapi import HTTPException, status + from users import logger diff --git a/users/users/main.py b/users/users/main.py index 0ce47f175..6b3aaa468 100644 --- a/users/users/main.py +++ b/users/users/main.py @@ -14,15 +14,13 @@ from fastapi.security import OAuth2PasswordRequestForm from tenant_dependency import TenantData, get_tenant_info from urllib3.exceptions import MaxRetryError -from users import s3, utils -from users.config import ( - KEYCLOAK_ROLE_ADMIN, - KEYCLOAK_USERS_PUBLIC_KEY, - ROOT_PATH, -) +from users.config import (KEYCLOAK_ROLE_ADMIN, KEYCLOAK_USERS_PUBLIC_KEY, + ROOT_PATH) from users.logger import Logger from users.schemas import Users +from users import s3, utils + app = FastAPI(title="users", root_path=ROOT_PATH, version="0.1.2") realm = conf.KEYCLOAK_REALM minio_client = s3.get_minio_client() diff --git a/users/users/s3.py b/users/users/s3.py index 9f5d32d23..f6faab06f 100644 --- a/users/users/s3.py +++ b/users/users/s3.py @@ -2,6 +2,7 @@ from typing import Any, Dict, Optional from minio import Minio, credentials + from users import config, logger diff --git a/users/users/utils.py b/users/users/utils.py index 7a19410b0..d39e2849f 100644 --- a/users/users/utils.py 
+++ b/users/users/utils.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional from minio import Minio + from users import config diff --git a/web/local.env b/web/local.env new file mode 100644 index 000000000..0124d4af7 --- /dev/null +++ b/web/local.env @@ -0,0 +1,41 @@ +REACT_APP_FE_API_NAMESPACES_FILEMANAGEMENT=/docs + +BASE_URL=http://localhost + +ANNOTATION_PORT=8000 +ASSETS_PORT=8001 +CONVERT_PORT=8002 +JOBS_PORT=8003 +MODELS_PORT=8004 +PIPELINES_PORT=8005 +PROCESSING_PORT=8006 +SCHEDULER_PORT=8007 +SEARCH_PORT=8008 +TAXONOMY_PORT=8009 +USERS_PORT=8010 + +REACT_APP_FILEMANAGEMENT_API=${BASE_URL}:${ASSETS_PORT} +REACT_APP_JOB_API=${BASE_URL}:${JOBS_PORT} +REACT_APP_PIPELINES_API=${BASE_URL}:${PIPELINES_PORT} +REACT_APP_CATEGORIES_API=${BASE_URL}:${ANNOTATION_PORT} +REACT_APP_TOKENS_API=${BASE_URL}:${PROCESSING_PORT} +REACT_APP_AUTH_API=${BASE_URL}:8080 +REACT_APP_USERS_API=${BASE_URL}:${USERS_PORT} +REACT_APP_MODELS_API=${BASE_URL}:${MODELS_PORT} +REACT_APP_SEARCH_API=${BASE_URL}:${SEARCH_PORT} +REACT_APP_TAXONOMIES_API=${BASE_URL}:${TAXONOMY_PORT} + +REACT_APP_FILEMANAGEMENT_API_NAMESPACE=/ +REACT_APP_JOBMANAGER_API_NAMESPACE=/ +REACT_APP_PIPELINES_API_NAMESPACE=/ +REACT_APP_CATEGORIES_API_NAMESPACE=/ +REACT_APP_TOKENS_API_NAMESPACE=/ +REACT_APP_AUTH_API_NAMESPACE="/auth" +REACT_APP_USERS_API_NAMESPACE=/ +REACT_APP_MODELS_API_NAMESPACE=/ +REACT_APP_SEARCH_API_NAMESPACE=/ +REACT_APP_TAXONOMIES_API_NAMESPACE=/ + +REACT_APP_AUTH_CLIENT_ID=admin-cli + +REACT_APP_ALLOW_MOCKS=false