diff --git a/.github/workflows/annotation.yml b/.github/workflows/annotation.yml new file mode 100644 index 000000000..13d171148 --- /dev/null +++ b/.github/workflows/annotation.yml @@ -0,0 +1,73 @@ +name: annotation precommit and test +run-name: annotation precommit and test +on: + push +# paths: +# - 'annotation/**' +# pull_request: +# paths: +# - 'annotation/**' +jobs: + annotation-pre-commit-actions: + strategy: + matrix: + python-version: [ "3.8.15" ] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Run isort + uses: isort/isort-action@v1.1.0 + with: + configuration: + --profile=black + --filter-files + --line-length=79 + - name: Black + uses: psf/black@stable + with: + options: "--line-length=79" + - run: pip install flake8 + - run: flake8 --extend-ignore=E203 annotation + annotation-build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ "3.8.15" ] + services: + postgres-postgresql: + image: postgres:13 + ports: + - 5432:5432 + env: + POSTGRES_DB: annotation + POSTGRES_HOST_AUTH_METHOD: trust + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install dependencies and run tests + run: | + python -m pip install --upgrade pip + cd annotation + pip install poetry + poetry install --no-root + poetry add --editable ../lib/filter_lib + poetry add --editable ../lib/tenants + poetry run alembic upgrade head + poetry run pytest + env: + POSTGRES_HOST: 127.0.0.1 + POSTGRES_PORT: 5432 + POSTGRES_DB: annotation + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres diff --git a/.github/workflows/assets.yml b/.github/workflows/assets.yml new file mode 100644 index 000000000..6b6868236 --- /dev/null +++ b/.github/workflows/assets.yml @@ -0,0 +1,58 @@ +name: assets precommit and test +run-name: assets precommit and test +on: + push: + paths: + - 'assets/**' + pull_request: + paths: + - 'assets/**' +jobs: + assets-pre-commit-actions: + strategy: + matrix: + python-version: [ "3.8.15" ] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Run isort + uses: isort/isort-action@v1.1.0 + with: + configuration: + --profile=black + --filter-files + --line-length=79 + - name: Black + uses: psf/black@stable + with: + options: "--line-length=79" + - run: pip install flake8 + - run: flake8 --extend-ignore=E203 assets + assets-build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ "3.8.15" ] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + cd assets + sudo apt-get update && sudo apt-get -y install poppler-utils + pip install -r requirements.txt + pip install -e ../lib/filter_lib + pip install -e ../lib/tenants + - name: Test with pytest + run: | + cd assets + pytest diff --git a/.github/workflows/filter-lib.yml b/.github/workflows/filter-lib.yml new file mode 100644 index 000000000..069252fce --- 
/dev/null +++ b/.github/workflows/filter-lib.yml @@ -0,0 +1,56 @@ +name: filter_lib precommit and test +run-name: filter_lib precommit and test +on: + push: + paths: + - 'lib/filter_lib/**' + pull_request: + paths: + - 'lib/filter_lib/**' +jobs: + filter-lib-pre-commit-actions: + strategy: + matrix: + python-version: [ "3.8.15" ] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Run isort + uses: isort/isort-action@v1.1.0 + with: + configuration: + --profile=black + --filter-files + --line-length=79 + - name: Black + uses: psf/black@stable + with: + options: "--line-length=79" + - run: pip install flake8 + - run: flake8 --extend-ignore=E203 lib/filter_lib + filter-lib-build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ "3.8.15" ] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + cd lib/filter_lib + pip install -r requirements.txt + pip install -r requirements-dev.txt + - name: Test with pytest + run: | + cd lib/filter_lib + pytest diff --git a/.github/workflows/tenants.yml b/.github/workflows/tenants.yml new file mode 100644 index 000000000..81fa48669 --- /dev/null +++ b/.github/workflows/tenants.yml @@ -0,0 +1,56 @@ +name: tenants precommit and test +run-name: tenants precommit and test +on: + push: + paths: + - 'lib/tenants/**' + pull_request: + paths: + - 'lib/tenants/**' +jobs: + tenants-pre-commit-actions: + strategy: + matrix: + python-version: [ "3.8.15" ] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Run isort + uses: isort/isort-action@v1.1.0 + with: + configuration: + --profile=black + --filter-files + --line-length=79 + - name: Black + uses: psf/black@stable + with: + options: "--line-length=79" + - run: pip install flake8 + - run: flake8 --extend-ignore=E203 lib/tenants + tenants-build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ "3.8.15" ] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + cd lib/tenants + pip install -r requirements.txt + pip install -r requirements-dev.txt + - name: Test with pytest + run: | + cd lib/tenants + pytest diff --git a/annotation/.env b/annotation/.env index 206472fa2..9ad5a784d 100644 --- a/annotation/.env +++ b/annotation/.env @@ -7,9 +7,12 @@ POSTGRES_PASSWORD="postgres" POSTGRES_DB="annotation" S3_ENDPOINT_URL="http://minio" +S3_PREFIX= S3_LOGIN="minioadmin" S3_PASS="minioadmin" S3_START_PATH="annotation" +# S3_CREDENTIALS_PROVIDER can be: "minio" (default), "aws_iam" +S3_CREDENTIALS_PROVIDER="minio" ASSETS_URL="http://assets/datasets" ASSETS_FILES_URL="http://assets/files/search" JOBS_SEARCH_URL="http://jobs/jobs/search" diff --git a/annotation/app/annotations/main.py b/annotation/app/annotations/main.py index 8bf3b9d86..c64aa1f1c 100644 --- a/annotation/app/annotations/main.py +++ b/annotation/app/annotations/main.py @@ -1,5 +1,4 @@ import json -import logging import os from datetime import
datetime from hashlib import sha1 @@ -14,6 +13,7 @@ from sqlalchemy import asc from sqlalchemy.orm import Session +from app import logger from app.kafka_client import KAFKA_BOOTSTRAP_SERVER, KAFKA_SEARCH_TOPIC from app.kafka_client import producers as kafka_producers from app.models import AnnotatedDoc @@ -26,14 +26,20 @@ load_dotenv(find_dotenv()) ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL") +S3_PREFIX = os.environ.get("S3_PREFIX") AWS_ACCESS_KEY_ID = os.environ.get("S3_LOGIN") AWS_SECRET_ACCESS_KEY = os.environ.get("S3_PASS") INDEX_NAME = os.environ.get("INDEX_NAME") S3_START_PATH = os.environ.get("S3_START_PATH") +S3_CREDENTIALS_PROVIDER = os.environ.get("S3_CREDENTIALS_PROVIDER") MANIFEST = "manifest.json" LATEST = "latest" +logger_ = logger.Logger +logger_.debug("S3_PREFIX: %s", S3_PREFIX) + + def row_to_dict(row) -> dict: if hasattr(row, "__table__"): return { @@ -60,14 +66,40 @@ def row_to_dict(row) -> dict: } +def convert_bucket_name_if_s3prefix(bucket_name: str) -> str: + if S3_PREFIX: + return f"{S3_PREFIX}-{bucket_name}" + else: + return bucket_name + + +class NotConfiguredException(Exception): + pass + + def connect_s3(bucket_name: str) -> boto3.resource: - s3_resource = boto3.resource( - "s3", - endpoint_url=ENDPOINT_URL, - aws_access_key_id=AWS_ACCESS_KEY_ID, - aws_secret_access_key=AWS_SECRET_ACCESS_KEY, - ) + boto3_config = {} + if S3_CREDENTIALS_PROVIDER == "minio": + boto3_config.update( + { + "aws_access_key_id": AWS_ACCESS_KEY_ID, + "aws_secret_access_key": AWS_SECRET_ACCESS_KEY, + "endpoint_url": ENDPOINT_URL, + } + ) + elif S3_CREDENTIALS_PROVIDER == "aws_iam": + # No additional updates to config needed - boto3 uses env vars + ... + else: + raise NotConfiguredException( + "s3 connection is not properly configured - " + "s3_credentials_provider is not set" + ) + s3_resource = boto3.resource("s3", **boto3_config) + logger_.debug(f"S3_Credentials provider - {S3_CREDENTIALS_PROVIDER}") + try: + logger_.debug("Connecting to S3 bucket: %s", bucket_name) s3_resource.meta.client.head_bucket(Bucket=bucket_name) # here is some bug or I am missing smth: this line ^ # should raise NoSuchBucket @@ -84,7 +116,7 @@ def upload_pages_to_minio( pages: List[PageSchema], pages_sha: Dict[str, str], s3_path: str, - tenant: str, + bucket_name: str, s3_resource: boto3.resource, ) -> None: """ @@ -101,13 +133,15 @@ def upload_pages_to_minio( for page in pages: json_page = json.dumps(page.dict()) path_to_object = f"{s3_path}/{pages_sha[str(page.page_num)]}.json" - upload_json_to_minio(json_page, path_to_object, tenant, s3_resource) + upload_json_to_minio( + json_page, path_to_object, bucket_name, s3_resource + ) def upload_json_to_minio( json_obj: str, path_to_object: str, - tenant: str, + bucket_name: str, s3_resource: boto3.resource, ) -> None: """ @@ -119,7 +153,7 @@ def upload_json_to_minio( :param s3_resource: opened minio connection :return: None """ - s3_resource.Bucket(tenant).put_object( + s3_resource.Bucket(bucket_name).put_object( Body=json_obj, Key=path_to_object, ) @@ -174,7 +208,7 @@ def create_manifest_json( s3_path: str, s3_file_path: Optional[str], s3_file_bucket: Optional[str], - tenant: str, + bucket_name: str, job_id: int, file_id: int, doc_categories: Optional[List[str]], @@ -214,7 +248,9 @@ def create_manifest_json( manifest["categories"] = doc_categories manifest_json = json.dumps(manifest) - upload_json_to_minio(manifest_json, manifest_path, tenant, s3_resource) + upload_json_to_minio( + manifest_json, manifest_path, bucket_name, s3_resource + ) def 
construct_annotated_doc( @@ -325,12 +361,13 @@ def construct_annotated_doc( db.add(annotated_doc) db.flush() - s3_resource = connect_s3(tenant) + bucket_name = convert_bucket_name_if_s3prefix(tenant) + s3_resource = connect_s3(bucket_name) upload_pages_to_minio( pages=doc.pages, pages_sha=pages_sha, s3_path=s3_path, - tenant=tenant, + bucket_name=bucket_name, s3_resource=s3_resource, ) create_manifest_json( @@ -338,7 +375,7 @@ def construct_annotated_doc( s3_path, s3_file_path, s3_file_bucket, - tenant, + bucket_name, job_id, file_id, doc.categories, @@ -589,7 +626,7 @@ def find_latest_revision_pages( def load_page( s3_resource: boto3.resource, loaded_pages: List[Optional[LoadedPage]], - tenant: str, + bucket_name: str, page_num: int, user_id: str, page_revision: PageRevision, @@ -601,7 +638,7 @@ def load_page( f"{page_revision['file_id']}/{page_revision['page_id']}" ".json" ) - page_obj = s3_resource.Object(tenant, page_path) + page_obj = s3_resource.Object(bucket_name, page_path) loaded_page = json.loads(page_obj.get()["Body"].read().decode("utf-8")) else: loaded_page = { @@ -623,7 +660,8 @@ def get_file_manifest( job_id: str, file_id: str, tenant: str, s3_resource: boto3.resource ) -> Dict[str, Any]: manifest_path = f"{S3_START_PATH}/{job_id}/{file_id}/{MANIFEST}" - manifest_obj = s3_resource.Object(tenant, manifest_path) + bucket_name = convert_bucket_name_if_s3prefix(tenant) + manifest_obj = s3_resource.Object(bucket_name, manifest_path) return json.loads(manifest_obj.get()["Body"].read().decode("utf-8")) @@ -631,14 +669,15 @@ def load_all_revisions_pages( pages: Dict[int, List[PageRevision]], tenant: str, ): - s3_resource = connect_s3(tenant) + bucket_name = convert_bucket_name_if_s3prefix(tenant) + s3_resource = connect_s3(bucket_name) for page_num, page_revisions in pages.items(): loaded_pages = [] for page_revision in page_revisions: load_page( s3_resource, loaded_pages, - tenant, + bucket_name, page_num, page_revision["user_id"], page_revision, @@ -651,14 +690,15 @@ def load_latest_revision_pages( pages: Dict[int, Dict[str, LatestPageRevision]], tenant: str, ): - s3_resource = connect_s3(tenant) + bucket_name = convert_bucket_name_if_s3prefix(tenant) + s3_resource = connect_s3(bucket_name) for page_num, page_revisions in pages.items(): loaded_pages = [] for user_id, page_revision in page_revisions.items(): load_page( s3_resource, loaded_pages, - tenant, + bucket_name, page_num, user_id, page_revision, @@ -678,10 +718,11 @@ def load_annotated_pages_for_particular_rev( """ for page_num, page_id in revision.pages.items(): page_revision["page_id"] = page_id + bucket_name = convert_bucket_name_if_s3prefix(revision.tenant) load_page( s3_resource, loaded_pages, - revision.tenant, + bucket_name, page_num, revision.user, page_revision, @@ -703,10 +744,11 @@ def load_validated_pages_for_particular_rev( for page_num in revision.validated: if str(page_num) not in revision.pages: page_revision["page_id"] = None + bucket_name = convert_bucket_name_if_s3prefix(revision.tenant) load_page( s3_resource, loaded_pages, - revision.tenant, + bucket_name, page_num, revision.user, page_revision, @@ -737,7 +779,8 @@ def construct_particular_rev_response( "failed_validation_pages": [], } """ - s3_resource = connect_s3(revision.tenant) + bucket_name = convert_bucket_name_if_s3prefix(revision.tenant) + s3_resource = connect_s3(bucket_name) page_revision = { "job_id": revision.job_id, @@ -911,7 +954,7 @@ def _init_search_annotation_producer(): ) return producer except KafkaError as error: # KafkaError is 
parent of all kafka errors - logging.warning( + logger_.warning( f"Error occurred during kafka producer creating: {error}" ) diff --git a/annotation/app/categories/resources.py b/annotation/app/categories/resources.py index a4dd7c2a0..1aa824924 100644 --- a/annotation/app/categories/resources.py +++ b/annotation/app/categories/resources.py @@ -10,11 +10,10 @@ from app.errors import NoSuchCategoryError from app.filters import CategoryFilter from app.microservice_communication.search import X_CURRENT_TENANT_HEADER -from app.microservice_communication.taxonomy import link_category_with_taxonomy +from app.microservice_communication.taxonomy import delete_taxonomy_link from app.schemas import ( BadRequestErrorSchema, CategoryBaseSchema, - CategoryDataAttributeNames, CategoryInputSchema, CategoryResponseSchema, ConnectionErrorSchema, @@ -30,6 +29,7 @@ fetch_category_db, filter_category_db, insert_category_tree, + link_category_with_taxonomy, recursive_subcategory_search, response_object_from_db, update_category_db, @@ -58,27 +58,11 @@ def save_category( token: TenantData = Depends(TOKEN), ) -> CategoryResponseSchema: category_db = add_category_db(db, category, x_current_tenant) - if category_db.data_attributes: - taxonomy_link_params = {} - for data_attribute in category.data_attributes: - for attr_name, value in data_attribute.items(): - if attr_name in ( - CategoryDataAttributeNames.taxonomy_id.name, - CategoryDataAttributeNames.taxonomy_version.name, - ): - taxonomy_link_params[attr_name] = value - if taxonomy_link_params: - if ( - CategoryDataAttributeNames.taxonomy_id.name - not in taxonomy_link_params - ): - raise BadRequestErrorSchema("Taxonomy ID was not provided") - link_category_with_taxonomy( - category_id=category.id, - tenant=x_current_tenant, - token=token.token, - **taxonomy_link_params, - ) + link_category_with_taxonomy( + category_db=category_db, + x_current_tenant=x_current_tenant, + token=token, + ) return response_object_from_db(category_db) @@ -98,7 +82,9 @@ def fetch_category( x_current_tenant: str = X_CURRENT_TENANT_HEADER, ) -> CategoryResponseSchema: category_db = fetch_category_db(db, category_id, x_current_tenant) - category_response = insert_category_tree(db, category_db) + category_response = insert_category_tree( + db, category_db, tenant=x_current_tenant + ) return category_response @@ -171,6 +157,7 @@ def update_category( category_id: str = Path(..., example="1"), db: Session = Depends(get_db), x_current_tenant: str = X_CURRENT_TENANT_HEADER, + token: TenantData = Depends(TOKEN), ) -> CategoryResponseSchema: """ Updates category by id and returns updated category. 
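With this refactor, every write path in categories/resources.py delegates taxonomy linking to the service-layer `link_category_with_taxonomy`, which inspects the saved category's `data_attributes` and only calls the taxonomy service when a `taxonomy_id` is present (`taxonomy_version` stays optional). A minimal sketch of a category payload that would exercise the link path — the field values here are hypothetical, not taken from this PR:

```python
# Hypothetical payload: validate_schema() accepts a data_attributes
# entry only if it carries a non-empty taxonomy_id; taxonomy_version
# may be omitted.
category = {
    "name": "Header",
    "data_attributes": [
        {"taxonomy_id": "doc_taxonomy", "taxonomy_version": 1},
    ],
}
```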
@@ -180,6 +167,11 @@ def update_category( ) if not category_db: raise NoSuchCategoryError("Cannot update category parameters") + link_category_with_taxonomy( + category_db=category_db, + x_current_tenant=x_current_tenant, + token=token, + ) return response_object_from_db(category_db) @@ -195,6 +187,8 @@ def delete_category( category_id: str = Path(..., example="1"), db: Session = Depends(get_db), x_current_tenant: str = X_CURRENT_TENANT_HEADER, + token: TenantData = Depends(TOKEN), ) -> Response: + delete_taxonomy_link(category_id, x_current_tenant, token) delete_category_db(db, category_id, x_current_tenant) return Response(status_code=status.HTTP_204_NO_CONTENT) diff --git a/annotation/app/categories/services.py b/annotation/app/categories/services.py index 2c2b11b38..493c4ee2f 100644 --- a/annotation/app/categories/services.py +++ b/annotation/app/categories/services.py @@ -1,4 +1,3 @@ -import logging import uuid from typing import Dict, List, Optional, Set, Tuple, Union @@ -9,7 +8,9 @@ from sqlalchemy.orm import Session from sqlalchemy.sql.expression import func from sqlalchemy_utils import Ltree +from tenant_dependency import TenantData +from app import logger as app_logger from app.errors import ( CheckFieldError, ForeignKeyError, @@ -17,8 +18,10 @@ SelfParentError, ) from app.filters import CategoryFilter +from app.microservice_communication.taxonomy import send_category_taxonomy_link from app.models import Category, Job from app.schemas import ( + CategoryDataAttributeNames, CategoryInputSchema, CategoryORMSchema, CategoryResponseSchema, @@ -27,22 +30,46 @@ cache = TTLCache(maxsize=128, ttl=300) -logger = logging.getLogger(__name__) +logger = app_logger.Logger + + +def is_category_leaf(db: Session, category: Category, tenant: str) -> bool: + return not ( + db.query(Category.id) + .filter( + and_( + Category.parent == category.id, + or_(Category.tenant == tenant, Category.tenant == null()), + ) + ) + .first() + ) + + +def set_parents_is_leaf( + category_db: Category, + parents: Optional[List[CategoryResponseSchema]] = None, + is_leaf: bool = False, +) -> CategoryResponseSchema: + if parents is None: + parents = [] + category_response = response_object_from_db(category_db) + category_response.is_leaf = is_leaf + category_response.parents = parents + return category_response def insert_category_tree( - db: Session, category_db: Category + db: Session, category_db: Category, tenant: str ) -> CategoryResponseSchema: parents = fetch_category_parents(db, category_db) - children = fetch_category_children(db, category_db) + is_leaf = is_category_leaf(db, category_db, tenant) category_response = response_object_from_db(category_db) if category_response.parent: category_response.parents = [ - response_object_from_db(category) for category in parents + set_parents_is_leaf(category) for category in parents ] - category_response.children = [ - response_object_from_db(category) for category in children - ] + category_response.is_leaf = is_leaf return category_response @@ -70,7 +97,7 @@ def add_category_db( tree = Ltree(f"{id_}") category = Category( - id=(id_ or str(uuid.uuid4())), + id=id_, name=name, tenant=tenant, parent=parent if parent != "null" else None, @@ -179,12 +206,6 @@ def recursive_subcategory_search( return child_categories -# Turn off important check on job id -# TODO: Remove this patch BEFORE RELEASE!!! 
-# https://github.com/epam/badgerdoc/issues/2 -TEMP_PATCH_EXCLUDE_DIFF = True - - def fetch_bunch_categories_db( db: Session, category_ids: Set[str], @@ -214,7 +235,7 @@ category.id for category in categories }.symmetric_difference(category_ids) - if not TEMP_PATCH_EXCLUDE_DIFF and wrong_categories: + if wrong_categories: error_message = ", ".join(sorted(wrong_categories)) raise NoSuchCategoryError(f"No such categories: {error_message}") return categories @@ -275,8 +296,10 @@ def _get_parents( uniq_pathes = set() for cat in categories: - uniq_pathes.add(cat.tree.path) - uniq_cats = uniq_cats.union({tree.path for tree in cat.tree}) + # Root categories have no tree path; skip them to avoid an exception. + if cat.tree is not None: + uniq_pathes.add(cat.tree.path) + uniq_cats = uniq_cats.union({tree.path for tree in cat.tree}) category_to_object = { cat.id: cat @@ -419,3 +442,23 @@ def delete_category_db(db: Session, category_id: str, tenant: str) -> None: raise CheckFieldError("Cannot delete default category.") db.delete(category) db.commit() + + +def link_category_with_taxonomy( + category_db: Category, + x_current_tenant: str, + token: TenantData, +): + if category_db.data_attributes: + taxonomy_link_params = [] + for data_attribute in category_db.data_attributes: + if CategoryDataAttributeNames.validate_schema(data_attribute): + taxonomy_link_params.append(data_attribute) + + if taxonomy_link_params: + send_category_taxonomy_link( + category_id=category_db.id, + tenant=x_current_tenant, + token=token.token, + taxonomy_link_params=taxonomy_link_params, + ) diff --git a/annotation/app/database.py b/annotation/app/database.py index 4c41e592b..0d37ebbb3 100644 --- a/annotation/app/database.py +++ b/annotation/app/database.py @@ -1,6 +1,5 @@ import os -import psycopg2 import sqlalchemy from dotenv import find_dotenv, load_dotenv from sqlalchemy import create_engine diff --git a/annotation/app/errors.py b/annotation/app/errors.py index 8e56e6708..07a614212 100644 --- a/annotation/app/errors.py +++ b/annotation/app/errors.py @@ -1,6 +1,9 @@ +from typing import Union + from botocore.exceptions import BotoCoreError, ClientError from fastapi.requests import Request from fastapi.responses import JSONResponse +from requests import RequestException from sqlalchemy.exc import DBAPIError, SQLAlchemyError @@ -43,6 +46,11 @@ def __init__(self, message: str): self.message = message +class TaxonomyLinkException(Exception): + def __init__(self, exc_info: Union[str, RequestException]): + self.exc_info = exc_info + + def no_such_revisions_error_handler( request: Request, exc: NoSuchRevisionsError ): @@ -133,3 +141,10 @@ def category_parent_child_error_handler( status_code=400, content={"detail": f"Self parent error. {exc.message}"}, ) + + +def taxonomy_link_error_handler(request: Request, exc: TaxonomyLinkException): + return JSONResponse( + status_code=400, + content={"detail": f"Taxonomy link error. 
{exc.exc_info}"}, + ) diff --git a/annotation/app/jobs/resources.py b/annotation/app/jobs/resources.py index 05940c70d..880a11fba 100644 --- a/annotation/app/jobs/resources.py +++ b/annotation/app/jobs/resources.py @@ -1,16 +1,7 @@ -import logging from typing import Dict, List, Optional, Set, Union from uuid import UUID -from fastapi import ( - APIRouter, - Depends, - HTTPException, - Path, - Query, - Response, - status, -) +from fastapi import APIRouter, Depends, HTTPException, Path, Query, Response, status from filter_lib import Page from sqlalchemy import and_ from sqlalchemy.orm import Session @@ -19,6 +10,7 @@ from tenant_dependency import TenantData import app.categories.services +from app import logger as app_logger from app.categories import fetch_bunch_categories_db from app.database import get_db from app.distribution import distribute @@ -71,7 +63,7 @@ update_jobs_users, ) -logger = logging.getLogger(__name__) +logger = app_logger.Logger router = APIRouter( prefix="/jobs", diff --git a/annotation/app/logger.py b/annotation/app/logger.py new file mode 100644 index 000000000..a736eb903 --- /dev/null +++ b/annotation/app/logger.py @@ -0,0 +1,10 @@ +import logging + +_log_format = ( + "%(asctime)s - [%(levelname)s] - %(name)s - " + "(%(filename)s).%(funcName)s(%(lineno)d) - %(message)s" +) +_datefmt = "%d-%b-%y %H:%M:%S" + +logging.basicConfig(level=logging.INFO, format=_log_format, datefmt=_datefmt) +Logger = logging.getLogger(__name__) diff --git a/annotation/app/main.py b/annotation/app/main.py index d8e0169a2..75895e252 100644 --- a/annotation/app/main.py +++ b/annotation/app/main.py @@ -17,6 +17,7 @@ NoSuchCategoryError, NoSuchRevisionsError, SelfParentError, + TaxonomyLinkException, WrongJobError, category_foreign_key_error_handler, category_parent_child_error_handler, @@ -29,6 +30,7 @@ minio_no_such_bucket_error_handler, no_such_category_error_handler, no_such_revisions_error_handler, + taxonomy_link_error_handler, wrong_job_error_handler, ) from app.jobs import resources as jobs_resources @@ -84,3 +86,4 @@ def get_version() -> str: app.add_exception_handler(SQLAlchemyError, db_sa_error_handler) app.add_exception_handler(DBAPIError, db_dbapi_error_handler) app.add_exception_handler(SelfParentError, category_parent_child_error_handler) +app.add_exception_handler(TaxonomyLinkException, taxonomy_link_error_handler) diff --git a/annotation/app/microservice_communication/taxonomy.py b/annotation/app/microservice_communication/taxonomy.py index 9b11b8b1f..089f3206f 100644 --- a/annotation/app/microservice_communication/taxonomy.py +++ b/annotation/app/microservice_communication/taxonomy.py @@ -1,10 +1,11 @@ import os -from typing import Optional, Union +from typing import List import requests from dotenv import find_dotenv, load_dotenv from requests import RequestException +from app.errors import TaxonomyLinkException from app.microservice_communication.search import ( AUTHORIZATION, BEARER, @@ -15,24 +16,15 @@ TAXONOMY_URL = os.environ.get("TAXONOMY_URL") -class TaxonomyLinkException(Exception): - def __init__(self, exc_info: Union[str, RequestException]): - self.exc_info = exc_info - - -def link_category_with_taxonomy( +def send_category_taxonomy_link( category_id: str, - taxonomy_id: str, tenant: str, token: str, - taxonomy_version: Optional[int] = None, + taxonomy_link_params: List[dict], ): - response_body = { - "category_id": category_id, - "taxonomy_id": taxonomy_id, - } - if taxonomy_version is not None: - response_body["taxonomy_version"] = taxonomy_version + request_body 
= [ + {"category_id": category_id, **param} for param in taxonomy_link_params + ] try: response = requests.post( "{url}/link_category".format(url=TAXONOMY_URL), @@ -40,10 +32,30 @@ def link_category_with_taxonomy( HEADER_TENANT: tenant, AUTHORIZATION: f"{BEARER} {token}", }, - json=response_body, + json=request_body, timeout=5, ) if response.status_code != 201: - raise TaxonomyLinkException(response.text) + raise TaxonomyLinkException(response.json()["detail"]) + except RequestException as exc: + raise TaxonomyLinkException(exc) + + +def delete_taxonomy_link( + category_id: str, + tenant: str, + token: str, +): + try: + response = requests.delete( + f"{TAXONOMY_URL}/link_category/{category_id}", + headers={ + HEADER_TENANT: tenant, + AUTHORIZATION: f"{BEARER} {token}", + }, + timeout=5, + ) + if response.status_code != 204: + raise TaxonomyLinkException(response.json()["detail"]) except RequestException as exc: raise TaxonomyLinkException(exc) diff --git a/annotation/app/schemas/categories.py b/annotation/app/schemas/categories.py index 16a47da04..7a5ef734f 100644 --- a/annotation/app/schemas/categories.py +++ b/annotation/app/schemas/categories.py @@ -11,8 +11,21 @@ class CategoryTypeSchema(str, Enum): class CategoryDataAttributeNames(str, Enum): - taxonomy_id = 'taxonomy_id' - taxonomy_version = 'taxonomy_version' + taxonomy_id: str = "taxonomy_id" + taxonomy_version: Optional[str] = "taxonomy_version" + + @classmethod + def validate_schema(cls, schema: dict) -> bool: + if not schema: + return False + + for attr in schema: + if attr not in cls.__members__.keys(): + return False + + if not schema.get("taxonomy_id"): + return False + return True class CategoryBaseSchema(BaseModel): diff --git a/annotation/chart/templates/_helpers.tpl b/annotation/chart/templates/_helpers.tpl new file mode 100644 index 000000000..0ae93dc89 --- /dev/null +++ b/annotation/chart/templates/_helpers.tpl @@ -0,0 +1,6 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "svc.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} \ No newline at end of file diff --git a/annotation/chart/templates/deployment.yaml b/annotation/chart/templates/deployment.yaml index 58f56ab48..feac166c2 100644 --- a/annotation/chart/templates/deployment.yaml +++ b/annotation/chart/templates/deployment.yaml @@ -4,40 +4,61 @@ metadata: annotations: deployment.kubernetes.io/revision: "1" labels: - app: annotation - name: annotation + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . }}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + name: {{ template "svc.name" . }} namespace: {{ .Release.Namespace }} spec: progressDeadlineSeconds: 600 - replicas: 1 + replicas: {{ default .Values.global.replicaCount .Values.replicaCount }} revisionHistoryLimit: 10 selector: matchLabels: - app: annotation + app: {{ template "svc.name" . }} strategy: - rollingUpdate: - maxSurge: 25% - maxUnavailable: 25% - type: RollingUpdate +{{ .Values.updateStrategy | default .Values.global.updateStrategy | toYaml | indent 4 }} template: metadata: - annotations: - sidecar.istio.io/inject: "false" - creationTimestamp: null labels: - app: annotation + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . 
}}" + {{- if .Values.labels }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.global.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + annotations: + {{- if .Values.podAnnotation }} + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else}} + {{- range $key, $value := .Values.global.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} spec: + affinity: +{{ .Values.affinity | default .Values.global.affinity | toYaml | indent 8 }} + nodeSelector: +{{ .Values.nodeSelector | default .Values.global.nodeSelector | toYaml | indent 8 }} + tolerations: +{{ .Values.tolerations | default .Values.global.tolerations | toYaml | indent 8 }} containers: - - image: {{ .Values.image.registry }}/annotation:{{ .Values.image.tag }} - imagePullPolicy: IfNotPresent - name: annotation + - image: "{{ .Values.imageName }}:{{ default .Values.global.imageTag .Values.imageTag }}" + imagePullPolicy: "{{ default .Values.global.imagePullPolicy .Values.imagePullPolicy }}" + name: {{ template "svc.name" . }} resources: - requests: - memory: "200Mi" - cpu: "200m" - limits: - memory: "1000Mi" - cpu: "400m" +{{ .Values.resources | default .Values.global.resources | toYaml | indent 10 }} terminationMessagePath: /dev/termination-log terminationMessagePolicy: File env: @@ -52,27 +73,39 @@ spec: name: annotation key: POSTGRES_PASSWORD - name: POSTGRES_PORT - value: "5432" + value: "{{ default .Values.global.dbPort .Values.dbPort }}" - name: POSTGRES_DB - value: "annotation" + value: "{{ .Values.dbName }}" - name: POSTGRES_HOST - value: "postgres-postgresql" + value: "{{ default .Values.global.dbHost .Values.dbHost }}" + - name: S3_CREDENTIALS_PROVIDER + value: "{{ default .Values.global.s3CredentialsProvider .Values.s3CredentialsProvider }}" - name: S3_ENDPOINT_URL - value: "http://minio" + value: "{{ default .Values.global.s3Endpoint .Values.s3Endpoint }}" + {{- if .Values.secret.s3user }} - name: S3_LOGIN valueFrom: secretKeyRef: name: annotation key: S3_LOGIN + {{- end }} + {{- if .Values.secret.s3password }} - name: S3_PASS valueFrom: secretKeyRef: name: annotation - key: S3_PASS + key: S3_PASS + {{- end }} +{{- if .Values.extraEnvs }} +{{ toYaml .Values.extraEnvs | indent 8 }} +{{- end }} command: ["/bin/sh"] args: ["-c", "alembic upgrade head && uvicorn app.main:app --host 0.0.0.0 --port 8080 --root-path /api/v1/annotation"] + serviceAccountName: {{ default .Values.global.serviceAccountName .Values.serviceAccountName }} + automountServiceAccountToken: {{ default .Values.global.automountToken .Values.automountToken }} dnsPolicy: ClusterFirst restartPolicy: Always - schedulerName: default-scheduler - securityContext: {} + schedulerName: {{ default .Values.global.schedulerName .Values.schedulerName }} + securityContext: +{{ toYaml .Values.securityContext | indent 8 }} terminationGracePeriodSeconds: 30 diff --git a/annotation/chart/templates/mapping.yaml b/annotation/chart/templates/mapping.yaml index ea01a3bbf..f227c23c1 100644 --- a/annotation/chart/templates/mapping.yaml +++ b/annotation/chart/templates/mapping.yaml @@ -1,25 +1,25 @@ apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: annotation + name: {{ template "svc.name" . 
}} namespace: {{ .Release.Namespace }} spec: - host: {{ .Release.Namespace }}.badgerdoc.com + host: {{ default .Values.host .Values.global.host }} connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: /api/v1/annotation/ - service: annotation + service: {{ template "svc.name" . }} timeout_ms: 30000 --- apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: annotation-internal + name: {{ template "svc.name" . }}-internal namespace: {{ .Release.Namespace }} spec: host: ambassador connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: /api/v1/annotation/ - service: annotation + service: {{ template "svc.name" . }} timeout_ms: 30000 diff --git a/annotation/chart/templates/secret.yaml b/annotation/chart/templates/secret.yaml new file mode 100644 index 000000000..7d8a1a0d2 --- /dev/null +++ b/annotation/chart/templates/secret.yaml @@ -0,0 +1,22 @@ +{{- if .Values.secret.enabled -}} +apiVersion: v1 +kind: Secret +metadata: + name: "{{ template "svc.name" . }}" + namespace: {{ .Release.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ template "svc.name" . }}" + app: "{{ template "svc.name" . }}" +type: Opaque +data: + POSTGRES_USER: {{ .Values.secret.dbuser | b64enc }} + POSTGRES_PASSWORD: {{ .Values.secret.dbpassword | b64enc }} + {{- if .Values.secret.s3user }} + S3_LOGIN: {{ .Values.secret.s3user | b64enc }} + {{- end }} + {{- if .Values.secret.s3password }} + S3_PASS: {{ .Values.secret.s3password | b64enc }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/annotation/chart/templates/service.yaml b/annotation/chart/templates/service.yaml index 593af90ec..8fa8eab1d 100644 --- a/annotation/chart/templates/service.yaml +++ b/annotation/chart/templates/service.yaml @@ -2,18 +2,18 @@ apiVersion: v1 kind: Service metadata: labels: - app: annotation - name: annotation + app: "{{ template "svc.name" . }}" + name: "{{ template "svc.name" . }}" namespace: {{ .Release.Namespace }} spec: ipFamilies: - IPv4 ipFamilyPolicy: SingleStack ports: - - port: 80 - protocol: TCP - targetPort: 8080 + - port: {{ default .Values.global.servicePort .Values.servicePort }} + protocol: TCP + targetPort: 8080 selector: - app: annotation + app: "{{ template "svc.name" . 
}}" sessionAffinity: None - type: ClusterIP + type: "{{ default .Values.global.serviceType .Values.serviceType }}" \ No newline at end of file diff --git a/annotation/chart/values-c.yaml b/annotation/chart/values-c.yaml new file mode 100644 index 000000000..06ad077ee --- /dev/null +++ b/annotation/chart/values-c.yaml @@ -0,0 +1,62 @@ +extraEnvs: [] + +host: example.com + +image: + registry: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc + tag: latest + +imagePullPolicy: Always + +podAnnotations: + sidecar.istio.io/inject: "false" +labels: + app: annotation + +replicaCount: 1 + +resources: + requests: + memory: "200Mi" + cpu: "200m" + limits: + memory: "1000Mi" + cpu: "400m" + +rbac: + serviceAccountName: null + automountToken: false + +schedulerName: default-scheduler + +s3: + endpoint: "http://minio" + +db: + host: "postgres-postgresql" + port: "5432" + name: "annotation" + +nameOverride: "" + +affinity: {} +nodeSelector: {} +tolerations: [] + +secret: + enabled: true + dbuser: "postgres" + dbpassword: "postgres" + s3user: "serviceuser" + s3password: "12345678" + +securityContext: {} + +servicePort: 80 +serviceType: ClusterIP + +updateStrategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate \ No newline at end of file diff --git a/annotation/tests/test_category_crud.py b/annotation/tests/test_category_crud.py index 2b15a4dcb..d03d4a618 100644 --- a/annotation/tests/test_category_crud.py +++ b/annotation/tests/test_category_crud.py @@ -1,7 +1,8 @@ import uuid from json import loads from typing import Any, List, Optional, Tuple, Union -from unittest.mock import patch +from unittest.mock import Mock, patch +from uuid import UUID import pytest from fastapi.testclient import TestClient @@ -14,6 +15,22 @@ client = TestClient(app) +ATTRIBUTES_NOT_IN_CATEGORY_MODEL = ("is_leaf",) +NOT_FULLY_TEST_SUPPORTED_CATEGORY_ATTRIBUTES = ( + "parents", + "is_leaf", + "id", +) + + +def clean_data_for_db(data): + cleaned_data = { + key: value + for key, value in data.items() + if key not in ATTRIBUTES_NOT_IN_CATEGORY_MODEL + } + return cleaned_data + def prepare_category_body( id_: Optional[str] = None, @@ -121,14 +138,19 @@ def prepare_expected_result( response_map = loads(response) if isinstance(response, str) else response if not with_category_id: response_map["id"] = None - return {key: response_map[key] for key in sorted(response_map)} + return { + key: response_map[key] + for key in sorted(response_map) + if key not in NOT_FULLY_TEST_SUPPORTED_CATEGORY_ATTRIBUTES + } -def prepare_category_response( - data: dict, parents: List[dict] = [], children: List[dict] = [] -) -> dict: - data["parents"] = parents - data["children"] = children +def prepare_category_response(data: dict) -> dict: + data = { + key: value + for key, value in data.items() + if key not in NOT_FULLY_TEST_SUPPORTED_CATEGORY_ATTRIBUTES + } return data @@ -299,7 +321,7 @@ def test_add_id_is_unique(prepare_db_categories_different_names): @mark.integration -@patch("uuid.uuid4", return_value="fe857daa-8332-4a26-ab50-29be0a74477e") +@patch("uuid.uuid4", return_value=UUID("fe857daa-8332-4a26-ab50-29be0a74477e")) def test_add_id_is_generated(prepare_db_categories_different_names): data = prepare_category_body() response = client.post(CATEGORIES_PATH, json=data, headers=TEST_HEADERS) @@ -315,9 +337,7 @@ def test_add_id_is_generated(prepare_db_categories_different_names): @patch("app.categories.resources.link_category_with_taxonomy") @patch("uuid.uuid4", return_value="fe857daa-8332-4a26-ab50-29be0a74477e") 
def test_should_send_link_request_taxonomy_service( - uuid_mock, - link_request_mock, - prepare_db_categories_different_names + uuid_mock, link_request_mock, prepare_db_categories_different_names ): data = prepare_category_body( id_="1213", @@ -477,9 +497,12 @@ def test_search_allowed_categories( ) category = response.json()["data"][0] assert response.status_code == 200 - assert prepare_expected_result(category) == prepare_category_response( - expected - ) + + prepared_category_response = prepare_category_response(expected) + prepared_expected_result = prepare_expected_result(category) + + for key in prepared_expected_result: + assert prepared_category_response[key] == prepared_expected_result[key] @mark.integration @@ -763,7 +786,9 @@ def test_update_allowed_parent( cat_id = "1" data_add = prepare_category_body(name="Footer") data_add["id"] = category_parent - prepare_db_categories_different_names.merge(Category(**data_add)) + prepare_db_categories_different_names.merge( + Category(**clean_data_for_db(data_add)) + ) prepare_db_categories_different_names.commit() data_update = prepare_category_body(parent=category_parent) response = client.put( @@ -779,6 +804,7 @@ def test_update_allowed_parent( @patch( "app.categories.resources.delete_category_db", side_effect=SQLAlchemyError ) +@patch("app.categories.resources.delete_taxonomy_link", Mock) def test_delete_db_connection_error(prepare_db_categories_same_names): cat_id = "1" response = client.delete( @@ -793,6 +819,7 @@ def test_delete_db_connection_error(prepare_db_categories_same_names): "category_id", ("3", "100"), # category from other tenant and category that doesn't exist ) +@patch("app.categories.resources.delete_taxonomy_link", Mock) def test_delete_wrong_category( category_id, prepare_db_categories_same_names, @@ -806,6 +833,7 @@ def test_delete_wrong_category( @mark.integration +@patch("app.categories.resources.delete_taxonomy_link", Mock) def test_delete_common_category(prepare_db_categories_same_names): cat_id = "2" response = client.delete( @@ -816,6 +844,7 @@ def test_delete_common_category(prepare_db_categories_same_names): @mark.integration +@patch("app.categories.resources.delete_taxonomy_link", Mock) def test_delete_tenant_category(prepare_db_categories_same_names): cat_id = "1" response = client.delete( @@ -832,6 +861,7 @@ def test_delete_tenant_category(prepare_db_categories_same_names): @mark.integration @mark.parametrize("add_for_cascade_delete", ["1"], indirect=True) +@patch("app.categories.resources.delete_taxonomy_link", Mock) def test_cascade_delete_tenant_parent(add_for_cascade_delete): cat_id = "1" child_1, child_2 = add_for_cascade_delete diff --git a/annotation/tests/test_get_accumulated_revisions.py b/annotation/tests/test_get_accumulated_revisions.py index 3a88d061c..0f2fb0019 100644 --- a/annotation/tests/test_get_accumulated_revisions.py +++ b/annotation/tests/test_get_accumulated_revisions.py @@ -123,6 +123,7 @@ def reformat_date(date: str): pages=[], validated=[], failed_validation_pages=[], + categories=None, ) LATEST_WITH_ALL_PAGES = dict( revision=DOCS[2].revision, @@ -145,6 +146,7 @@ def reformat_date(date: str): ], validated=[3, 4, 5], failed_validation_pages=[1], + categories=["test_category_1", "test_category_2"], ) @@ -187,6 +189,7 @@ def reformat_date(date: str): ], validated=[5], failed_validation_pages=[1], + categories=["test_category_1", "test_category_2"], ), ), # find first revision and accumulate @@ -214,6 +217,7 @@ def reformat_date(date: str): ], validated=[3], 
failed_validation_pages=[4], + categories=["test_category_1", "test_category_2"], ), ), # find first revision and accumulate @@ -240,6 +244,7 @@ def reformat_date(date: str): ], validated=[3], failed_validation_pages=[], + categories=["test_category_1", "test_category_2"], ), ), # find second revision and accumulate @@ -267,6 +272,7 @@ def reformat_date(date: str): ], validated=[3, 4], failed_validation_pages=[1], + categories=["test_category_1", "test_category_2"], ), ), # find second revision and accumulate @@ -285,6 +291,7 @@ def reformat_date(date: str): pages=[], validated=[], failed_validation_pages=[], + categories=["test_category_1", "test_category_2"], ), ), # if revisions were not found, @@ -338,6 +345,12 @@ def test_get_annotation_for_latest_revision_status_codes( "app.annotations.main.connect_s3", Mock(return_value=minio_accumulate_revisions), ) + monkeypatch.setattr( + "app.annotations.main.get_file_manifest", + Mock( + return_value={"categories": ["test_category_1", "test_category_2"]} + ), + ) params = {"page_numbers": page_numbers} response = client.get( diff --git a/annotation/tests/test_get_annotation_for_particular_revision.py b/annotation/tests/test_get_annotation_for_particular_revision.py index 7a21b171a..a2d0e48c2 100644 --- a/annotation/tests/test_get_annotation_for_particular_revision.py +++ b/annotation/tests/test_get_annotation_for_particular_revision.py @@ -84,6 +84,7 @@ "pages": PART_REV_PAGES, "validated": PART_REV_DOC.validated, "failed_validation_pages": PART_REV_DOC.failed_validation_pages, + "categories": None, } @@ -128,7 +129,10 @@ def test_get_annotation_for_particular_revision_status_codes( "app.annotations.main.connect_s3", Mock(return_value=minio_particular_revision), ) - + monkeypatch.setattr( + "app.annotations.main.get_file_manifest", + Mock(return_value={}), + ) response = client.get( construct_part_rev_path( PART_REV_DOC.job_id, file_id, PART_REV_DOC.revision @@ -138,7 +142,6 @@ def test_get_annotation_for_particular_revision_status_codes( AUTHORIZATION: f"{BEARER} {TEST_TOKEN}", }, ) - assert response.status_code == expected_code assert response.json() == expected_response diff --git a/annotation/tests/test_job_categories.py b/annotation/tests/test_job_categories.py index 23d2d55de..10333f24c 100644 --- a/annotation/tests/test_job_categories.py +++ b/annotation/tests/test_job_categories.py @@ -32,6 +32,13 @@ client = TestClient(app) +NOT_FULLY_TEST_SUPPORTED_CATEGORY_ATTRIBUTES = ( + "parents", + "is_leaf", + "id", +) + + def prepare_job_body( categories: List[str], callback_url: Optional[str] = "http://datasetmanager.com", @@ -64,8 +71,8 @@ def prepare_get_result( current_page: Optional[int] = 1, page_size: Optional[int] = 50, total_objects: Optional[int] = 16, - parents=[], - is_leaf=True, + parents=None, + is_leaf=None, ) -> dict: categories = [] for cat_id, cat_name in categories_ids_names: @@ -126,11 +133,19 @@ def prepare_expected_result( response_map = loads(response) if isinstance(response, str) else response if not with_category_id: response_map["id"] = None - return {key: response_map[key] for key in sorted(response_map)} + return { + key: response_map[key] + for key in sorted(response_map) + if key not in NOT_FULLY_TEST_SUPPORTED_CATEGORY_ATTRIBUTES + } -def prepare_category_response(data: dict, parents: List[dict] = []) -> dict: - data["parents"] = parents +def prepare_category_response(data: dict) -> dict: + data = { + key: value + for key, value in data.items() + if key not in NOT_FULLY_TEST_SUPPORTED_CATEGORY_ATTRIBUTES + } 
return data diff --git a/assets/.env b/assets/.env index 70bc115b2..ff15beaff 100644 --- a/assets/.env +++ b/assets/.env @@ -13,6 +13,8 @@ POSTGRES_HOST=localhost POSTGRES_PORT=5432 DATABASE_URL="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}" +# S3_CREDENTIALS_PROVIDER can be: minio (default), aws_iam, aws_env, aws_config +S3_CREDENTIALS_PROVIDER=minio S3_PREFIX= S3_ENDPOINT=minio:9000 S3_ACCESS_KEY=minioadmin diff --git a/assets/alembic/env.py b/assets/alembic/env.py index 979d996f1..ff945ac7a 100644 --- a/assets/alembic/env.py +++ b/assets/alembic/env.py @@ -18,7 +18,7 @@ # for 'autogenerate' support # from myapp import mymodel # target_metadata = mymodel.Base.metadata -from src.db.models import Base +from src.db.models import Base # noqa target_metadata = Base.metadata diff --git a/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py b/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py index d82635fe1..dd03ad5ff 100644 --- a/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py +++ b/assets/alembic/versions/0f6c859c1d1c_add_original_ext_column_to_files_table.py @@ -9,10 +9,10 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. -from sqlalchemy.orm import Session +from sqlalchemy.orm import Session # noqa from alembic import op -from src.db.models import FileObject +from src.db.models import FileObject # noqa revision = "0f6c859c1d1c" down_revision = "fe5926249504" diff --git a/assets/chart/templates/deployment.yaml b/assets/chart/templates/deployment.yaml index 6c9700661..27269556e 100644 --- a/assets/chart/templates/deployment.yaml +++ b/assets/chart/templates/deployment.yaml @@ -4,40 +4,61 @@ metadata: annotations: deployment.kubernetes.io/revision: "1" labels: - app: assets - name: assets + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . }}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + name: {{ template "svc.name" . }} namespace: {{ .Release.Namespace }} spec: progressDeadlineSeconds: 600 - replicas: 1 + replicas: {{ default .Values.global.replicaCount .Values.replicaCount }} revisionHistoryLimit: 10 selector: matchLabels: - app: assets + app: {{ template "svc.name" . }} strategy: - rollingUpdate: - maxSurge: 25% - maxUnavailable: 25% - type: RollingUpdate +{{ .Values.updateStrategy | default .Values.global.updateStrategy | toYaml | indent 4 }} template: metadata: - annotations: - sidecar.istio.io/inject: "false" - creationTimestamp: null labels: - app: assets + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . 
}}" + {{- if .Values.labels }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.global.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + annotations: + {{- if .Values.podAnnotation }} + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else}} + {{- range $key, $value := .Values.global.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} spec: + affinity: +{{ .Values.affinity | default .Values.global.affinity | toYaml | indent 8 }} + nodeSelector: +{{ .Values.nodeSelector | default .Values.global.nodeSelector | toYaml | indent 8 }} + tolerations: +{{ .Values.tolerations | default .Values.global.tolerations | toYaml | indent 8 }} containers: - - image: {{ .Values.image.registry }}/assets:{{ .Values.image.tag }} - imagePullPolicy: IfNotPresent - name: assets + - image: "{{ .Values.imageName }}:{{ default .Values.global.imageTag .Values.imageTag }}" + imagePullPolicy: "{{ default .Values.global.imagePullPolicy .Values.imagePullPolicy }}" + name: {{ template "svc.name" . }} resources: - requests: - memory: "200Mi" - cpu: "200m" - limits: - memory: "1000Mi" - cpu: "500m" +{{ .Values.resources | default .Values.global.resources | toYaml | indent 10 }} terminationMessagePath: /dev/termination-log terminationMessagePolicy: File env: @@ -46,37 +67,34 @@ spec: secretKeyRef: name: assets key: DATABASE_URL - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - name: assets - key: POSTGRES_USER - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: assets - key: POSTGRES_PASSWORD + - name: S3_CREDENTIALS_PROVIDER + value: "{{ default .Values.global.s3CredentialsProvider .Values.s3CredentialsProvider }}" - name: S3_ENDPOINT - value: "minio:80" + value: "{{ default .Values.global.s3Endpoint .Values.s3Endpoint }}" + {{- if .Values.secret.s3user }} - name: S3_ACCESS_KEY valueFrom: secretKeyRef: - name: assets - key: S3_ACCESS_KEY + name: annotation + key: S3_LOGIN + {{- end }} + {{- if .Values.secret.s3password }} - name: S3_SECRET_KEY valueFrom: secretKeyRef: - name: assets - key: S3_SECRET_KEY - - name: JWT_SECRET - valueFrom: - secretKeyRef: - name: assets - key: JWT_SECRET + name: annotation + key: S3_PASS + {{- end }} +{{- if .Values.extraEnvs }} +{{ toYaml .Values.extraEnvs | indent 8 }} +{{- end }} command: ["/bin/sh"] args: ["-c", "alembic upgrade afa33cc83d57 && alembic upgrade fe5926249504 && alembic upgrade 0f6c859c1d1c && alembic upgrade head && uvicorn src.main:app --host 0.0.0.0 --port 8080 --root-path /api/v1/assets"] + serviceAccountName: {{ default .Values.global.serviceAccountName .Values.serviceAccountName }} + automountServiceAccountToken: {{ default .Values.global.automountToken .Values.automountToken }} dnsPolicy: ClusterFirst restartPolicy: Always - schedulerName: default-scheduler - securityContext: {} + schedulerName: {{ default .Values.global.schedulerName .Values.schedulerName }} + securityContext: +{{ toYaml .Values.securityContext | indent 8 }} terminationGracePeriodSeconds: 30 diff --git a/assets/chart/templates/mapping.yaml b/assets/chart/templates/mapping.yaml index ff4b2a22d..2157c93be 100644 --- a/assets/chart/templates/mapping.yaml +++ b/assets/chart/templates/mapping.yaml @@ -1,14 +1,14 @@ apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: assets + name: {{ template "svc.name" . 
}} namespace: {{ .Release.Namespace }} spec: - host: {{ .Release.Namespace }}.badgerdoc.com + host: {{ default .Values.host .Values.global.host }} connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: /api/v1/assets/ - service: assets + service: {{ template "svc.name" . }} timeout_ms: 30000 keepalive: interval: 10 @@ -18,14 +18,14 @@ spec: apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: assets-internal + name: {{ template "svc.name" . }}-internal namespace: {{ .Release.Namespace }} spec: host: ambassador connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: /api/v1/assets/ - service: assets + service: {{ template "svc.name" . }} timeout_ms: 30000 keepalive: interval: 10 diff --git a/assets/chart/templates/secret.yaml b/assets/chart/templates/secret.yaml new file mode 100644 index 000000000..cdb235b67 --- /dev/null +++ b/assets/chart/templates/secret.yaml @@ -0,0 +1,21 @@ +{{- if .Values.secret.enabled -}} +apiVersion: v1 +kind: Secret +metadata: + name: "{{ template "svc.name" . }}" + namespace: {{ .Release.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ template "svc.name" . }}" + app: "{{ template "svc.name" . }}" +type: Opaque +data: + DATABASE_URL: {{ .Values.secret.dbUrl | b64enc }} + {{- if .Values.secret.s3user }} + S3_LOGIN: {{ .Values.secret.s3user | b64enc }} + {{- end }} + {{- if .Values.secret.s3password }} + S3_PASS: {{ .Values.secret.s3password | b64enc }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/assets/chart/templates/service.yaml b/assets/chart/templates/service.yaml index 78743822a..8fa8eab1d 100644 --- a/assets/chart/templates/service.yaml +++ b/assets/chart/templates/service.yaml @@ -2,18 +2,18 @@ apiVersion: v1 kind: Service metadata: labels: - app: assets - name: assets + app: "{{ template "svc.name" . }}" + name: "{{ template "svc.name" . }}" namespace: {{ .Release.Namespace }} spec: ipFamilies: - IPv4 ipFamilyPolicy: SingleStack ports: - - port: 80 - protocol: TCP - targetPort: 8080 + - port: {{ default .Values.global.servicePort .Values.servicePort }} + protocol: TCP + targetPort: 8080 selector: - app: assets + app: "{{ template "svc.name" . 
}}" sessionAffinity: None - type: ClusterIP + type: "{{ default .Values.global.serviceType .Values.serviceType }}" \ No newline at end of file diff --git a/assets/chart/values-c.yaml b/assets/chart/values-c.yaml new file mode 100644 index 000000000..b7b280c35 --- /dev/null +++ b/assets/chart/values-c.yaml @@ -0,0 +1,57 @@ +extraEnvs: [] + +host: example.com + +image: + registry: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc + tag: latest + +imagePullPolicy: Always + +podAnnotations: + sidecar.istio.io/inject: "false" +labels: + app: assets + +replicaCount: 1 + +resources: + requests: + memory: "200Mi" + cpu: "200m" + limits: + memory: "1000Mi" + cpu: "400m" + +rbac: + serviceAccountName: null + automountToken: false + +schedulerName: default-scheduler + +s3: + endpoint: "http://minio" + +nameOverride: "" + +affinity: {} +nodeSelector: {} +tolerations: [] + +secret: + enabled: true + dburl: "postgresql+psycopg2://postgres:postgres@postgres-postgresql:5432/file_management" + s3user: "serviceuser" + s3password: "12345678" + + +securityContext: {} + +servicePort: 80 +serviceType: ClusterIP + +updateStrategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate \ No newline at end of file diff --git a/assets/chart/values.yaml b/assets/chart/values.yaml deleted file mode 100644 index f67f9d9a2..000000000 --- a/assets/chart/values.yaml +++ /dev/null @@ -1,2 +0,0 @@ -image: - registry: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc diff --git a/assets/requirements.txt b/assets/requirements.txt new file mode 100644 index 000000000..23e438273 --- /dev/null +++ b/assets/requirements.txt @@ -0,0 +1,68 @@ +alembic==1.7.6 +anyio==3.5.0 +asgiref==3.5.0 +astroid==2.6.6 +attrs==21.4.0 +black==22.1.0 +boto3==1.20.54 +botocore==1.23.54 +certifi==2021.10.8 +cfgv==3.3.1 +charset-normalizer==2.0.12 +click==8.0.3 +coverage==6.3.1 +distlib==0.3.4 +fastapi==0.73.0 +filelock==3.4.2 +h11==0.13.0 +identify==2.4.9 +idna==3.3 +importlib-metadata==4.11.0 +importlib-resources==5.4.0 +iniconfig==1.1.1 +isort==5.10.1 +jmespath==0.10.0 +lazy-object-proxy==1.7.1 +Mako==1.1.6 +MarkupSafe==2.0.1 +mccabe==0.6.1 +minio==7.1.3 +mypy==0.931 +mypy-extensions==0.4.3 +nodeenv==1.6.0 +packaging==21.3 +pathspec==0.9.0 +pdf2image==1.16.0 +Pillow==9.0.1 +platformdirs==2.5.0 +pluggy==1.0.0 +pre-commit==2.17.0 +psycopg2-binary==2.9.3 +py==1.11.0 +pydantic==1.9.0 +pylint==3.0.0a4 +pyparsing==3.0.7 +pytest==7.0.1 +pytest-cov==3.0.0 +python-dateutil==2.8.2 +python-dotenv==0.19.2 +python-magic==0.4.25 +python-multipart==0.0.5 +PyYAML==6.0 +requests==2.27.1 +s3transfer==0.5.1 +six==1.16.0 +sniffio==1.2.0 +SQLAlchemy==1.3.23 +sqlalchemy-filters==0.12.0 +starlette==0.17.1 +toml==0.10.2 +tomli==2.0.1 +types-requests==2.27.9 +types-urllib3==1.26.9 +typing_extensions==4.1.1 +urllib3==1.26.8 +uvicorn==0.17.4 +virtualenv==20.13.1 +wrapt==1.12.1 +zipp==3.7.0 diff --git a/assets/src/config.py b/assets/src/config.py index f54afcf12..f19632665 100644 --- a/assets/src/config.py +++ b/assets/src/config.py @@ -28,9 +28,11 @@ class Config(BaseSettings): postgres_host: Optional[str] postgres_port: Optional[str] database_url: Optional[str] + s3_credentials_provider: Optional[str] s3_endpoint: Optional[str] s3_access_key: Optional[str] s3_secret_key: Optional[str] + s3_prefix: Optional[str] minio_secure_connection: Optional[bool] = False preprocessing_url: Optional[str] sqlalchemy_pool_size: Optional[int] = 10 @@ -44,7 +46,6 @@ class Config(BaseSettings): gotenberg_formats: List[str] image_formats: List[str] 
aws_profile_name: Optional[str] - s3_prefix: Optional[str] class Config: env_file: str = find_dotenv(".env") diff --git a/assets/src/db/__init__.py b/assets/src/db/__init__.py index 879619e96..bc34dc758 100644 --- a/assets/src/db/__init__.py +++ b/assets/src/db/__init__.py @@ -1,2 +1,2 @@ import src.db.models -import src.db.service +import src.db.service # noqa diff --git a/assets/src/logger.py b/assets/src/logger.py index bcf44bdc7..a9c495025 100644 --- a/assets/src/logger.py +++ b/assets/src/logger.py @@ -2,7 +2,7 @@ from src.config import settings -_log_format = f"%(asctime)s - [%(levelname)s] - %(name)s - (%(filename)s).%(funcName)s(%(lineno)d) - %(message)s" +_log_format = f"%(asctime)s - [%(levelname)s] - %(name)s - (%(filename)s).%(funcName)s(%(lineno)d) - %(message)s" # noqa _datefmt = "%d-%b-%y %H:%M:%S" diff --git a/assets/src/routers/__init__.py b/assets/src/routers/__init__.py index 34bc19eb0..efdae3e25 100644 --- a/assets/src/routers/__init__.py +++ b/assets/src/routers/__init__.py @@ -2,4 +2,4 @@ import src.routers.datasets_router import src.routers.files_router import src.routers.minio_router -import src.routers.s3_router +import src.routers.s3_router # noqa diff --git a/assets/src/routers/bonds_router.py b/assets/src/routers/bonds_router.py index 04b6af59f..7bb5bf6af 100644 --- a/assets/src/routers/bonds_router.py +++ b/assets/src/routers/bonds_router.py @@ -1,3 +1,4 @@ +# flake8: noqa: F501 from typing import Any, Dict, List, Optional import fastapi diff --git a/assets/src/routers/datasets_router.py b/assets/src/routers/datasets_router.py index a3108acde..992336d6b 100644 --- a/assets/src/routers/datasets_router.py +++ b/assets/src/routers/datasets_router.py @@ -1,3 +1,4 @@ +# flake8: noqa: F501 from typing import Any, Dict, List, Optional, Union import fastapi diff --git a/assets/src/routers/files_router.py b/assets/src/routers/files_router.py index 154b0f19b..9bba2fce6 100644 --- a/assets/src/routers/files_router.py +++ b/assets/src/routers/files_router.py @@ -1,3 +1,4 @@ +# flake8: noqa: F501 from typing import Any, Dict, List, Optional, Union import fastapi diff --git a/assets/src/routers/minio_router.py b/assets/src/routers/minio_router.py index 0c305ecc8..1ce23892c 100644 --- a/assets/src/routers/minio_router.py +++ b/assets/src/routers/minio_router.py @@ -115,7 +115,7 @@ async def get_image_piece( status_code=fastapi.status.HTTP_400_BAD_REQUEST, detail=f"Content type {f.content_type} not supported", ) - piece_path = f"files/bbox/{f.id}/page{page_number}_bbox{bbox}_ext{settings.bbox_ext}.jpg" + piece_path = f"files/bbox/{f.id}/page{page_number}_bbox{bbox}_ext{settings.bbox_ext}.jpg" # noqa if not utils.minio_utils.check_file_exist(piece_path, f.bucket, storage): utils.minio_utils.make_pdf_piece( f, page_number, bbox, piece_path, storage diff --git a/assets/src/routers/s3_router.py b/assets/src/routers/s3_router.py index f1cafc2a2..11346acf3 100644 --- a/assets/src/routers/s3_router.py +++ b/assets/src/routers/s3_router.py @@ -37,7 +37,8 @@ async def download_s3_files( storage_url: storage endpoint. Example: "http://localhost:9000" bucket_s3: s3 storage bucket name from where files to be downloaded files_keys: list of files keys, paths to the file in s3 storage. 
- bucket_storage: bucket in MinIO storage where files should be uploaded + bucket_storage: bucket in MinIO storage where files should be + uploaded """ try: utils.common_utils.check_uploading_limit(s3_data.files_keys) @@ -74,4 +75,4 @@ async def download_s3_files( bucket_name, s3_files, session, storage_ ) - return [schemas.ActionResponse.parse_obj(response) for response in upload_results] + return [schemas.ActionResponse.parse_obj(response) for response in upload_results] # noqa diff --git a/assets/src/utils/__init__.py b/assets/src/utils/__init__.py index 3079e4bc8..7c30159fc 100644 --- a/assets/src/utils/__init__.py +++ b/assets/src/utils/__init__.py @@ -1,3 +1,3 @@ import src.utils.common_utils import src.utils.minio_utils -import src.utils.s3_utils +import src.utils.s3_utils # noqa diff --git a/assets/src/utils/common_utils.py b/assets/src/utils/common_utils.py index 99f8a04ef..579f93958 100644 --- a/assets/src/utils/common_utils.py +++ b/assets/src/utils/common_utils.py @@ -65,7 +65,7 @@ def get_pages(file: bytes) -> Any: def get_pages_from_pdf(file: bytes) -> Any: try: pages = pdf2image.pdfinfo_from_bytes(file)["Pages"] - except: + except: # noqa return None return pages @@ -74,7 +74,7 @@ def get_pages_from_image(file: bytes) -> Any: try: with PIL.Image.open(BytesIO(file)) as image: pages = image.n_frames - except: + except: # noqa return None return pages @@ -179,8 +179,8 @@ def convert_to_pdf(self) -> bytes: raise requests.exceptions.ConnectionError(e) if is_gotenberg_returns_file(converted_file.content) is False: - # is_gotenberg_returns_file func checks if file was converted to pdf. - # In case of some error, the content of Gotenberg response is plain text. + # is_gotenberg_returns_file func checks if file was converted to pdf. # noqa + # In case of some error, the content of Gotenberg response is plain text. 
# noqa self.conversion_status = "conversion error" logger_.error( @@ -398,7 +398,7 @@ def is_file_updated(self) -> bool: id_=self.new_file.id, action=self.action, action_status=True, - message=f"Successfully uploaded, converted: {self.conversion_status}", + message=f"Successfully uploaded, converted: {self.conversion_status}", # noqa name=self.file_name, ) return True diff --git a/assets/src/utils/minio_utils.py b/assets/src/utils/minio_utils.py index 2f8135122..8098c7188 100644 --- a/assets/src/utils/minio_utils.py +++ b/assets/src/utils/minio_utils.py @@ -6,30 +6,59 @@ import pdf2image.exceptions import PIL.Image import urllib3.exceptions -from minio.credentials import AWSConfigProvider +from minio.credentials import AWSConfigProvider, EnvAWSProvider, IamAwsProvider from src import db, logger from src.config import settings logger_ = logger.get_logger(__name__) -minio_config = { - "endpoint": settings.s3_endpoint, - "secure": settings.minio_secure_connection, -} -if settings.aws_profile_name is not None: - # 'minio' library is used to connect with AWS S3 - minio_config.update( - {"credentials": AWSConfigProvider(profile=settings.aws_profile_name)} - ) -else: - # 'minio' library is used to connect with Minio service locally - minio_config.update( - { - "access_key": settings.s3_access_key, - "secret_key": settings.s3_secret_key, - } + +class NotConfiguredException(Exception): + pass + + +def create_minio_config(): + minio_config = {} + + minio_config.update({"secure": settings.minio_secure_connection}) + + if settings.s3_endpoint: + minio_config.update({"endpoint": settings.s3_endpoint}) + + if settings.s3_credentials_provider == "minio": + minio_config.update( + { + "access_key": settings.s3_access_key, + "secret_key": settings.s3_secret_key, + } + ) + elif settings.s3_credentials_provider == "aws_iam": + minio_config.update({"credentials": IamAwsProvider()}) + elif settings.s3_credentials_provider == "aws_env": + minio_config.update({"credentials": EnvAWSProvider()}) + elif settings.s3_credentials_provider == "aws_config": + # environmental variable AWS_PROFILE_NAME should be set + minio_config.update( + { + "credentials": AWSConfigProvider( + profile=settings.aws_profile_name + ) + } + ) + else: + raise NotConfiguredException( + "s3 connection is not properly configured - " + "s3_credentials_provider is not set" + ) + logger_.debug( + f"S3_Credentials provider - {settings.s3_credentials_provider}" ) + + return minio_config + + +minio_config = create_minio_config() MinioClient = minio.Minio(**minio_config) @@ -272,7 +301,7 @@ def check_bucket(bucket: str, client: minio.Minio) -> bool: except ValueError: raise fastapi.HTTPException( status_code=fastapi.status.HTTP_400_BAD_REQUEST, - detail="Bucket name length must be more than 3 characters and less than 63 characters!", + detail="Bucket name length must be more than 3 characters and less than 63 characters!", # noqa ) return True diff --git a/assets/tests/conftest.py b/assets/tests/conftest.py index 105debddd..49e251e76 100644 --- a/assets/tests/conftest.py +++ b/assets/tests/conftest.py @@ -1,3 +1,4 @@ +# flake8: noqa: F501 import tempfile import uuid from io import BytesIO diff --git a/assets/tests/test_main.py b/assets/tests/test_main.py index 59ef35214..ae7d4a939 100644 --- a/assets/tests/test_main.py +++ b/assets/tests/test_main.py @@ -1,3 +1,4 @@ +# flake8: noqa: F501 import json import uuid from tempfile import NamedTemporaryFile diff --git a/convert/chart/templates/deployment.yaml
b/convert/chart/templates/deployment.yaml index bdbca97a0..206e4d977 100644 --- a/convert/chart/templates/deployment.yaml +++ b/convert/chart/templates/deployment.yaml @@ -43,6 +43,8 @@ spec: env: - name: ROOT_PATH value: "" + - name: S3_CREDENTIALS_PROVIDER + value: "minio" - name: MINIO_HOST value: "http://minio" - name: MINIO_ACCESS_KEY diff --git a/convert/src/.env b/convert/src/.env index 5641a116c..504611c06 100644 --- a/convert/src/.env +++ b/convert/src/.env @@ -1,6 +1,9 @@ MINIO_HOST=http://minio MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin +S3_PREFIX= +# S3_CREDENTIALS_PROVIDER can be: minio (default), aws_iam +S3_CREDENTIALS_PROVIDER=minio ASSETS_SERVICE_URL=http://dev2.badgerdoc.com/api/v1/assets/files/ CATEGORY_SERVICE_URL=http://dev2.badgerdoc.com/api/v1/annotation/categories/ diff --git a/convert/src/coco_export/convert.py b/convert/src/coco_export/convert.py index 46f835015..f32d74719 100644 --- a/convert/src/coco_export/convert.py +++ b/convert/src/coco_export/convert.py @@ -16,6 +16,7 @@ from src.utils.common_utils import add_to_zip_and_local_remove, get_headers from src.utils.json_utils import export_save_to_json, load_from_json from src.utils.render_pdf_page import pdf_page_to_jpg +from src.utils.s3_utils import convert_bucket_name_if_s3prefix LOGGER = get_logger(__file__) @@ -24,6 +25,7 @@ class DatasetFetch: def __init__(self, job_id: int, current_tenant: str, uuid: str): self.job_id = job_id self.tenant = current_tenant + self.bucket_name = convert_bucket_name_if_s3prefix(self.tenant) self.uuid = uuid def load_input(self, file_id: int) -> str: @@ -34,7 +36,7 @@ def load_input(self, file_id: int) -> str: Return path to loaded json """ key = f"annotation/{self.job_id}/{file_id}" - minio_client.download_file(self.tenant, key, key) + minio_client.download_file(self.bucket_name, key, key) return key def download_image( @@ -55,7 +57,7 @@ def download_image( f"{image_folder}/{self.job_id}_{Path(file_path).name}" ) minio_resource.meta.client.download_file( - self.tenant, file_path, image_local_path + self.bucket_name, file_path, image_local_path ) LOGGER.info("file %s was downloaded", Path(file_path).name) if Path(file_path).suffix == ".pdf" and validated_pages: @@ -104,7 +106,7 @@ def download_annotation( if validated_pages and int(page_num) not in validated_pages: continue minio_client.download_file( - self.tenant, + self.bucket_name, f"{work_dir}/{page_name}.json", f"{local_path}/{page_name}.json", ) @@ -127,7 +129,7 @@ def get_annotation_body( continue annotation_page_content = json.loads( minio_client.get_object( - Bucket=self.tenant, Key=f"{work_dir}/{page_name}.json" + Bucket=self.bucket_name, Key=f"{work_dir}/{page_name}.json" )["Body"].read() ) annotation_content_lst.append(annotation_page_content) @@ -147,7 +149,9 @@ def fetch( """ work_dir = Path(manifest).parent manifest_content = json.loads( - minio_client.get_object(Bucket=self.tenant, Key=manifest)["Body"] + minio_client.get_object(Bucket=self.bucket_name, Key=manifest)[ + "Body" + ] .read() .decode("utf-8") ) @@ -208,7 +212,7 @@ def is_job_exist(self) -> Union[List[Dict[str, str]], ClientError]: """Existence check of the job""" try: file_id = minio_client.list_objects( - Bucket=self.tenant, + Bucket=self.bucket_name, Prefix=f"annotation/{self.job_id}/", Delimiter="/", )["CommonPrefixes"] @@ -235,6 +239,7 @@ def __init__( ): self.job_id = job_id self.tenant = tenant + self.bucket_name = convert_bucket_name_if_s3prefix(self.tenant) self.token = token self.uuid = uuid self.zip_name = 
f"{self.uuid}_{export_format}.zip" @@ -283,7 +288,7 @@ def convert(self) -> ZipFile: } for page in file_id: files = minio_client.list_objects( - Bucket=self.tenant, Prefix=page["Prefix"] + Bucket=self.bucket_name, Prefix=page["Prefix"] )["Contents"] manifest_path = [ file for file in files if Path(file["Key"]).stem == "manifest" @@ -397,7 +402,7 @@ def convert(self) -> ZipFile: file_id = loader.is_job_exist() for page in file_id: files = minio_client.list_objects( - Bucket=self.tenant, Prefix=page["Prefix"] + Bucket=self.bucket_name, Prefix=page["Prefix"] )["Contents"] manifest_path = [ file for file in files if Path(file["Key"]).stem == "manifest" @@ -407,7 +412,7 @@ def convert(self) -> ZipFile: if not os.path.exists(Path(annotation_local_path).parent): os.makedirs(Path(annotation_local_path).parent, exist_ok=True) minio_client.download_file( - self.tenant, manifest_path, annotation_local_path + self.bucket_name, manifest_path, annotation_local_path ) LOGGER.info( "manifest.json was downloaded for the job %s", self.job_id diff --git a/convert/src/coco_export/export_service.py b/convert/src/coco_export/export_service.py index 70615387d..7a4b31daa 100644 --- a/convert/src/coco_export/export_service.py +++ b/convert/src/coco_export/export_service.py @@ -8,6 +8,7 @@ from src.coco_export.convert import ConvertToCoco, ExportConvertBase from src.config import minio_client from src.logger import get_logger +from src.utils.s3_utils import convert_bucket_name_if_s3prefix LOGGER = get_logger(__file__) @@ -36,14 +37,15 @@ def export_run( with ZipFile(zip_file.filename, "a") as zip_obj: # type: ignore zip_obj.write(f"{export_format}.json") os.remove(f"{export_format}.json") + bucket_name = convert_bucket_name_if_s3prefix(current_tenant) minio_client.upload_file( zip_file.filename, # type: ignore - Bucket=current_tenant, + Bucket=bucket_name, Key=f"{export_format}/{unique_identity}.zip", ) LOGGER.info( "zip archive was uploaded to bucket - %s, key - %s/%s.zip", - current_tenant, + bucket_name, export_format, unique_identity, ) @@ -60,10 +62,11 @@ def export_run_and_return_url( validated_only: bool = False, ) -> Any: unique_value = uuid.uuid4() + bucket_name = convert_bucket_name_if_s3prefix(current_tenant) url = minio_client.generate_presigned_url( "get_object", Params={ - "Bucket": current_tenant, + "Bucket": bucket_name, "Key": f"{export_format}/{unique_value}.zip", }, ExpiresIn=3600, diff --git a/convert/src/config.py b/convert/src/config.py index 075bbd099..43a24f44e 100644 --- a/convert/src/config.py +++ b/convert/src/config.py @@ -10,6 +10,10 @@ from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry +from src import logger + +logger_ = logger.get_logger(__name__) + class Settings(BaseSettings): """Base settings values""" @@ -17,6 +21,10 @@ class Settings(BaseSettings): minio_host: Optional[str] = os.getenv("MINIO_HOST") minio_access_key: Optional[str] = os.getenv("MINIO_ACCESS_KEY") minio_secret_key: Optional[str] = os.getenv("MINIO_SECRET_KEY") + s3_prefix: Optional[str] = os.getenv("S3_PREFIX") + s3_credentials_provider: Optional[str] = os.getenv( + "S3_CREDENTIALS_PROVIDER", "minio" + ) uploading_limit: int = Field(100, env="UPLOADING_LIMIT") coco_image_format: str = "jpg" dpi: int = 300 @@ -70,27 +78,48 @@ def get_request_session(*args: List[Any], **kwargs: Dict[str, Any]) -> Session: settings = Settings() +logger_.info(f"{settings.s3_credentials_provider=}") + + +class NotConfiguredException(Exception): + pass + + +def create_boto3_config(): + 
boto3_config = {} + if settings.s3_credentials_provider == "minio": + boto3_config.update( + { + "aws_access_key_id": settings.minio_access_key, + "aws_secret_access_key": settings.minio_secret_key, + "endpoint_url": settings.minio_host, + } + ) + elif settings.s3_credentials_provider == "aws_iam": + # No additional updates to config needed - boto3 uses env vars + ... + else: + raise NotConfiguredException( + "s3 connection is not properly configured - " + "s3_credentials_provider is not set" + ) + logger_.info( + f"S3_Credentials provider - {settings.s3_credentials_provider}" + ) + return boto3_config def get_minio_client() -> BaseClient: """Initialized s3 client by boto3 client""" - client = boto3.client( - "s3", - endpoint_url=settings.minio_host, - aws_access_key_id=settings.minio_access_key, - aws_secret_access_key=settings.minio_secret_key, - ) + boto3_config = create_boto3_config() + client = boto3.client("s3", **boto3_config) return client def get_minio_resource() -> BaseClient: """Initialized s3 client by boto3 resource""" - client = boto3.resource( - "s3", - endpoint_url=settings.minio_host, - aws_access_key_id=settings.minio_access_key, - aws_secret_access_key=settings.minio_secret_key, - ) + boto3_config = create_boto3_config() + client = boto3.resource("s3", **boto3_config) return client diff --git a/convert/src/utils/s3_utils.py b/convert/src/utils/s3_utils.py index 21798e63e..e515389f1 100644 --- a/convert/src/utils/s3_utils.py +++ b/convert/src/utils/s3_utils.py @@ -5,7 +5,7 @@ import urllib3 from fastapi import HTTPException, status -from src.config import minio_client +from src.config import minio_client, settings from src.exceptions import BucketError, FileKeyError, UploadLimitExceedError from src.logger import get_logger from src.models import coco @@ -14,6 +14,13 @@ logger = get_logger(__name__) +def convert_bucket_name_if_s3prefix(bucket_name: str) -> str: + if settings.s3_prefix: + return f"{settings.s3_prefix}-{bucket_name}" + else: + return bucket_name + + class S3Manager: """ Initializes boto3 client and boto3 resource objects with given credentials. diff --git a/infra/chart/.helmignore b/infra/chart/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/infra/chart/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/infra/chart/Chart.yaml b/infra/chart/Chart.yaml new file mode 100644 index 000000000..df2d97f9b --- /dev/null +++ b/infra/chart/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: chart +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. 
+# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/infra/chart/README.md b/infra/chart/README.md new file mode 100644 index 000000000..a279825f8 --- /dev/null +++ b/infra/chart/README.md @@ -0,0 +1,58 @@ +# How to install a minimal badgerdoc app + +## Prerequisites + +1. PostgreSQL database host, port, credentials per service +2. Access to S3 + +## How to install + +Set values in values.yaml. + +Run in shell: +```shell +helm install --debug --dry-run badgerdoc . +``` + +### Configuration of values + +| Parameter | Description | Default | +|------------------------------------|--------------------|--------------------------------------------------| +| affinity | | null +| labels | | null +| nodeSelector | | null +| tolerations | | null +| podAnnotations | | sidecar.istio.io/inject: "false" +| dbHost | | postgres-postgresql +| dbPort | | 5432 +| s3CredentialsProvider | | "aws_iam" +| s3Endpoint | | "minio" +| host | | yourexample.com +| imagePullPolicy | | Always +| serviceAccountName | | null +| automountToken | | false +| replicaCount | | 1 +| resources | | {} +| schedulerName | | default-scheduler +| servicePort | | 80 +| serviceType | | ClusterIP +| updateStrategy | | {} + +Global parameter works only if the local chart parameter is null. + +### Values per service + +| Parameter | Description | Default | +|------------------------------------|--------------------|--------------------------------------------------| imageName | | ${ACCOUNT}.dkr.ecr.${REGION}.amazonaws.com/badgerdoc imageTag | | latest dbName | | ${DATABASENAME} keycloak.externalUrl | | "http://example.com" keycloak.internalUrl | | "http://bagerdoc-keycloak" secret.enabled | | true secret.dbuser | | "postgres" secret.dbpassword | | "postgres" secret.s3user | | "serviceuser" secret.s3password | | "12345678" + +See [values.yaml](values.yaml) \ No newline at end of file diff --git a/infra/chart/charts/annotation b/infra/chart/charts/annotation new file mode 120000 index 000000000..db9aaa993 --- /dev/null +++ b/infra/chart/charts/annotation @@ -0,0 +1 @@ +../../../annotation/chart/ \ No newline at end of file diff --git a/infra/chart/charts/assets b/infra/chart/charts/assets new file mode 120000 index 000000000..d81fa0abe --- /dev/null +++ b/infra/chart/charts/assets @@ -0,0 +1 @@ +../../../assets/chart/ \ No newline at end of file diff --git a/infra/chart/charts/jobs b/infra/chart/charts/jobs new file mode 120000 index 000000000..0895ec606 --- /dev/null +++ b/infra/chart/charts/jobs @@ -0,0 +1 @@ +../../../jobs/chart/ \ No newline at end of file diff --git a/infra/chart/charts/taxonomy b/infra/chart/charts/taxonomy new file mode 120000 index 000000000..47fd3f25c --- /dev/null +++ b/infra/chart/charts/taxonomy @@ -0,0 +1 @@ +../../../taxonomy/chart/ \ No newline at end of file diff --git a/infra/chart/charts/users b/infra/chart/charts/users new file mode 120000 index 000000000..22de9c12f --- /dev/null +++ b/infra/chart/charts/users @@ -0,0 +1 @@ +../../../users/chart/ \ No newline at end of file diff --git a/infra/chart/charts/web b/infra/chart/charts/web new file mode 120000 index 000000000..8d31531a0 --- /dev/null +++
b/infra/chart/charts/web @@ -0,0 +1 @@ +../../../web/chart/ \ No newline at end of file diff --git a/infra/chart/values.yaml b/infra/chart/values.yaml new file mode 100644 index 000000000..52c22ed26 --- /dev/null +++ b/infra/chart/values.yaml @@ -0,0 +1,84 @@ +global: + affinity: null + labels: null + nodeSelector: null + tolerations: null + podAnnotations: + sidecar.istio.io/inject: "false" + dbHost: postgres-postgresql + dbPort: 5432 + s3CredentialsProvider: "aws_iam" + s3Endpoint: "minio" + host: yourexample.com + imagePullPolicy: Always + serviceAccountName: null + automountToken: false + replicaCount: 1 + resources: + requests: + memory: "000Mi" + cpu: "000m" + limits: + memory: "0000Mi" + cpu: "000m" + schedulerName: default-scheduler + servicePort: 80 + serviceType: ClusterIP + updateStrategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + +annotation: + dbName: "annotation" + imageName: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc/annotation + imageTag: 0.1.5-0d3e100 + secret: + dbpassword: postgres + dbuser: postgres + enabled: true + +assets: + imageName: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc/assets + imageTag: 0.1.7-0d3e100 + secret: + enabled: true + dbUrl: "postgresql+psycopg2://postgres:postgres@postgres-postgresql:5432/file_management" + +jobs: + imageName: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc/jobs + imageTag: 0.1.9-0d3e100 + keycloak: + externalUrl: "http://example.com" + secret: + enabled: true + dbUrl: "postgresql+psycopg2://postgres:postgres@postgres-postgresql:5432/job_manager" + + +users: + imageName: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc/users + imageTag: 0.1.2-0d3e100 + dbName: "users" + keycloak: + externalUrl: "http://example.com" + internalUrl: "http://bagerdoc-keycloak" + secret: + enabled: true + dbuser: "postgres" + dbpassword: "postgres" + +taxonomy: + dbName: "taxonomy" + imageName: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc/taxonomy + imageTag: 0.0.1-0d3e100 + secret: + dbpassword: postgres + dbuser: postgres + enabled: true + +badgerdoc-ui: + imageName: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc/badgerdoc_ui + imageTag: 0.2.0-379754b + + diff --git a/jobs/chart/templates/deployment.yaml b/jobs/chart/templates/deployment.yaml index a281381be..c5561af50 100644 --- a/jobs/chart/templates/deployment.yaml +++ b/jobs/chart/templates/deployment.yaml @@ -4,40 +4,61 @@ metadata: annotations: deployment.kubernetes.io/revision: "1" labels: - app: jobs - name: jobs + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . }}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + name: {{ template "svc.name" . }} namespace: {{ .Release.Namespace }} spec: progressDeadlineSeconds: 600 - replicas: 1 + replicas: {{ default .Values.global.replicaCount .Values.replicaCount }} revisionHistoryLimit: 10 selector: matchLabels: - app: jobs + app: {{ template "svc.name" . }} strategy: - rollingUpdate: - maxSurge: 25% - maxUnavailable: 25% - type: RollingUpdate +{{ .Values.updateStrategy | default .Values.global.updateStrategy | toYaml | indent 4 }} template: metadata: - annotations: - sidecar.istio.io/inject: "false" - creationTimestamp: null labels: - app: jobs + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . 
}}" + {{- if .Values.labels }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.global.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + annotations: + {{- if .Values.podAnnotations }} + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else}} + {{- range $key, $value := .Values.global.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} spec: + affinity: +{{ .Values.affinity | default .Values.global.affinity | toYaml | indent 8 }} + nodeSelector: +{{ .Values.nodeSelector | default .Values.global.nodeSelector | toYaml | indent 8 }} + tolerations: +{{ .Values.tolerations | default .Values.global.tolerations | toYaml | indent 8 }} containers: - - image: {{ .Values.image.registry }}/jobs:{{ .Values.image.tag }} - imagePullPolicy: IfNotPresent - name: jobs + - image: "{{ .Values.imageName }}:{{ default .Values.global.imageTag .Values.imageTag }}" + imagePullPolicy: "{{ default .Values.global.imagePullPolicy .Values.imagePullPolicy }}" + name: {{ template "svc.name" . }} resources: - requests: - memory: "200Mi" - cpu: "200m" - limits: - memory: "2000Mi" - cpu: "500m" +{{ .Values.resources | default .Values.global.resources | toYaml | indent 10 }} terminationMessagePath: /dev/termination-log terminationMessagePolicy: File env: @@ -45,7 +66,7 @@ spec: valueFrom: secretKeyRef: name: jobs - key: POSTGRESQL_JOBMANAGER_DATABASE_URI + key: DATABASE_URL - name: PIPELINES_URI value: "pipelines" - name: ASSETS_URI @@ -53,14 +74,14 @@ spec: - name: ANNOTATION_MICROSERVICE_URI value: "annotation" - name: KEYCLOAK_HOST - valueFrom: - secretKeyRef: - name: users - key: KEYCLOAK_DIRECT_ENDPOINT_DEV1 + value: "{{ .Values.keycloak.externalUrl }}" command: ["/bin/sh"] args: ["-c", "alembic upgrade head && uvicorn jobs.main:app --host 0.0.0.0 --port 8123 --root-path /api/v1/jobs"] + serviceAccountName: {{ default .Values.global.serviceAccountName .Values.serviceAccountName }} + automountServiceAccountToken: {{ default .Values.global.automountToken .Values.automountToken }} dnsPolicy: ClusterFirst restartPolicy: Always - schedulerName: default-scheduler - securityContext: {} + schedulerName: {{ default .Values.global.schedulerName .Values.schedulerName }} + securityContext: +{{ toYaml .Values.securityContext | indent 8 }} terminationGracePeriodSeconds: 30 diff --git a/jobs/chart/templates/mapping.yaml b/jobs/chart/templates/mapping.yaml index 385078693..860e9600d 100644 --- a/jobs/chart/templates/mapping.yaml +++ b/jobs/chart/templates/mapping.yaml @@ -1,25 +1,25 @@ apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: jobs + name: {{ template "svc.name" . }} namespace: {{ .Release.Namespace }} spec: - host: {{ .Release.Namespace }}.badgerdoc.com + host: {{ default .Values.host .Values.global.host }} connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: /api/v1/jobs/ - service: jobs + service: {{ template "svc.name" . }} timeout_ms: 30000 --- apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: jobs-internal + name: {{ template "svc.name" . }}-internal namespace: {{ .Release.Namespace }} spec: host: ambassador connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: /api/v1/jobs/ - service: jobs + service: {{ template "svc.name" .
}} timeout_ms: 30000 diff --git a/jobs/chart/templates/secret.yaml b/jobs/chart/templates/secret.yaml new file mode 100644 index 000000000..2e12d286e --- /dev/null +++ b/jobs/chart/templates/secret.yaml @@ -0,0 +1,15 @@ +{{- if .Values.secret.enabled -}} +apiVersion: v1 +kind: Secret +metadata: + name: "{{ template "svc.name" . }}" + namespace: {{ .Release.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ template "svc.name" . }}" + app: "{{ template "svc.name" . }}" +type: Opaque +data: + DATABASE_URL: {{ .Values.secret.dbUrl | b64enc }} +{{- end }} \ No newline at end of file diff --git a/jobs/chart/templates/service.yaml b/jobs/chart/templates/service.yaml index fc08a5028..75cd44ac7 100644 --- a/jobs/chart/templates/service.yaml +++ b/jobs/chart/templates/service.yaml @@ -2,18 +2,18 @@ apiVersion: v1 kind: Service metadata: labels: - app: jobs - name: jobs + app: "{{ template "svc.name" . }}" + name: "{{ template "svc.name" . }}" namespace: {{ .Release.Namespace }} spec: ipFamilies: - IPv4 ipFamilyPolicy: SingleStack ports: - - port: 80 - protocol: TCP - targetPort: 8123 + - port: {{ default .Values.global.servicePort .Values.servicePort }} + protocol: TCP + targetPort: 8123 selector: - app: jobs + app: "{{ template "svc.name" . }}" sessionAffinity: None - type: ClusterIP + type: "{{ default .Values.global.serviceType .Values.serviceType }}" \ No newline at end of file diff --git a/jobs/chart/values-c.yaml b/jobs/chart/values-c.yaml new file mode 100644 index 000000000..f622bfdda --- /dev/null +++ b/jobs/chart/values-c.yaml @@ -0,0 +1,54 @@ +extraEnvs: [] + +host: example.com + +image: + registry: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc + tag: latest + +imagePullPolicy: Always + +podAnnotations: + sidecar.istio.io/inject: "false" +labels: + app: jobs + +replicaCount: 1 + +resources: + requests: + memory: "200Mi" + cpu: "200m" + limits: + memory: "1000Mi" + cpu: "400m" + +rbac: + serviceAccountName: null + automountToken: false + +schedulerName: default-scheduler + +keycloak: + externalUrl: "http://example.com" + +nameOverride: "" + +affinity: {} +nodeSelector: {} +tolerations: [] + +secret: + enabled: true + dbUrl: "postgresql+psycopg2://postgres:postgres@postgres-postgresql:5432/job_manager" + +securityContext: {} + +servicePort: 80 +serviceType: ClusterIP + +updateStrategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate \ No newline at end of file diff --git a/jobs/chart/values.yaml b/jobs/chart/values.yaml deleted file mode 100644 index f67f9d9a2..000000000 --- a/jobs/chart/values.yaml +++ /dev/null @@ -1,2 +0,0 @@ -image: - registry: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc diff --git a/lib/filter_lib/requirements-dev.txt b/lib/filter_lib/requirements-dev.txt index 9f4a78ed3..c64cdf875 100644 --- a/lib/filter_lib/requirements-dev.txt +++ b/lib/filter_lib/requirements-dev.txt @@ -1,6 +1,6 @@ pytest pytest-cov black -isort +isort==5.9.1 pylint mypy==0.910 diff --git a/lib/filter_lib/src/__init__.py b/lib/filter_lib/src/__init__.py index 477acfa1a..0c73ee1b5 100644 --- a/lib/filter_lib/src/__init__.py +++ b/lib/filter_lib/src/__init__.py @@ -1,4 +1,4 @@ -from .dict_parser import map_request_to_filter -from .pagination import PaginationParams, paginate -from .query_modificator import form_query -from .schema_generator import BaseSearch, Page, create_filter_model +from .dict_parser import map_request_to_filter # noqa +from .pagination import
PaginationParams, paginate # noqa +from .query_modificator import form_query # noqa +from .schema_generator import BaseSearch, Page, create_filter_model # noqa diff --git a/lib/filter_lib/src/query_modificator.py b/lib/filter_lib/src/query_modificator.py index 197d52b29..896991e20 100644 --- a/lib/filter_lib/src/query_modificator.py +++ b/lib/filter_lib/src/query_modificator.py @@ -128,7 +128,12 @@ def _create_filter(query: Query, fil: Dict[str, Any]) -> Query: "Operator 'match' shouldn't be used with relations" ) - if isinstance(getattr(model, field).type, LtreeType): + try: + attr = getattr(model, field).type + except AttributeError: + attr = None + + if isinstance(attr, LtreeType): return _make_ltree_query(query=query, model=model, op=op, value=value) if _op_is_match(fil): diff --git a/lib/filter_lib/tests/test_query_modifier.py b/lib/filter_lib/tests/test_query_modifier.py index 1063ce495..2a6f36b75 100644 --- a/lib/filter_lib/tests/test_query_modifier.py +++ b/lib/filter_lib/tests/test_query_modifier.py @@ -52,7 +52,8 @@ def test_create_filter_ltree_parent(get_session): "(SELECT categories.tree AS tree \n" "FROM categories \n" "WHERE categories.id = :id_1) AS anon_1 \n" - "WHERE subpath(categories.tree, :subpath_1, nlevel(anon_1.tree) - :nlevel_1) = categories.tree " + "WHERE subpath(categories.tree, :subpath_1, " + "nlevel(anon_1.tree) - :nlevel_1) = categories.tree " "AND index(anon_1.tree, categories.tree) != :index_1 " "ORDER BY categories.tree DESC\n" " LIMIT :param_1" diff --git a/lib/filter_lib/usage_example/app.py b/lib/filter_lib/usage_example/app.py index f25cc88ff..19882eb70 100644 --- a/lib/filter_lib/usage_example/app.py +++ b/lib/filter_lib/usage_example/app.py @@ -58,7 +58,7 @@ def create_new_user( @app.post("/users/search", tags=["users"], response_model=Page[UserOut]) def search_users( - request: UserFilterModel, session: Session = Depends(get_db) # type: ignore + request: UserFilterModel, session: Session = Depends(get_db) # type: ignore # noqa ) -> Page[UserOut]: query = session.query(User) filter_args = map_request_to_filter(request.dict(), "User") # type: ignore @@ -82,9 +82,9 @@ def create_new_address( "/addresses/search", tags=["addresses"], response_model=Page[AddressOut] ) def search_address( - request: AddressFilterModel, session: Session = Depends(get_db) # type: ignore + request: AddressFilterModel, session: Session = Depends(get_db) # type: ignore # noqa ) -> Page[UserOut]: query = session.query(Address) - filter_args = map_request_to_filter(request.dict(), "Address") # type: ignore + filter_args = map_request_to_filter(request.dict(), "Address") # type: ignore # noqa query, pagination = form_query(filter_args, query) return paginate([x for x in query], pagination) diff --git a/lib/filter_lib/usage_example/db_example.py b/lib/filter_lib/usage_example/db_example.py index 5f66f4390..011afbfa5 100644 --- a/lib/filter_lib/usage_example/db_example.py +++ b/lib/filter_lib/usage_example/db_example.py @@ -4,7 +4,7 @@ Base = declarative_base() -DATABASE_URL = "postgresql+psycopg2://admin:admin@localhost/db_for_usage_example" # Database should be Postgres +DATABASE_URL = "postgresql+psycopg2://admin:admin@localhost/db_for_usage_example" # Database should be Postgres # noqa engine = create_engine(DATABASE_URL) SessionLocal = sessionmaker(bind=engine) diff --git a/lib/tenants/requirements-dev.txt b/lib/tenants/requirements-dev.txt index 32ee87e03..1c8adf282 100644 --- a/lib/tenants/requirements-dev.txt +++ b/lib/tenants/requirements-dev.txt @@ -2,6 +2,6 @@ pytest 
requests pytest-cov black -isort +isort==5.9.1 pylint mypy==0.910 diff --git a/lib/tenants/requirements.txt b/lib/tenants/requirements.txt index 49ff7f300..bdf6d27eb 100644 --- a/lib/tenants/requirements.txt +++ b/lib/tenants/requirements.txt @@ -1,2 +1,3 @@ -fastapi==0.70.0 +fastapi>=0.68.0 +httpx PyJWT[crypto]==2.3.0 diff --git a/lib/tenants/setup.py b/lib/tenants/setup.py index 255555af2..108a6d138 100644 --- a/lib/tenants/setup.py +++ b/lib/tenants/setup.py @@ -16,7 +16,7 @@ def get_long_description(path: str) -> str: setup( name="tenant_dependency", version="0.1.3", - description="Package for validating and parsing jwt via FastAPI dependency", + description="Package for validating and parsing jwt via FastAPI dependency", # noqa long_description=get_long_description("README.md"), author="Roman Kuzianov", author_email="Roman_Kuzianov@epam.com", diff --git a/lib/tenants/src/__init__.py b/lib/tenants/src/__init__.py index d5fef0fd5..b560bac7f 100644 --- a/lib/tenants/src/__init__.py +++ b/lib/tenants/src/__init__.py @@ -1,2 +1,2 @@ -from .dependency import get_tenant_info -from .schema import TenantData +from .dependency import get_tenant_info # noqa +from .schema import TenantData # noqa diff --git a/lib/tenants/src/dependency.py b/lib/tenants/src/dependency.py index 5d8c17705..d12a98a46 100644 --- a/lib/tenants/src/dependency.py +++ b/lib/tenants/src/dependency.py @@ -22,8 +22,8 @@ def __init__( Args: key: a private key for decoding tokens with hs256 alg - algorithm: an alg for tokens, will be checked in available algorithms - url: an url to auth service (http://bagerdoc-keycloack, http://dev1.gcov.ru) + algorithm: an alg for tokens, will be checked in available algorithms # noqa + url: an url to auth service (http://bagerdoc-keycloack, http://dev1.gcov.ru) # noqa """ self.key = key self.algorithm = self._check_algorithm(algorithm) @@ -154,16 +154,16 @@ def get_tenant_info( Examples: RS256: - tenant = get_tenant_info(algorithm="RS256", url="http://dev1.gcov.ru"). + tenant = get_tenant_info(algorithm="RS256", url="http://dev1.gcov.ru"). # noqa HS256: - tenant = get_tenant_info(algorithm="HS256", key="some_secret_key"). + tenant = get_tenant_info(algorithm="HS256", key="some_secret_key"). # noqa Args: key: a private key for decoding tokens with hs256 alg. algorithm: an alg for tokens, will be checked in available algorithms. - url: an url to auth service (http://bagerdoc-keycloack, http://dev1.gcov.ru). - scheme_name: a name for TenantDependency on Swagger, if not provided class name will be used. - description: a description for TenantDependency on Swagger, if not provided description will be empty. + url: an url to auth service (http://bagerdoc-keycloack, http://dev1.gcov.ru). # noqa + scheme_name: a name for TenantDependency on Swagger, if not provided class name will be used. # noqa + description: a description for TenantDependency on Swagger, if not provided description will be empty. # noqa debug: If True button 'Authorize' will be rendered on Swagger.
""" if debug: diff --git a/lib/tenants/usage_example/main.py b/lib/tenants/usage_example/main.py index c0bfb7993..05ba50751 100644 --- a/lib/tenants/usage_example/main.py +++ b/lib/tenants/usage_example/main.py @@ -5,7 +5,7 @@ from tenant_dependency import TenantData, get_tenant_info # RS256 (BadgerDoc) -# url=http://dev1.gcov.ru for local testing, url=http://bagerdoc-keycloack for deployed service +# url=http://dev1.gcov.ru for local testing, url=http://bagerdoc-keycloack for deployed service # noqa tenant_ = get_tenant_info(url="http://dev1.gcov.ru", algorithm="RS256") app_ = FastAPI() diff --git a/models/chart/templates/deployment.yaml b/models/chart/templates/deployment.yaml index 6a0b100ee..44238a305 100644 --- a/models/chart/templates/deployment.yaml +++ b/models/chart/templates/deployment.yaml @@ -61,6 +61,8 @@ spec: value: "5432" - name: POSTGRES_DB value: "models" + - name: S3_CREDENTIALS_PROVIDER + value: "minio" - name: MINIO_ACCESS_KEY valueFrom: secretKeyRef: diff --git a/models/src/.env b/models/src/.env index 3f4b24346..34a66ca3b 100644 --- a/models/src/.env +++ b/models/src/.env @@ -11,6 +11,8 @@ MINIO_ACCESS_KEY="minio" MINIO_SECRET_KEY="minio123" MINIO_ROOT_USER=${MINIO_ACCESS_KEY} MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY} +S3_PREFIX= +S3_CREDENTIALS_PROVIDER=minio DOCKER_REGISTRY_URL="localhost:5000" diff --git a/models/src/constants.py b/models/src/constants.py index aa8dc85cc..d9526f040 100644 --- a/models/src/constants.py +++ b/models/src/constants.py @@ -19,6 +19,8 @@ MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY") MINIO_HOST = os.environ.get("MINIO_HOST") MINIO_PUBLIC_HOST = os.environ.get("MINIO_PUBLIC_HOST") +S3_PREFIX = os.environ.get("S3_PREFIX") +S3_CREDENTIALS_PROVIDER = os.environ.get("S3_CREDENTIALS_PROVIDER") INFERENCE_HOST = os.environ.get("INFERENCE_HOST") INFERENCE_PORT = os.environ.get("INFERENCE_PORT") diff --git a/models/src/logger.py b/models/src/logger.py new file mode 100644 index 000000000..9ef09f307 --- /dev/null +++ b/models/src/logger.py @@ -0,0 +1,13 @@ +import logging + +_log_format = ( + f"%(asctime)s - [%(levelname)s] - %(name)s - " + f"(%(filename)s).%(funcName)s(%(lineno)d) - %(message)s" +) +_datefmt = "%d-%b-%y %H:%M:%S" + + +def get_logger(name: str) -> logging.Logger: + logger = logging.getLogger(name) + logger.setLevel(logging.INFO) + return logger diff --git a/models/src/routers/basements_routers.py b/models/src/routers/basements_routers.py index 90bc5e286..dd0814a51 100644 --- a/models/src/routers/basements_routers.py +++ b/models/src/routers/basements_routers.py @@ -17,6 +17,7 @@ from src.routers import tenant from src.utils import ( NoSuchTenant, + convert_bucket_name_if_s3prefix, get_minio_resource, upload_to_object_storage, ) @@ -192,6 +193,7 @@ def upload_files_to_object_storage( script: Optional[UploadFile] = File(None), archive: Optional[UploadFile] = File(None), ) -> None: + bucket_name = convert_bucket_name_if_s3prefix(x_current_tenant) basement = crud.get_instance(session, Basement, basement_id) if not basement: LOGGER.info( ) raise HTTPException(status_code=404, detail="Not existing basement") try: - s3_resource = get_minio_resource(tenant=x_current_tenant) + s3_resource = get_minio_resource(bucket_name) except NoSuchTenant as err: - LOGGER.info( - "NoSuchTenant error was encountered.
Bucket %s does not exist", - x_current_tenant, + LOGGER.exception( + "Bucket %s does not exist", + bucket_name, ) raise HTTPException(status_code=500, detail=str(err)) script_key = None @@ -212,7 +214,7 @@ script_key = f"basements/{basement_id}/training_script.py" upload_to_object_storage( s3_resource=s3_resource, - bucket_name=x_current_tenant, + bucket_name=bucket_name, file=script, file_path=script_key, ) @@ -220,7 +222,7 @@ archive_key = f"basements/{basement_id}/training_archive.zip" upload_to_object_storage( s3_resource=s3_resource, - bucket_name=x_current_tenant, + bucket_name=bucket_name, file=archive, file_path=archive_key, ) diff --git a/models/src/routers/training_routers.py b/models/src/routers/training_routers.py index ca49a2e0e..7b042fc15 100644 --- a/models/src/routers/training_routers.py +++ b/models/src/routers/training_routers.py @@ -36,7 +36,12 @@ from src.convert_utils import prepare_dataset_info from src.db import Basement, Training, get_db from src.routers import tenant -from src.utils import NoSuchTenant, get_minio_object, get_minio_resource +from src.utils import ( + NoSuchTenant, + convert_bucket_name_if_s3prefix, + get_minio_object, + get_minio_resource, +) LOGGER = logging.getLogger(name="models") TRAINING_SCRIPT_NAME = "training_script.py" @@ -116,11 +121,12 @@ def upload_files_to_object_storage( status_code=404, detail="Training with given id does not exist", ) - s3_resource = get_minio_resource(tenant=x_current_tenant) + bucket_name = convert_bucket_name_if_s3prefix(x_current_tenant) + s3_resource = get_minio_resource(bucket_name) key_archive = f"trainings/{training_id}/training_archive.zip" utils.upload_to_object_storage( s3_resource=s3_resource, - bucket_name=x_current_tenant, + bucket_name=bucket_name, file=archive, file_path=key_archive, ) @@ -223,16 +229,17 @@ def delete_training_by_id( if not training: LOGGER.info("Delete_training get not existing id %s", request.id) raise HTTPException(status_code=404, detail="Not existing training") + bucket_name = convert_bucket_name_if_s3prefix(x_current_tenant) try: - s3_resource = get_minio_resource(tenant=x_current_tenant) + s3_resource = get_minio_resource(bucket_name) except NoSuchTenant as err: - LOGGER.info( - "NoSuchTenant error was encountered. Bucket %s does not exist", - x_current_tenant, + LOGGER.exception( + "Bucket %s does not exist", + bucket_name, ) raise HTTPException(status_code=500, detail=str(err)) s3_resource.meta.client.delete_object( - Bucket=training.tenant, Key=training.key_archive + Bucket=bucket_name, Key=training.key_archive ) crud.delete_instance(session, training) LOGGER.info("Training %d was deleted", request.id) @@ -334,7 +341,7 @@ def start_training( ) key_archive = training.bases.key_archive with connect_colab(credentials) as ssh_client: - bucket = x_current_tenant + bucket = convert_bucket_name_if_s3prefix(x_current_tenant) file_script, size_script = get_minio_object(bucket, key_script) upload_file_to_colab( ssh_client, file_script, size_script, TRAINING_SCRIPT_NAME ) @@ -384,6 +391,7 @@ def download_training_results( Results should be located in "/content/training/results" directory in colab's file system.
""" + bucket_name = convert_bucket_name_if_s3prefix(x_current_tenant) training_exists = crud.is_id_existing(session, Training, training_id) if not training_exists: LOGGER.info( @@ -395,5 +403,5 @@ def download_training_results( with tempfile.TemporaryDirectory(dir=home_directory) as temp_dir: LOGGER.info(f"Created temporary directory: {temp_dir}") local_mount_colab_drive(temp_dir, credentials) - sync_colab_with_minio(temp_dir, x_current_tenant, training_id) + sync_colab_with_minio(temp_dir, bucket_name, training_id) return {"msg": f"Results for training with id {training_id} were uploaded"} diff --git a/models/src/utils.py b/models/src/utils.py index 22960c39f..caec2a0ff 100644 --- a/models/src/utils.py +++ b/models/src/utils.py @@ -10,6 +10,7 @@ from sqlalchemy.orm import Session from starlette.datastructures import UploadFile +import src.logger as logger from src.constants import ( CONTAINER_NAME, DOCKER_REGISTRY_URL, @@ -21,11 +22,22 @@ MINIO_PUBLIC_HOST, MINIO_SECRET_KEY, MODELS_NAMESPACE, + S3_CREDENTIALS_PROVIDER, + S3_PREFIX, ) from src.db import Basement, Model from src.errors import NoSuchTenant from src.schemas import DeployedModelPod, MinioHTTPMethod +logger_ = logger.get_logger(__name__) + + +def convert_bucket_name_if_s3prefix(bucket_name: str) -> str: + if S3_PREFIX: + return f"{S3_PREFIX}-{bucket_name}" + else: + return bucket_name + def deploy(session: Session, instance: Model) -> None: basement_instance = session.query(Basement).get(instance.basement) @@ -285,42 +297,65 @@ def get_pods(model_name: str) -> List[DeployedModelPod]: return pods -def get_minio_resource(tenant: str) -> boto3.resource: +class NotConfiguredException(Exception): + pass + + +def create_boto3_config(): + boto3_config = {} + if S3_CREDENTIALS_PROVIDER == "minio": + boto3_config.update( + { + "aws_access_key_id": MINIO_ACCESS_KEY, + "aws_secret_access_key": MINIO_SECRET_KEY, + "endpoint_url": f"http://{MINIO_HOST}", + } + ) + elif S3_CREDENTIALS_PROVIDER == "aws_iam": + # No additional updates to config needed - boto3 uses env vars + ... + else: + raise NotConfiguredException( + "s3 connection is not properly configured - " + "s3_credentials_provider is not set" + ) + logger_.debug(f"S3_Credentials provider - {S3_CREDENTIALS_PROVIDER}") + return boto3_config + + +def get_minio_resource(bucket_name: str) -> boto3.resource: """Creates and returns boto3 s3 resource with provided credentials to connect minio and validates that Bucket for provided tenant exists. If Bucket was not found - raises "NoSuchTenant" exception. 
""" + boto3_config = create_boto3_config() s3_resource = boto3.resource( - "s3", - endpoint_url=f"http://{MINIO_HOST}", # http prefix for SDK connection - aws_access_key_id=MINIO_ACCESS_KEY, - aws_secret_access_key=MINIO_SECRET_KEY, - config=Config(signature_version="s3v4"), # more secure signature type + "s3", **boto3_config, config=Config(signature_version="s3v4") ) try: - s3_resource.meta.client.head_bucket(Bucket=tenant) + s3_resource.meta.client.head_bucket(Bucket=bucket_name) except ClientError as err: if "404" in err.args[0]: - raise NoSuchTenant(f"Bucket for tenant {tenant} does not exist") + raise NoSuchTenant(f"Bucket {bucket_name} does not exist") return s3_resource def generate_presigned_url( - http_method: MinioHTTPMethod, tenant: str, key: str, expiration: int + http_method: MinioHTTPMethod, bucket_name: str, key: str, expiration: int ) -> Optional[str]: """Generates and returns presigned URL for tenant's minio Bucket to make actions with Object that has provided "key" in accordance with provided http_method. Link is valid for number of "expiration" seconds. In cases of boto3 errors returns None. """ - minio_client = get_minio_resource(tenant).meta.client + minio_client = get_minio_resource(bucket_name).meta.client # To make minio accessible via presigned URL from outside the cluster # we need to temporary use external host URL for signature generation. minio_client.meta._endpoint_url = f"http://{MINIO_PUBLIC_HOST}" try: presigned_url: str = minio_client.generate_presigned_url( http_method, - Params={"Bucket": tenant, "Key": key}, + Params={"Bucket": bucket_name, "Key": key}, ExpiresIn=expiration, ) except BotoCoreError: diff --git a/pipelines/.env b/pipelines/.env index 8e5f1067a..41f297bbe 100644 --- a/pipelines/.env +++ b/pipelines/.env @@ -23,9 +23,12 @@ DB_HOST="localhost" DB_PORT=5432 DB_NAME="pipelines" -# Minio settings. -MINIO_URI=minio:9000 -MINIO_ACCESS_KEY=minioadmin -MINIO_SECRET_KEY=minioadmin +# S3 settings +S3_CREDENTIALS_PROVIDER=minio +S3_PREFIX= +S3_ENDPOINT=minio:9000 +S3_ACCESS_KEY=minioadmin +S3_SECRET_KEY=minioadmin +AWS_PROFILE= KEYCLOAK_URI=http://bagerdoc-keycloack diff --git a/pipelines/chart/templates/deployment.yaml b/pipelines/chart/templates/deployment.yaml index cb6be0316..7eca84649 100644 --- a/pipelines/chart/templates/deployment.yaml +++ b/pipelines/chart/templates/deployment.yaml @@ -61,14 +61,16 @@ spec: key: DB_URL - name: DB_HOST value: "postgres-postgresql" - - name: MINIO_URI + - name: S3_CREDENTIALS_PROVIDER + value: "minio" + - name: S3_ENDPOINT value: "minio:80" - - name: MINIO_ACCESS_KEY + - name: S3_ACCESS_KEY valueFrom: secretKeyRef: name: pipelines key: MINIO_ACCESS_KEY - - name: MINIO_SECRET_KEY + - name: S3_SECRET_KEY valueFrom: secretKeyRef: name: pipelines diff --git a/pipelines/docker-compose.yaml b/pipelines/docker-compose.yaml index 08849100f..3d303c5e8 100644 --- a/pipelines/docker-compose.yaml +++ b/pipelines/docker-compose.yaml @@ -72,7 +72,10 @@ services: - .env environment: - DB_URI=postgresql+psycopg2://postgres:postgres@db:5432/pipelines - - MINIO_URI=minio:9000 + - S3_CREDENTIALS_PROVIDER=minio + - S3_ENDPOINT=minio:9000 + - S3_ACCESS_KEY=minioadmin + - S3_SECRET_KEY=minioadmin - PREPROCESSING_URL=//preprocess:8000/ - POSTPROCESSING_URI=//postprocess:8004/ volumes: diff --git a/pipelines/readme.md b/pipelines/readme.md index 84ea06e70..faca6e5b5 100644 --- a/pipelines/readme.md +++ b/pipelines/readme.md @@ -34,11 +34,14 @@ Executor for pipelines. |`int`
`default: 5432`<br> DB_PORT| Server port. | |`int`<br> `default: pipelines`<br> DB_NAME| Database name. | -### Minio settings -Necessary for proper result merger work. +### S3 settings +File storage for result processing. | Variable | Description | |---|---------------| -|`str`<br> `default: ""`<br> MINIO_URI| Minio storage URI. | -|`str`<br> `default: minioadmin`<br> MINIO_ACCESS_KEY| Minio storage user ID. | -|`str`<br> `default: minioadmin`<br> MINIO_SECRET_KEY| Minio storage password. | +|`str`<br> `default: None`<br> S3_CREDENTIALS_PROVIDER| Credentials provider. Supports `minio`, `aws_iam`, `aws_env`, `aws_config` | +|`str`<br> `default: ""`<br> S3_PREFIX| Bucket name prefix: `[S3_PREFIX-]bucket_name` | +|`str`<br> `default: None`<br> S3_ENDPOINT| S3 storage URI | +|`str`<br> `default: None`<br> S3_ACCESS_KEY| S3 storage access key | +|`str`<br> `default: None`<br> S3_SECRET_KEY| S3 storage secret key | +|`str`<br> `default: None`<br> AWS_PROFILE| AWS_PROFILE if `aws_config` provider selected |
diff --git a/pipelines/requirements.txt b/pipelines/requirements.txt index 01ff6ef4a..53b9b4646 100644 --- a/pipelines/requirements.txt +++ b/pipelines/requirements.txt @@ -11,9 +11,7 @@ requests==2.26.0 PyJWT[crypto]==2.3.0 urllib3~=1.26.8 -pytest~=7.0.0 setuptools~=60.5.0 aiokafka~=0.7.2 setuptools~=60.5.0 -freezegun~=1.1.0 diff --git a/pipelines/requirements_dev.txt b/pipelines/requirements_dev.txt index 2e50915f5..ca410c59d 100644 --- a/pipelines/requirements_dev.txt +++ b/pipelines/requirements_dev.txt @@ -2,7 +2,6 @@ pre-commit==2.14.0 pytest==6.2.4 pytest-asyncio==0.15.1 freezegun==1.1.0 -requests pytest-cov isort==5.9.1 black==21.6b0 diff --git a/pipelines/src/config.py b/pipelines/src/config.py index c6fd1b37d..a78a02449 100644 --- a/pipelines/src/config.py +++ b/pipelines/src/config.py @@ -45,19 +45,25 @@ def get_version() -> str: DB_NAME = os.getenv("DB_NAME", "pipelines") DB_URI = os.getenv( "DB_URI", - f"postgresql+psycopg2://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}", + f"postgresql+psycopg2://{DB_USERNAME}:{DB_PASSWORD}" + f"@{DB_HOST}:{DB_PORT}/{DB_NAME}", ) -# Minio settings. -MINIO_URI = os.getenv("MINIO_URI", "") -MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "minioadmin") -MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "minioadmin") +# S3 settings +S3_CREDENTIALS_PROVIDER = os.getenv("S3_CREDENTIALS_PROVIDER") +S3_PREFIX = os.getenv("S3_PREFIX", "") +S3_ENDPOINT = os.getenv("S3_ENDPOINT") +S3_ACCESS_KEY = os.getenv("S3_ACCESS_KEY") +S3_SECRET_KEY = os.getenv("S3_SECRET_KEY") +AWS_PROFILE = os.getenv("AWS_PROFILE") # Keycloak settings KEYCLOAK_URI = os.getenv("KEYCLOAK_URI", "http://dev1.badgerdoc.com") KEYCLOAK_REALM = os.getenv("KEYCLOAK_REALM", "master") CLIENT_SECRET = os.getenv("CLIENT_SECRET", "") -KEYCLOAK_TOKEN_URI = f"{KEYCLOAK_URI}/auth/realms/{KEYCLOAK_REALM}/protocol/openid-connect/token" +KEYCLOAK_TOKEN_URI = ( + f"{KEYCLOAK_URI}/auth/realms/{KEYCLOAK_REALM}" f"/protocol/openid-connect/token" +) # Kafka settings KAFKA_BOOTSTRAP_SERVER = os.getenv("KAFKA_BOOTSTRAP_SERVERS", "kafka:9092") diff --git a/pipelines/src/execution.py b/pipelines/src/execution.py index 2fbff8a39..a7f60e73e 100644 --- a/pipelines/src/execution.py +++ b/pipelines/src/execution.py @@ -17,9 +17,10 @@ import src.db.models as dbm import src.db.service as service import src.result_processing as postprocessing -from src import config, http_utils, log, schemas, service_token, webhooks +from src import config, http_utils, log, s3, schemas, service_token, webhooks logger = log.get_logger(__file__) +minio_client = s3.get_minio_client() # Exception messages PIPELINE_EXISTS = ( @@ -265,7 +266,7 @@ async def start(self, producer: AIOKafkaProducer) -> None: pipeline_type = self.get_pipeline_type() initial_step = [step for step in self.steps if step.init_args][0] args = schemas.InputArguments.parse_obj(initial_step.init_args) - bucket = args.get_output_bucket() + tenant = s3.tenant_from_bucket(args.get_output_bucket()) if pipeline_type == schemas.PipelineTypes.INFERENCE: preprecessing_passed = await self.check_preprocessing_status( tenant ) return logger.info(f"Start executing task with id = {self.id}") self.change_status(schemas.Status.RUN) - self.send_status(pipeline_type=pipeline_type, bucket=bucket) + self.send_status(pipeline_type=pipeline_type, tenant=tenant) init_body = args.prepare_for_init( pipeline_type=pipeline_type,
curr_step_id=str(initial_step.id) ) @@ -309,24 +310,23 @@ async def finish(self, failed: bool) -> None: filepath = args.file postprocessing_status = postprocessing.manage_result_for_annotator( bucket=bucket, + tenant=s3.tenant_from_bucket(bucket), path_=path_, job_id=self.job_id, # type: ignore file_bucket=file_bucket, filepath=filepath, file_id=filename, pipeline_id=self.pipeline_id, + client=minio_client, token=token, ) failed = not postprocessing_status task_status = schemas.Status.FAIL if failed else schemas.Status.DONE self.change_status(task_status) - logger.info( - f"Task with id = {self.id} finished with status = {task_status}" - ) - self.send_status( - pipeline_type=pipeline_type, bucket=bucket, token=token - ) + logger.info(f"Task with id = {self.id} finished with status = {task_status}") + tenant = s3.tenant_from_bucket(bucket) + self.send_status(pipeline_type=pipeline_type, tenant=tenant, token=token) def change_status(self, status: schemas.Status) -> None: """Changes status of the task in the db and in the instance.""" @@ -339,7 +339,7 @@ def change_status(self, status: schemas.Status) -> None: def send_status( self, pipeline_type: schemas.PipelineTypes, - bucket: Optional[str], + tenant: Optional[str], token: Optional[str] = None, ) -> None: if self.webhook is None: @@ -355,7 +355,7 @@ def send_status( webhook=self.webhook, task_id=self.id, task_status=self.status ) if url and body: - webhooks.send_webhook(url, body, token, bucket) + webhooks.send_webhook(url, body, token, tenant) def get_pipeline_type(self) -> schemas.PipelineTypes: pipeline = service.run_in_session( @@ -369,7 +369,7 @@ def get_file_id(self) -> int: file_id = file_path.split("/")[1] return int(file_id) - async def check_preprocessing_status(self, bucket: str) -> bool: + async def check_preprocessing_status(self, tenant: str) -> bool: """Checks preprocessing status of task file. If the status is 'preprocessing in progress', waits and tries again. 
If the status is "failed" or the number of retries is exceeded, @@ -387,9 +387,7 @@ async def check_preprocessing_status(self, bucket: str) -> bool: max_retries = config.MAX_FILE_STATUS_RETRIES timeout = config.FILE_STATUS_TIMEOUT for retry in range(1, int(max_retries) + 1): - file_status = http_utils.get_file_status( - file_id=file_id, bucket=bucket - ) + file_status = http_utils.get_file_status(file_id=file_id, tenant=tenant) if file_status == schemas.PreprocessingStatus.PREPROCESSED: return True elif file_status is None: diff --git a/pipelines/src/http_utils.py b/pipelines/src/http_utils.py index 47c0f535b..8cedf3a44 100644 --- a/pipelines/src/http_utils.py +++ b/pipelines/src/http_utils.py @@ -55,15 +55,13 @@ def make_request_with_retry( return None -def get_file_status( - file_id: int, bucket: str -) -> Optional[schemas.PreprocessingStatus]: +def get_file_status(file_id: int, tenant: str) -> Optional[schemas.PreprocessingStatus]: logger.info(f"Sending request to the assets to get file {file_id} status.") body = {"filters": [{"field": "id", "operator": "eq", "value": file_id}]} url = f"{config.ASSETS_URI}/files/search" token = service_token.get_service_token() headers = { - "X-Current-Tenant": bucket, + "X-Current-Tenant": tenant, "Authorization": f"Bearer {token}", } response = make_request_with_retry( diff --git a/pipelines/src/result_processing.py b/pipelines/src/result_processing.py index 885296bc0..2724efcc9 100644 --- a/pipelines/src/result_processing.py +++ b/pipelines/src/result_processing.py @@ -202,19 +202,6 @@ def merge( Page.update_forward_refs() -def get_minio_client() -> Optional[Minio]: - """Return Minio client if URI is provided via config.py.""" - if not config.MINIO_URI: - logger.error("MINIO_URI is None") - return None - return Minio( - endpoint=config.MINIO_URI, - access_key=config.MINIO_ACCESS_KEY, - secret_key=config.MINIO_SECRET_KEY, - secure=False, - ) - - def get_annotation_uri( job_id: Union[str, int], file_id: Union[str, int] ) -> Optional[str]: @@ -336,28 +323,31 @@ def postprocess_result( def manage_result_for_annotator( bucket: str, + tenant: str, path_: str, job_id: int, file_bucket: str, filepath: str, file_id: str, pipeline_id: int, + client: Minio, token: Optional[str], ) -> bool: """Manage result for by merging step results and sending it to Annotation Manager. :param bucket: Bucket with step results. + :param tenant: Tenant name to use. :param job_id: Job id in which task is done. :param file_bucket: Bucket of the file. :param filepath: File path. :param path_: Path of the step results. :param file_id: File id (filename without extension). :param pipeline_id: id of executing pipeline. + :param client: Client to connect to s3. :param token: service token. :return: True if succeeded. 
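manage_result_for_annotator now takes the Minio client as a parameter instead of constructing one internally, which is what lets the reworked tests below substitute a MagicMock. A hedged sketch of the pattern, with invented names and an abbreviated call shape:

```python
from unittest.mock import MagicMock

def store_result(client, bucket: str, key: str, payload: bytes) -> bool:
    """Toy stand-in for a function that receives an injected storage client."""
    try:
        # Abbreviated; the real minio put_object takes a stream plus a length.
        client.put_object(bucket, key, payload, len(payload))
        return True
    except Exception:
        return False

fake = MagicMock()  # no MinIO endpoint needed in tests
assert store_result(fake, "tenant", "runs/1/result.json", b"{}")
fake.put_object.assert_called_once()
```

The module-level client created in execution.py is threaded through here, so the whole pipeline shares one configured connection.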
""" - client = get_minio_client() uri = get_annotation_uri(job_id, file_id) if client is None or uri is None: logger.error("minio client or annotation uri are None") @@ -373,10 +363,8 @@ def manage_result_for_annotator( "bucket": file_bucket, "input": merged_data.dict(exclude_none=True), } - headers = {"X-Current-Tenant": bucket, "Authorization": f"Bearer {token}"} - postprocessed_data = postprocess_result( - data_for_postprocessor, headers=headers - ) + headers = {"X-Current-Tenant": tenant, "Authorization": f"Bearer {token}"} + postprocessed_data = postprocess_result(data_for_postprocessor, headers=headers) if postprocessed_data is None: logger.info("result for postprocessing data is None") return False diff --git a/pipelines/src/s3.py b/pipelines/src/s3.py new file mode 100644 index 000000000..b86994b2c --- /dev/null +++ b/pipelines/src/s3.py @@ -0,0 +1,56 @@ +import enum +from typing import Any, Dict, Optional + +from minio import Minio, credentials + +from src import config, log + +logger = log.get_logger(__file__) + + +class S3Providers(str, enum.Enum): + MINIO = "minio" + AWS_IAM = "aws_iam" + AWS_ENV = "aws_env" + AWS_CONF = "aws_config" + + +def get_minio_config( + s3_provider: S3Providers, + endpoint: Optional[str], + access_key: Optional[str], + secret_key: Optional[str], + **kwargs: Optional[str], +) -> Dict[str, Any]: + minio_config = {"endpoint": endpoint, "secure": False} + if s3_provider == S3Providers.MINIO: + minio_config["access_key"] = access_key + minio_config["secret_key"] = secret_key + elif s3_provider == S3Providers.AWS_IAM: + minio_config["credentials"] = credentials.IamAwsProvider() + elif s3_provider == S3Providers.AWS_ENV: + minio_config["credentials"] = credentials.EnvAWSProvider() + elif s3_provider == S3Providers.AWS_CONF: + minio_config["credentials"] = credentials.AWSConfigProvider( + profile=kwargs.get("aws_profile") + ) + return minio_config + + +def get_minio_client() -> Minio: + """Return Minio client if URI is provided via config.py.""" + s3_provider = S3Providers(config.S3_CREDENTIALS_PROVIDER) + logger.debug("S3_CREDENTIALS_PROVIDER is set to %s", s3_provider) + minio_config = get_minio_config( + s3_provider=s3_provider, + endpoint=config.S3_ENDPOINT, + access_key=config.S3_ACCESS_KEY, + secret_key=config.S3_SECRET_KEY, + aws_profile=config.AWS_PROFILE, + ) + return Minio(**minio_config) + + +def tenant_from_bucket(bucket: str) -> str: + prefix = f"{config.S3_PREFIX}-" if config.S3_PREFIX else "" + return bucket.replace(prefix, "", 1) diff --git a/pipelines/src/webhooks.py b/pipelines/src/webhooks.py index 146f09928..ebf9b7644 100644 --- a/pipelines/src/webhooks.py +++ b/pipelines/src/webhooks.py @@ -49,7 +49,7 @@ def send_webhook( url: str, body: Dict[str, Any], token: Optional[str] = None, - bucket: Optional[str] = None, + tenant: Optional[str] = None, ) -> None: if token is None: token = service_token.get_service_token() @@ -59,5 +59,5 @@ def send_webhook( f"with body {body} wasn`t sent." 
) return - headers = {"X-Current-Tenant": bucket, "Authorization": f"Bearer {token}"} + headers = {"X-Current-Tenant": tenant, "Authorization": f"Bearer {token}"} http_utils.make_request_with_retry(url=url, body=body, headers=headers) diff --git a/pipelines/tests/test_app.py b/pipelines/tests/test_app.py index 65ad2cd93..6b0b508c6 100644 --- a/pipelines/tests/test_app.py +++ b/pipelines/tests/test_app.py @@ -2,7 +2,6 @@ from copy import deepcopy from typing import Dict -from unittest.mock import patch import pytest @@ -294,7 +293,7 @@ def test_response__execute_pipeline_by_id( def test_response__execute_pipeline_by_id_not_found(testing_app, adjust_mock): - """Testing execute_pipeline_by_id response when there's no such pipeline.""" + """When there's no such pipeline.""" testing_app.post("/pipeline", json=td.pipeline_dict) response = testing_app.post( "/pipelines/2/execute", @@ -343,7 +342,7 @@ def test_step_args__execute_pipeline_by_id( def test_steps_ids__execute_pipeline_by_id( testing_app, adjust_mock, mock_preprocessing_file_status ): - """Testing execute_pipeline_by_id steps ids equals to pipeline steps ids.""" + """Steps ids equals to pipeline steps ids.""" testing_app.post("/pipeline", json=td.pipeline_dict) testing_app.post( "/pipelines/1/execute", diff --git a/pipelines/tests/test_execution.py b/pipelines/tests/test_execution.py index 4d43cecdc..19fb3c8af 100644 --- a/pipelines/tests/test_execution.py +++ b/pipelines/tests/test_execution.py @@ -178,6 +178,7 @@ def test_update_categories_empty(): assert td.pipeline.meta.categories == [] +@pytest.mark.skip(reason="We make request which is not mocked, fix needed") def test_get_model_urls(): with patch.object( execution.PipelineStep, diff --git a/pipelines/tests/test_result_processing.py b/pipelines/tests/test_result_processing.py index 3ccf41015..e786e51bb 100644 --- a/pipelines/tests/test_result_processing.py +++ b/pipelines/tests/test_result_processing.py @@ -1,8 +1,8 @@ """Testing src/result_processing.py.""" -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch import pytest -from minio import Minio, S3Error +from minio import S3Error import src.result_processing as processing @@ -170,22 +170,11 @@ def test_merge_geometry_objects(): def test_merge_geometry_objects_no_objects_provided(): - """Testing merge of GeometryObject when objects for merge are not provided.""" + """Objects for merge are not provided.""" with pytest.raises(ValueError, match="No GeometryObjects to merge"): assert processing.GeometryObject.merge([]) -def test_get_minio_client(): - """Testing get_minio_client.""" - assert isinstance(processing.get_minio_client(), Minio) - - -def test_get_minio_client_no_uri(): - """Testing get_minio_client when MINIO_URI is not provided.""" - with patch("src.result_processing.config.MINIO_URI", ""): - assert processing.get_minio_client() is None - - @pytest.mark.parametrize( ["job_id", "file_id", "expected"], [ @@ -251,11 +240,11 @@ def test_get_pipeline_leaves_data_minio_error(): def test_merge_pipeline_leaves_data(): """Testing merge_pipeline_leaves_data.""" leaves_data = [ - b'{"pages": [{"page_num": 1, "size": {"width": 1, "height": 2}, "objs": ' - b'[{"id": 0, "bbox": [1, 1, 1, 1], "category": "some", ' + b'{"pages": [{"page_num": 1, "size": {"width": 1, "height": 2}, ' + b'"objs": [{"id": 0, "bbox": [1, 1, 1, 1], "category": "some", ' b'"data": {"a": 0}}]}]}', - b'{"pages": [{"page_num": 1, "size": {"width": 1, "height": 2}, "objs": ' - b'[{"id": 0, "bbox": [1, 1, 1, 1], "category": 
"some", ' + b'{"pages": [{"page_num": 1, "size": {"width": 1, "height": 2}, ' + b'"objs": [{"id": 0, "bbox": [1, 1, 1, 1], "category": "some", ' b'"data": {"a": 1, "b": 2}}, ' b'{"id": 3, "bbox": [3, 3, 3, 3], "category": "some"}]}]}', ] @@ -380,7 +369,7 @@ def test_postprocess_result_no_uri(): def test_postprocess_result_invalid_postprocessor_json_response(): - """Testing postprocess_result when postprocessor return invalid json format.""" + """Postprocessor return invalid json format.""" m = MagicMock m.content = b'{"asd":}' with patch( @@ -411,7 +400,7 @@ def test_manage_result_for_annotator(): "f.com/annotation", ): assert processing.manage_result_for_annotator( - "", "", "", "", "", "", 1, "" + "", "", "", 0, "", "", "", 1, MagicMock(), "" ) req_mock.assert_called_once_with( "f.com/annotation/annotation", @@ -425,19 +414,11 @@ def test_manage_result_for_annotator(): del_mock.assert_called_once() -def test_manage_result_for_annotator_no_client(): - """Testing manage_result_for_annotator when there's no Minio client.""" - with patch("src.result_processing.get_minio_client", return_value=None): - assert not processing.manage_result_for_annotator( - "", "", "", "", "", "", 8, "" - ) - - def test_manage_result_for_annotator_no_annotator_uri(): """Testing manage_result_for_annotator when there's no Annotator URI.""" with patch("src.result_processing.config.ANNOTATION_URI", ""): assert not processing.manage_result_for_annotator( - "", "", "", "", "", "", 8, "" + "", "", "", 0, "", "", "", 8, MagicMock(), "" ) @@ -447,7 +428,7 @@ def test_manage_result_for_annotator_cannot_merge_data(): "src.result_processing.merge_pipeline_leaves_data", return_value=None ): assert not processing.manage_result_for_annotator( - "", "", "", "", "", "", 8, "" + "", "", "", 0, "", "", "", 8, MagicMock(), "" ) @@ -460,12 +441,12 @@ def test_manage_result_for_annotator_request_not_succeeded(): return_value=None, ): assert not processing.manage_result_for_annotator( - "", "", "", "", "", "", 8, "" + "", "", "", 0, "", "", "", 8, MagicMock(), "" ) def test_manage_result_for_annotator_request_debug_merge(): - """Testing manage_result_for_annotator when debug merge is True and data are not deleted.""" + """Debug merge is True and data are not deleted.""" with patch("src.result_processing.merge_pipeline_leaves_data"): with patch("src.result_processing.postprocess_result"): with patch( @@ -476,6 +457,6 @@ def test_manage_result_for_annotator_request_debug_merge(): "src.result_processing.delete_objects" ) as del_mock: assert processing.manage_result_for_annotator( - "", "", "", "", "", "", 8, "" + "", "", "", 0, "", "", "", 8, MagicMock(), "" ) del_mock.assert_not_called() diff --git a/pipelines/tests/test_s3.py b/pipelines/tests/test_s3.py new file mode 100644 index 000000000..939029dfc --- /dev/null +++ b/pipelines/tests/test_s3.py @@ -0,0 +1,25 @@ +from unittest.mock import patch + +import minio +import pytest + +from src import s3 + + +def test_get_minio_client(): + """Testing get_minio_client.""" + assert isinstance(s3.get_minio_client(), minio.Minio) + + +@pytest.mark.parametrize( + ("prefix", "bucket", "expected"), + ( + ("", "tenant", "tenant"), + ("", "some-tenant", "some-tenant"), + ("prefix", "prefix-tenant", "tenant"), + ("prefix", "prefix-prefix-tenant", "prefix-tenant"), + ), +) +def test_tenant_from_bucket(prefix: str, bucket: str, expected: str) -> None: + with patch("src.config.S3_PREFIX", prefix): + assert s3.tenant_from_bucket(bucket) == expected diff --git a/processing/.env b/processing/.env index 
4a031cff2..5351bd53f 100644 --- a/processing/.env +++ b/processing/.env @@ -1,6 +1,9 @@ MINIO_SERVER="minio:9000" MINIO_ROOT_USER=minioadmin MINIO_ROOT_PASSWORD=minioadmin +MINIO_SECURE_CONNECTION=False +S3_PREFIX= +S3_CREDENTIALS_PROVIDER=minio HOST="0.0.0.0" PORT=8080 diff --git a/processing/chart/templates/deployment.yaml b/processing/chart/templates/deployment.yaml index db66f9e9e..ffd7a58e1 100644 --- a/processing/chart/templates/deployment.yaml +++ b/processing/chart/templates/deployment.yaml @@ -59,6 +59,8 @@ spec: value: "5432" - name: SERVICE_NAME value: "processing" + - name: S3_CREDENTIALS_PROVIDER + value: "minio" - name: MINIO_SERVER value: "minio:80" - name: MINIO_ROOT_USER diff --git a/processing/src/config.py b/processing/src/config.py index 4e44a5f89..b3fefe042 100644 --- a/processing/src/config.py +++ b/processing/src/config.py @@ -33,6 +33,10 @@ class Settings(BaseSettings): minio_server: str = "minio:80" minio_root_user: str = "minioadmin" minio_root_password: str = "minioadmin" + minio_secure_connection: Optional[bool] = False + s3_prefix: Optional[str] + s3_credentials_provider: Optional[str] + aws_profile_name: Optional[str] keycloak_host: str = "http://bagerdoc-keycloack" host_models: str = "http://models/deployed_models" diff --git a/processing/src/health_check_easy_ocr.py b/processing/src/health_check_easy_ocr.py index 571f9da72..3353b6c8d 100644 --- a/processing/src/health_check_easy_ocr.py +++ b/processing/src/health_check_easy_ocr.py @@ -6,7 +6,10 @@ from src.utils.aiohttp_utils import send_request from src.utils.logger import get_logger -from src.utils.minio_utils import MinioCommunicator +from src.utils.minio_utils import ( + MinioCommunicator, + convert_bucket_name_if_s3prefix, +) logger = get_logger(__name__) minio_client = MinioCommunicator().client @@ -15,6 +18,8 @@ # Path to `health_check_files` accord to badgerdoc paths # bucket: `post`, path: `files/file_id/file_id.pdf` bucket = "post" +bucket = convert_bucket_name_if_s3prefix(bucket) + file_ids = {"health_check1": [1], "health_check2": [1, 2]} diff --git a/processing/src/main.py b/processing/src/main.py index be42b541e..98cecbf41 100644 --- a/processing/src/main.py +++ b/processing/src/main.py @@ -22,6 +22,7 @@ from src.text_merge import merge_words_to_paragraph from src.utils.aiohttp_utils import http_session from src.utils.logger import get_logger +from src.utils.minio_utils import convert_bucket_name_if_s3prefix from src.utils.utils import map_finish_status_for_assets logger = get_logger(__name__) @@ -83,8 +84,9 @@ def get_preprocessing_result( pages, current_tenant, ) + bucket_name = convert_bucket_name_if_s3prefix(current_tenant) return Response( - content=send_preprocess_result(current_tenant, file_id, pages), + content=send_preprocess_result(bucket_name, file_id, pages), media_type="application/json", ) diff --git a/processing/src/utils/minio_utils.py b/processing/src/utils/minio_utils.py index db2c2f083..2568688b6 100644 --- a/processing/src/utils/minio_utils.py +++ b/processing/src/utils/minio_utils.py @@ -1,9 +1,54 @@ from minio import Minio +from minio.credentials import AWSConfigProvider, EnvAWSProvider, IamAwsProvider from src.config import settings from src.utils.logger import get_logger -logger = get_logger(__file__) +logger = get_logger(__name__) + + +class NotConfiguredException(Exception): + pass + + +def create_minio_config(): + minio_config = {} + + minio_config.update({"secure": settings.minio_secure_connection}) + + if settings.minio_server: + minio_config.update({"endpoint": 
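The processing service reads the new knobs through its pydantic Settings class, so .env, docker-compose and the Helm chart all configure the same fields. A trimmed, self-contained sketch of the S3-related subset (defaults shown are the development values from this diff):

```python
from typing import Optional

from pydantic import BaseSettings  # pydantic v1 style, as used in this repo

class Settings(BaseSettings):
    minio_server: str = "minio:80"
    minio_root_user: str = "minioadmin"
    minio_root_password: str = "minioadmin"
    minio_secure_connection: Optional[bool] = False
    s3_prefix: Optional[str]              # None unless set in the environment
    s3_credentials_provider: Optional[str]
    aws_profile_name: Optional[str]

settings = Settings()  # environment variables (case-insensitive) override defaults
```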
settings.minio_server}) + + if settings.s3_credentials_provider == "minio": + minio_config.update( + { + "access_key": settings.minio_root_user, + "secret_key": settings.minio_root_password, + } + ) + elif settings.s3_credentials_provider == "aws_iam": + minio_config.update({"credentials": IamAwsProvider()}) + elif settings.s3_credentials_provider == "aws_env": + minio_config.update({"credentials": EnvAWSProvider()}) + elif settings.s3_credentials_provider == "aws_config": + # environmental variable AWS_PROFILE_NAME should be set + minio_config.update( + { + "credentials": AWSConfigProvider( + profile=settings.aws_profile_name + ) + } + ) + else: + raise NotConfiguredException( + "s3 connection is not properly configured - " + "s3_credentials_provider is not set" + ) + logger.info( + f"S3_Credentials provider - {settings.s3_credentials_provider}" + ) + + return minio_config class MinioCommunicator: @@ -15,13 +60,16 @@ def __init__(self) -> None: @classmethod def create_client(cls) -> None: - cls.client = Minio( - endpoint=settings.minio_server, - access_key=settings.minio_root_user, - secret_key=settings.minio_root_password, - secure=False, - ) + minio_config = create_minio_config() + cls.client = Minio(**minio_config) logger.info( "MinIO client for %s was created successfully", settings.minio_server, ) + + +def convert_bucket_name_if_s3prefix(bucket_name: str) -> str: + if settings.s3_prefix: + return f"{settings.s3_prefix}-{bucket_name}" + else: + return bucket_name diff --git a/search/.env b/search/.env index 36ccc4c33..495ec4a03 100644 --- a/search/.env +++ b/search/.env @@ -28,5 +28,7 @@ S3_ENDPOINT_URL=http://minio S3_LOGIN=minioadmin S3_PASS=minioadmin S3_START_PATH=annotation +S3_CREDENTIALS_PROVIDER=minio +S3_PREFIX= ROOT_PATH= diff --git a/search/chart/templates/deployment.yaml b/search/chart/templates/deployment.yaml index f727db9fc..15c7b2471 100644 --- a/search/chart/templates/deployment.yaml +++ b/search/chart/templates/deployment.yaml @@ -41,6 +41,8 @@ spec: terminationMessagePath: /dev/termination-log terminationMessagePolicy: File env: + - name: S3_CREDENTIALS_PROVIDER + value: "minio" - name: S3_ENDPOINT_URL value: "http://minio" - name: S3_LOGIN diff --git a/search/search/config.py b/search/search/config.py index bf40a5c22..db6bba644 100644 --- a/search/search/config.py +++ b/search/search/config.py @@ -1,5 +1,5 @@ import pathlib -from typing import List +from typing import List, Optional from dotenv import find_dotenv from pydantic import BaseSettings @@ -30,6 +30,8 @@ class Settings(BaseSettings): s3_login: str s3_pass: str s3_start_path: str + s3_credentials_provider: Optional[str] + s3_prefix: Optional[str] version: str = Field(default_factory=get_version) manifest: str text_pieces_path: str diff --git a/search/search/harvester.py b/search/search/harvester.py index 6ceaff665..d29ff3898 100644 --- a/search/search/harvester.py +++ b/search/search/harvester.py @@ -11,13 +11,43 @@ from search.logger import logger +def convert_bucket_name_if_s3prefix(bucket_name: str) -> str: + if settings.s3_prefix: + return f"{settings.s3_prefix}-{bucket_name}" + else: + return bucket_name + + +class NotConfiguredException(Exception): + pass + + +def create_boto3_config(): + boto3_config = {} + if settings.s3_credentials_provider == "minio": + boto3_config.update( + { + "aws_access_key_id": settings.s3_login, + "aws_secret_access_key": settings.s3_pass, + "endpoint_url": settings.s3_endpoint_url, + } + ) + elif settings.s3_credentials_provider == "aws_iam": + # No additional updates to 
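create_minio_config turns s3_credentials_provider into one of four credential strategies. A condensed, hedged sketch of the same dispatch (the aws_config branch is omitted for brevity; endpoint and keys are the development defaults):

```python
from minio import Minio
from minio.credentials import EnvAWSProvider, IamAwsProvider

class NotConfiguredException(Exception):
    pass

def build_client(provider: str, endpoint: str, user: str, password: str) -> Minio:
    # Condensed form of create_minio_config + MinioCommunicator.create_client.
    config: dict = {"endpoint": endpoint, "secure": False}
    if provider == "minio":
        config.update(access_key=user, secret_key=password)
    elif provider == "aws_iam":
        config["credentials"] = IamAwsProvider()
    elif provider == "aws_env":
        config["credentials"] = EnvAWSProvider()
    else:
        raise NotConfiguredException(f"unknown s3_credentials_provider: {provider}")
    return Minio(**config)

client = build_client("minio", "minio:9000", "minioadmin", "minioadmin")
```

The same switch appears again for the users service later in this diff, selected purely by deployment configuration rather than code changes.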
config needed - boto3 uses env vars + ... + else: + raise NotConfiguredException( + "s3 connection is not properly configured " + "- s3_credentials_provider is not set" + ) + logger.info( + f"S3_Credentials provider - {settings.s3_credentials_provider}") + return boto3_config + + def connect_s3(tenant: str) -> boto3.resource: - s3_resource = boto3.resource( - "s3", - endpoint_url=settings.s3_endpoint_url, - aws_access_key_id=settings.s3_login, - aws_secret_access_key=settings.s3_pass, - ) + boto3_config = create_boto3_config() + s3_resource = boto3.resource("s3", **boto3_config) try: s3_resource.meta.client.head_bucket(Bucket=tenant) except ClientError as err: @@ -80,20 +110,21 @@ def extract_manifest_data( def harvester( tenant: str, job_id: int, file_id: Optional[int] = None ) -> Optional[Iterator[dict]]: - s3 = connect_s3(tenant) + bucket_name = convert_bucket_name_if_s3prefix(tenant) + s3 = connect_s3(bucket_name) if file_id is None: prefix = f"{settings.s3_start_path}/{job_id}" else: prefix = f"{settings.s3_start_path}/{job_id}/{file_id}" - for bucket_object in s3.Bucket(tenant).objects.filter(Prefix=prefix): + for bucket_object in s3.Bucket(bucket_name).objects.filter(Prefix=prefix): if not bucket_object.key.endswith(settings.manifest): continue object_data = bucket_object.get()["Body"].read().decode("utf-8") file_id = bucket_object.key.split("/")[-2] pages_objects = extract_manifest_data( - s3, tenant, job_id, file_id, object_data + s3, bucket_name, job_id, file_id, object_data ) for page_num, text_piece_object in pages_objects.items(): yield from parse_json( diff --git a/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py b/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py new file mode 100644 index 000000000..0a5a8062f --- /dev/null +++ b/taxonomy/alembic/versions/48dc50decbed_add_association_taxonomy_category.py @@ -0,0 +1,47 @@ +"""add association_taxonomy_category + +Revision ID: 48dc50decbed +Revises: bdea8a93cafe +Create Date: 2022-12-02 15:04:55.726594 + +""" +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "48dc50decbed" +down_revision = "bdea8a93cafe" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "association_taxonomy_category", + sa.Column("taxonomy_id", sa.VARCHAR(), nullable=False), + sa.Column("taxonomy_version", sa.Integer(), nullable=False), + sa.Column("category_id", sa.VARCHAR(), nullable=False), + sa.ForeignKeyConstraint( + ["taxonomy_id", "taxonomy_version"], + ["taxonomy.id", "taxonomy.version"], + ), + sa.PrimaryKeyConstraint( + "taxonomy_id", "taxonomy_version", "category_id" + ), + ) + op.drop_column("taxonomy", "category_id") + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
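The search service does the same provider switch for boto3: an explicit endpoint plus static keys for MinIO, and an empty config for aws_iam so boto3 falls back to its default credential chain. A sketch of the two resulting resource configurations (values are the repo's development defaults):

```python
import boto3

minio_cfg = {
    "endpoint_url": "http://minio",
    "aws_access_key_id": "minioadmin",
    "aws_secret_access_key": "minioadmin",
}
iam_cfg: dict = {}  # aws_iam: boto3 resolves credentials from the instance role

def connect(cfg: dict):
    # Mirrors connect_s3 above; head_bucket is where a bad config first fails.
    return boto3.resource("s3", **cfg)
```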
### + op.add_column( + "taxonomy", + sa.Column( + "category_id", sa.VARCHAR(), autoincrement=False, nullable=False + ), + ) + op.drop_table("association_taxonomy_category") + # ### end Alembic commands ### diff --git a/taxonomy/app/models.py b/taxonomy/app/models.py index c1dfedef5..42235d52f 100644 --- a/taxonomy/app/models.py +++ b/taxonomy/app/models.py @@ -45,6 +45,26 @@ class AssociationTaxonomyJob(Base): job_id = Column(VARCHAR, primary_key=True) +class AssociationTaxonomyCategory(Base): + __tablename__ = "association_taxonomy_category" + __table_args__ = ( + ForeignKeyConstraint( + ["taxonomy_id", "taxonomy_version"], + ["taxonomy.id", "taxonomy.version"], + ), + ) + taxonomy_id = Column(VARCHAR, primary_key=True) + taxonomy_version = Column(Integer, primary_key=True) + + taxonomy = relationship( + "Taxonomy", + foreign_keys="[AssociationTaxonomyCategory.taxonomy_id, " + "AssociationTaxonomyCategory.taxonomy_version]", + back_populates="categories", + ) + category_id = Column(VARCHAR, primary_key=True) + + class Taxonomy(Base): __tablename__ = "taxonomy" @@ -52,7 +72,10 @@ class Taxonomy(Base): name = Column(VARCHAR, nullable=False) version = Column(Integer, primary_key=True) tenant = Column(VARCHAR, nullable=True) - category_id = Column(VARCHAR, nullable=False) + categories = relationship( + "AssociationTaxonomyCategory", + back_populates="taxonomy", + ) latest = Column(Boolean, nullable=False) jobs = relationship("AssociationTaxonomyJob", back_populates="taxonomy") taxons = relationship("Taxon", back_populates="taxonomy") diff --git a/taxonomy/app/schemas/taxonomy.py b/taxonomy/app/schemas/taxonomy.py index ad97bba8f..7eb2fbb10 100644 --- a/taxonomy/app/schemas/taxonomy.py +++ b/taxonomy/app/schemas/taxonomy.py @@ -5,7 +5,6 @@ class TaxonomyBaseSchema(BaseModel): name: str = Field(..., example="taxonomy_name") - category_id: Optional[str] = Field(None, example="my_category") class TaxonomyInputSchema(TaxonomyBaseSchema): diff --git a/taxonomy/app/taxonomy/resources.py b/taxonomy/app/taxonomy/resources.py index 978622ed3..e923aa16e 100644 --- a/taxonomy/app/taxonomy/resources.py +++ b/taxonomy/app/taxonomy/resources.py @@ -18,8 +18,11 @@ ) from app.tags import TAXONOMY_TAG from app.taxonomy.services import ( + batch_latest_taxonomies, + batch_versioned_taxonomies, + bulk_create_relations_with_categories, + bulk_delete_category_association, create_new_relation_to_job, - create_new_relation_with_category, create_taxonomy_instance, delete_taxonomy_instance, get_latest_taxonomy, @@ -155,31 +158,61 @@ def associate_taxonomy_to_job( @router.post( "/link_category", status_code=status.HTTP_201_CREATED, - response_model=TaxonomyResponseSchema, + response_model=List[CategoryLinkSchema], responses={ 400: {"model": BadRequestErrorSchema}, }, summary="Creates association between taxonomy and category.", ) def associate_taxonomy_to_category( - query: CategoryLinkSchema, + category_links: List[CategoryLinkSchema], session: Session = Depends(get_db), -): - if query.taxonomy_version: - taxonomy = get_taxonomy( - session, (query.taxonomy_id, query.taxonomy_version) - ) - else: - taxonomy = get_latest_taxonomy(session, query.taxonomy_id) - if not taxonomy: +) -> List[CategoryLinkSchema]: + versions = [] + latests = [] + + for category_link in category_links: + if category_link.taxonomy_version: + versions.append(category_link) + else: + latests.append(category_link) + + taxonomies: dict = batch_versioned_taxonomies(session, versions) + taxonomies.update(batch_latest_taxonomies(session, latests)) + + 
not_found_taxonomies = [ + link.taxonomy_id + for link in versions + latests + if link.taxonomy_id not in taxonomies + ] + if not_found_taxonomies: LOGGER.error( - "associate_taxonomy_to_category get not existing id %s", - query.taxonomy_id, + "associate_taxonomy_to_category get not existing ids %s", + not_found_taxonomies, + ) + raise HTTPException( + status_code=404, + detail="Taxonomy does not exist.", ) - raise HTTPException(status_code=404, detail="Not existing taxonomy") - create_new_relation_with_category(session, taxonomy, query.category_id) - return TaxonomyResponseSchema.from_orm(taxonomy) + bulk_create_relations_with_categories(session, taxonomies, category_links) + return category_links + + +@router.delete( + "/link_category/{category_id}", + status_code=status.HTTP_204_NO_CONTENT, + responses={ + 404: {"model": NotFoundErrorSchema}, + }, + summary="Deletes association between taxonomy and category.", +) +def delete_category_link( + category_id: str = Path(..., example="1"), + session: Session = Depends(get_db), +) -> Response: + bulk_delete_category_association(session, category_id) + return Response(status_code=status.HTTP_204_NO_CONTENT) @router.get( diff --git a/taxonomy/app/taxonomy/services.py b/taxonomy/app/taxonomy/services.py index 944bc442c..1c6caa867 100644 --- a/taxonomy/app/taxonomy/services.py +++ b/taxonomy/app/taxonomy/services.py @@ -1,10 +1,18 @@ from typing import Dict, List, Optional, Tuple, Union -from sqlalchemy import desc +from sqlalchemy import and_, desc, or_ from sqlalchemy.orm import Session -from app.models import AssociationTaxonomyJob, Taxonomy -from app.schemas import TaxonomyBaseSchema, TaxonomyInputSchema +from app.models import ( + AssociationTaxonomyCategory, + AssociationTaxonomyJob, + Taxonomy, +) +from app.schemas import ( + CategoryLinkSchema, + TaxonomyBaseSchema, + TaxonomyInputSchema, +) def create_taxonomy_instance( @@ -101,10 +109,62 @@ def get_taxonomies_by_job_id(session: Session, job_id: str) -> List[Taxonomy]: ) -def create_new_relation_with_category( - session: Session, taxonomy: Taxonomy, category_id: str -) -> Taxonomy: - taxonomy.category_id = category_id +def bulk_create_relations_with_categories( + session: Session, + taxonomies: Dict[str, int], + category_links: List[CategoryLinkSchema], +) -> None: + objects = [ + AssociationTaxonomyCategory( + taxonomy_id=link.taxonomy_id, + taxonomy_version=taxonomies[link.taxonomy_id], + category_id=link.category_id, + ) + for link in category_links + ] + session.bulk_save_objects(objects) + session.commit() + + +def batch_versioned_taxonomies( + session: Session, schemas: List[CategoryLinkSchema] +) -> Dict[str, int]: + taxonomies = session.query(Taxonomy.id, Taxonomy.version).filter( + or_( + *[ + and_( + Taxonomy.id == link.taxonomy_id, + Taxonomy.version == link.taxonomy_version, + ) + for link in schemas + ] + ) + ) + return {id_: version for id_, version in taxonomies.all()} + + +def batch_latest_taxonomies( + session: Session, schemas: List[CategoryLinkSchema] +) -> Dict[str, int]: + taxonomies = session.query(Taxonomy.id, Taxonomy.version).filter( + or_( + *[ + and_( + Taxonomy.id == link.taxonomy_id, + Taxonomy.latest == True, # noqa E712 + ) + for link in schemas + ] + ) + ) + return {id_: version for id_, version in taxonomies.all()} + + +def bulk_delete_category_association( + session: Session, + category_id: str, +) -> None: + session.query(AssociationTaxonomyCategory).filter( + AssociationTaxonomyCategory.category_id == category_id, + ).delete(synchronize_session=False) 
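batch_versioned_taxonomies and batch_latest_taxonomies replace N per-link lookups with one query built from OR-ed AND clauses. A self-contained sketch of the filter they generate (toy model with the same column shapes; link values are invented):

```python
from sqlalchemy import Column, Integer, String, and_, or_
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class Taxonomy(Base):  # toy stand-in for app.models.Taxonomy
    __tablename__ = "taxonomy"
    id = Column(String, primary_key=True)
    version = Column(Integer, primary_key=True)

links = [("colors", 1), ("shapes", 3)]  # (taxonomy_id, taxonomy_version) pairs
condition = or_(
    *[and_(Taxonomy.id == t, Taxonomy.version == v) for t, v in links]
)
# Compiles to:
#   (taxonomy.id = :id_1 AND taxonomy.version = :version_1)
#   OR (taxonomy.id = :id_2 AND taxonomy.version = :version_2)
print(condition)
```

One round trip fetches every requested (id, version) pair, and the resulting {id: version} dict then drives bulk_save_objects in bulk_create_relations_with_categories.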
session.commit() - session.refresh(taxonomy) - return taxonomy diff --git a/taxonomy/chart/templates/deployment.yaml b/taxonomy/chart/templates/deployment.yaml index 051548fce..a66454c6d 100644 --- a/taxonomy/chart/templates/deployment.yaml +++ b/taxonomy/chart/templates/deployment.yaml @@ -4,40 +4,61 @@ metadata: annotations: deployment.kubernetes.io/revision: "1" labels: - app: taxonomy - name: taxonomy + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . }}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + name: {{ template "svc.name" . }} namespace: {{ .Release.Namespace }} spec: progressDeadlineSeconds: 600 - replicas: 1 + replicas: {{ default .Values.global.replicaCount .Values.replicaCount }} revisionHistoryLimit: 10 selector: matchLabels: - app: taxonomy + app: {{ template "svc.name" . }} strategy: - rollingUpdate: - maxSurge: 25% - maxUnavailable: 25% - type: RollingUpdate +{{ .Values.updateStrategy | default .Values.global.updateStrategy | toYaml | indent 4 }} template: metadata: - annotations: - sidecar.istio.io/inject: "false" - creationTimestamp: null labels: - app: taxonomy + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . }}" + {{- if .Values.labels }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.global.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + annotations: + {{- if .Values.podAnnotation }} + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else}} + {{- range $key, $value := .Values.global.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} spec: + affinity: +{{ .Values.affinity | default .Values.global.affinity | toYaml | indent 8 }} + nodeSelector: +{{ .Values.nodeSelector | default .Values.global.nodeSelector | toYaml | indent 8 }} + tolerations: +{{ .Values.tolerations | default .Values.global.tolerations | toYaml | indent 8 }} containers: - - image: {{ .Values.image.registry }}/taxonomy:{{ .Values.image.tag }} - imagePullPolicy: IfNotPresent - name: taxonomy + - image: "{{ .Values.imageName }}:{{ default .Values.global.imageTag .Values.imageTag }}" + imagePullPolicy: "{{ default .Values.global.imagePullPolicy .Values.imagePullPolicy }}" + name: {{ template "svc.name" . 
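The rewritten taxonomy chart follows the repo-wide pattern of letting a per-service value override a global one via Helm's `default` function. For readers less used to sprig templates, the precedence works roughly like this Python analogy:

```python
def chart_value(service_value, global_value):
    # Helm's `{{ default .Values.global.x .Values.x }}`: use the service-level
    # value when it is set and non-empty, otherwise fall back to the global one.
    return service_value if service_value not in (None, "", [], {}) else global_value

assert chart_value(None, 1) == 1     # falls back to .Values.global.x
assert chart_value(8080, 1) == 8080  # .Values.x wins when set
```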
}} resources: - requests: - memory: "200Mi" - cpu: "200m" - limits: - memory: "1000Mi" - cpu: "400m" +{{ .Values.resources | default .Values.global.resources | toYaml | indent 10 }} terminationMessagePath: /dev/termination-log terminationMessagePolicy: File env: @@ -52,15 +73,21 @@ spec: name: taxonomy key: POSTGRES_PASSWORD - name: POSTGRES_PORT - value: "5432" + value: "{{ default .Values.global.dbPort .Values.dbPort }}" - name: POSTGRES_DB - value: "taxonomy" + value: "{{ .Values.dbName }}" - name: POSTGRES_HOST - value: "postgres-postgresql" + value: "{{ default .Values.global.dbHost .Values.dbHost }}" +{{- if .Values.extraEnvs }} +{{ toYaml .Values.extraEnvs | indent 8 }} +{{- end }} command: ["/bin/sh"] args: ["-c", "alembic upgrade head && uvicorn app.main:app --host 0.0.0.0 --port 8080 --root-path /api/v1/taxonomy"] + serviceAccountName: {{ default .Values.global.serviceAccountName .Values.serviceAccountName }} + automountServiceAccountToken: {{ default .Values.global.automountToken .Values.automountToken }} dnsPolicy: ClusterFirst restartPolicy: Always - schedulerName: default-scheduler - securityContext: {} + schedulerName: {{ default .Values.global.schedulerName .Values.schedulerName }} + securityContext: +{{ toYaml .Values.securityContext | indent 8 }} terminationGracePeriodSeconds: 30 diff --git a/taxonomy/chart/templates/mapping.yaml b/taxonomy/chart/templates/mapping.yaml index c193094ba..837b01dbe 100644 --- a/taxonomy/chart/templates/mapping.yaml +++ b/taxonomy/chart/templates/mapping.yaml @@ -1,25 +1,25 @@ apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: taxonomy + name: {{ template "svc.name" . }} namespace: {{ .Release.Namespace }} spec: - host: {{ .Release.Namespace }}.badgerdoc.com + host: {{ default .Values.host .Values.global.host }} connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: /api/v1/taxonomy/ - service: taxonomy + service: {{ template "svc.name" . }} timeout_ms: 30000 --- apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: taxonomy-internal + name: {{ template "svc.name" . }}-internal namespace: {{ .Release.Namespace }} spec: host: ambassador connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: /api/v1/taxonomy/ - service: taxonomy + service: {{ template "svc.name" . }} timeout_ms: 30000 diff --git a/taxonomy/chart/templates/secret.yaml b/taxonomy/chart/templates/secret.yaml new file mode 100644 index 000000000..7d8a1a0d2 --- /dev/null +++ b/taxonomy/chart/templates/secret.yaml @@ -0,0 +1,22 @@ +{{- if .Values.secret.enabled -}} +apiVersion: v1 +kind: Secret +metadata: + name: "{{ template "svc.name" . }}" + namespace: {{ .Release.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ template "svc.name" . }}" + app: "{{ template "svc.name" . }}" +type: Opaque +data: + POSTGRES_USER: {{ .Values.secret.dbuser | b64enc }} + POSTGRES_PASSWORD: {{ .Values.secret.dbpassword | b64enc }} + {{- if .Values.secret.s3user }} + S3_LOGIN: {{ .Values.secret.s3user | b64enc }} + {{- end }} + {{- if .Values.secret.s3password }} + S3_PASS: {{ .Values.secret.s3password | b64enc }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/taxonomy/chart/templates/service.yaml b/taxonomy/chart/templates/service.yaml index 2b18abed6..8fa8eab1d 100644 --- a/taxonomy/chart/templates/service.yaml +++ b/taxonomy/chart/templates/service.yaml @@ -2,18 +2,18 @@ apiVersion: v1 kind: Service metadata: labels: - app: taxonomy - name: taxonomy + app: "{{ template "svc.name" . 
}}" + name: "{{ template "svc.name" . }}" namespace: {{ .Release.Namespace }} spec: ipFamilies: - IPv4 ipFamilyPolicy: SingleStack ports: - - port: 80 - protocol: TCP - targetPort: 8080 + - port: {{ default .Values.global.servicePort .Values.servicePort }} + protocol: TCP + targetPort: 8080 selector: - app: taxonomy + app: "{{ template "svc.name" . }}" sessionAffinity: None - type: ClusterIP + type: "{{ default .Values.global.serviceType .Values.serviceType }}" \ No newline at end of file diff --git a/annotation/chart/values.yaml b/taxonomy/chart/values-c.yaml similarity index 100% rename from annotation/chart/values.yaml rename to taxonomy/chart/values-c.yaml diff --git a/taxonomy/chart/values.yaml b/taxonomy/chart/values.yaml deleted file mode 100644 index f67f9d9a2..000000000 --- a/taxonomy/chart/values.yaml +++ /dev/null @@ -1,2 +0,0 @@ -image: - registry: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc diff --git a/taxonomy/tests/conftest.py b/taxonomy/tests/conftest.py index ebb91f919..c0657e7b2 100644 --- a/taxonomy/tests/conftest.py +++ b/taxonomy/tests/conftest.py @@ -79,7 +79,7 @@ def taxonomy_orm_object(taxonomy_input_data) -> Taxonomy: @pytest.fixture def taxonomy_input_data(): - return dict(id=uuid4().hex, category_id=1, name="some_name") + return dict(id=uuid4().hex, name="some_name") @pytest.fixture diff --git a/taxonomy/tests/test_taxonomy_router.py b/taxonomy/tests/test_taxonomy_router.py index a78b5c3bb..b3e60e664 100644 --- a/taxonomy/tests/test_taxonomy_router.py +++ b/taxonomy/tests/test_taxonomy_router.py @@ -10,7 +10,6 @@ def test_create_taxonomy_should_work(overrided_token_client, db_session): # given input_data = { "id": "123", - "category_id": "321", "name": "some_name", } @@ -25,7 +24,6 @@ def test_create_taxonomy_should_work(overrided_token_client, db_session): assert response assert response.status_code == 201 assert response.json()["id"] == input_data["id"] - assert response.json()["category_id"] == input_data["category_id"] assert response.json()["name"] == input_data["name"] assert response.json()["version"] == 1 @@ -35,7 +33,6 @@ def test_create_taxonomy_should_work(overrided_token_client, db_session): assert taxonomy.id == input_data["id"] assert taxonomy.name == input_data["name"] - assert taxonomy.category_id == input_data["category_id"] assert taxonomy.version == 1 assert taxonomy.latest @@ -47,7 +44,6 @@ def test_create_new_taxonomy_with_same_id_should_update_version( # given input_data = { "id": "321", - "category_id": "321", "name": "some_name", } overrided_token_client.post( @@ -73,7 +69,6 @@ def test_create_new_taxonomy_with_same_id_should_update_version( assert new_taxonomy.id == input_data["id"] assert new_taxonomy.name == input_data["name"] - assert new_taxonomy.category_id == input_data["category_id"] assert new_taxonomy.version == 2 assert new_taxonomy.latest @@ -153,17 +148,19 @@ def test_should_associate_taxonomy_to_job( @pytest.mark.integration -def test_should_associate_taxonomy_to_job( +def test_should_associate_taxonomy_to_category( overrided_token_client, prepared_taxonomy_record_in_db: Taxonomy, db_session, ): # given - request_body = { - "taxonomy_id": prepared_taxonomy_record_in_db.id, - "taxonomy_version": prepared_taxonomy_record_in_db.version, - "category_id": "123", - } + request_body = [ + { + "taxonomy_id": prepared_taxonomy_record_in_db.id, + "taxonomy_version": prepared_taxonomy_record_in_db.version, + "category_id": "123", + } + ] # when response = overrided_token_client.post( "/taxonomy/link_category", @@ -175,7 
+172,9 @@ def test_should_associate_taxonomy_to_job( assert response.status_code == 201 db_session.refresh(prepared_taxonomy_record_in_db) - assert prepared_taxonomy_record_in_db.category_id == request_body["category_id"] + assert request_body[0]["category_id"] in [ + c.category_id for c in prepared_taxonomy_record_in_db.categories + ] @pytest.mark.integration diff --git a/users/.env b/users/.env index bd7349442..bcac04ff0 100644 --- a/users/.env +++ b/users/.env @@ -21,10 +21,13 @@ KEYCLOAK_ROLE_ADMIN = "admin" KEYCLOAK_USERS_PUBLIC_KEY = -# Minio settings. -MINIO_URI=minio:9000 -MINIO_ACCESS_KEY=minioadmin -MINIO_SECRET_KEY=minioadmin +# S3 settings +S3_CREDENTIALS_PROVIDER=minio +S3_PREFIX= +S3_ENDPOINT=minio:9000 +S3_ACCESS_KEY=minioadmin +S3_SECRET_KEY=minioadmin +AWS_PROFILE= # app settings. ROOT_PATH= diff --git a/users/chart/templates/deployment.yaml b/users/chart/templates/deployment.yaml index cff7c3a06..9bf424c2a 100644 --- a/users/chart/templates/deployment.yaml +++ b/users/chart/templates/deployment.yaml @@ -4,40 +4,61 @@ metadata: annotations: deployment.kubernetes.io/revision: "1" labels: - app: users - name: users + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . }}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + name: {{ template "svc.name" . }} namespace: {{ .Release.Namespace }} spec: progressDeadlineSeconds: 600 - replicas: 1 + replicas: {{ default .Values.global.replicaCount .Values.replicaCount }} revisionHistoryLimit: 10 selector: matchLabels: - app: users + app: {{ template "svc.name" . }} strategy: - rollingUpdate: - maxSurge: 25% - maxUnavailable: 25% - type: RollingUpdate +{{ .Values.updateStrategy | default .Values.global.updateStrategy | toYaml | indent 4 }} template: metadata: - annotations: - sidecar.istio.io/inject: "false" - creationTimestamp: null labels: - app: users + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . }}" + {{- if .Values.labels }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.global.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + annotations: + {{- if .Values.podAnnotation }} + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else}} + {{- range $key, $value := .Values.global.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} spec: + affinity: +{{ .Values.affinity | default .Values.global.affinity | toYaml | indent 8 }} + nodeSelector: +{{ .Values.nodeSelector | default .Values.global.nodeSelector | toYaml | indent 8 }} + tolerations: +{{ .Values.tolerations | default .Values.global.tolerations | toYaml | indent 8 }} containers: - - image: {{ .Values.image.registry }}/users:{{ .Values.image.tag }} - imagePullPolicy: IfNotPresent - name: users + - image: "{{ .Values.imageName }}:{{ default .Values.global.imageTag .Values.imageTag }}" + imagePullPolicy: "{{ default .Values.global.imagePullPolicy .Values.imagePullPolicy }}" + name: {{ template "svc.name" . 
}} resources: - requests: - memory: "100Mi" - cpu: "200m" - limits: - memory: "1200Mi" - cpu: "600m" +{{ .Values.resources | default .Values.global.resources | toYaml | indent 10 }} terminationMessagePath: /dev/termination-log terminationMessagePolicy: File env: @@ -52,49 +73,58 @@ spec: name: users key: POSTGRES_PASSWORD - name: POSTGRES_PORT - value: "5432" + value: "{{ default .Values.global.dbPort .Values.dbPort }}" - name: POSTGRES_DB - value: "users" + value: "{{ .Values.dbName }}" - name: POSTGRES_HOST - value: "postgres-postgresql" - - name: MINIO_URI - value: "minio:80" - - name: MINIO_ACCESS_KEY + value: "{{ default .Values.global.dbHost .Values.dbHost }}" + - name: S3_CREDENTIALS_PROVIDER + value: "{{ default .Values.global.s3CredentialsProvider .Values.s3CredentialsProvider }}" + - name: S3_ENDPOINT + value: "{{ default .Values.global.s3Endpoint .Values.s3Endpoint }}" + {{- if .Values.secret.s3user }} + - name: S3_ACCESS_KEY valueFrom: secretKeyRef: - name: assets - key: MINIO_ACCESS_KEY - - name: MINIO_SECRET_KEY + name: users + key: S3_LOGIN + {{- end }} + {{- if .Values.secret.s3password }} + - name: S3_SECRET_KEY valueFrom: secretKeyRef: - name: assets - key: MINIO_SECRET_KEY + name: users + key: S3_PASS + {{- end }} - name: KEYCLOAK_ENDPOINT - value: "http://bagerdoc-keycloack" + value: "{{ .Values.keycloak.internalUrl }}" + - name: KEYCLOAK_DIRECT_ENDPOINT + value: "{{ .Values.keycloak.externalUrl }}" - name: KEYCLOAK_USERS_PUBLIC_KEY valueFrom: secretKeyRef: - name: users-keycloack + name: users-keycloak key: KEYCLOAK_USERS_PUBLIC_KEY - - name: KEYCLOAK_DIRECT_ENDPOINT - valueFrom: - secretKeyRef: - name: users - key: KEYCLOAK_DIRECT_ENDPOINT_DEV1 - name: BADGERDOC_CLIENT_SECRET valueFrom: secretKeyRef: - name: users - key: BADGERDOC_CLIENT_SECRET_DEV1 + name: users-keycloak + key: BADGERDOC_CLIENT_SECRET - name: ADMIN_CLIENT_SECRET valueFrom: secretKeyRef: - name: users - key: ADMIN_CLIENT_SECRET_DEV1 + name: users-keycloak + key: ADMIN_CLIENT_SECRET +{{- if .Values.extraEnvs }} +{{ toYaml .Values.extraEnvs | indent 8 }} +{{- end }} command: ["/bin/sh"] args: ["-c", "uvicorn src.main:app --host 0.0.0.0 --port 8080 --root-path /api/v1/users"] + serviceAccountName: {{ default .Values.global.serviceAccountName .Values.serviceAccountName }} + automountServiceAccountToken: {{ default .Values.global.automountToken .Values.automountToken }} dnsPolicy: ClusterFirst restartPolicy: Always - schedulerName: default-scheduler - securityContext: {} + schedulerName: {{ default .Values.global.schedulerName .Values.schedulerName }} + securityContext: +{{ toYaml .Values.securityContext | indent 8 }} terminationGracePeriodSeconds: 30 diff --git a/users/chart/templates/mapping.yaml b/users/chart/templates/mapping.yaml index e4333e4ab..a59e8b82a 100644 --- a/users/chart/templates/mapping.yaml +++ b/users/chart/templates/mapping.yaml @@ -1,25 +1,25 @@ apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: users + name: {{ template "svc.name" . }} namespace: {{ .Release.Namespace }} spec: - host: {{ .Release.Namespace }}.badgerdoc.com + host: {{ default .Values.host .Values.global.host }} connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: /api/v1/users/ - service: users + service: {{ template "svc.name" . }} timeout_ms: 30000 --- apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: users-internal + name: {{ template "svc.name" . 
}}-internal namespace: {{ .Release.Namespace }} spec: host: ambassador connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: /api/v1/users/ - service: users + service: {{ template "svc.name" . }} timeout_ms: 30000 diff --git a/users/chart/templates/secret.yaml b/users/chart/templates/secret.yaml index 51e982ccf..985448f5c 100644 --- a/users/chart/templates/secret.yaml +++ b/users/chart/templates/secret.yaml @@ -1,8 +1,10 @@ apiVersion: v1 data: KEYCLOAK_USERS_PUBLIC_KEY: LS0tLS1CRUdJTiBQVUJMSUMgS0VZLS0tLS0KTUlJQklqQU5CZ2txaGtpRzl3MEJBUUVGQUFPQ0FROEFNSUlCQ2dLQ0FRRUFtb2FPcTE4ZU5tYmQ5QkEyQTBiRVZ3TFQ4R2JBY2NMNDZRcXpsbFVvSW02YTZUVE1JQzFsNVNQSUY2RFc5Q0c4cU1XZHdZVjQ5TWx5NXp5ZHZUMk9aNVpSZmJWN0VDelFXVGdLeTFRRHR2WWhNVEZsZkVhU1lxVGI5VGNmeFB6R1IzZGpnU1lzR3RJSFF0cVFwN2plZXkzQi8zUmFzYm81Q1FJWUdxZ0xNS2NEOUErSHZNQnErMENXcllGOVBuem95bCtIV2kySmREajNTWjNLVVdhN0RJRE9GdXdxdE9IQ0ZBRXJNNTNadVJGeStqV3lzalV6ZHRXMkhMM1k3dnRDL1RSY2hsWlJjVS9FWDd2SnBUdU8zT0RRSzgydDZJL2JaUzVTMFJBOEhlNngwUVZXTFpCd2xwSTY2Y0s0SmcvOTBuRHRsdDUyWmxMcnNBTFlIaW5tTHdJREFRQUIKLS0tLS1FTkQgUFVCTElDIEtFWS0tLS0tCg== + BADGERDOC_CLIENT_SECRET: NThmMTRjMzktM2FmYS00YmZlLTliZjktODYyNTBmODU0YTBh + ADMIN_CLIENT_SECRET: MTQ5ZWE4NzEtY2VkNC00NjcyLWE5YmUtM2ZlMzUwMmFmYjNl kind: Secret metadata: - name: users-keycloack + name: users-keycloak namespace: {{ .Release.Namespace }} type: Opaque diff --git a/users/chart/templates/secret2.yaml b/users/chart/templates/secret2.yaml new file mode 100644 index 000000000..7d8a1a0d2 --- /dev/null +++ b/users/chart/templates/secret2.yaml @@ -0,0 +1,22 @@ +{{- if .Values.secret.enabled -}} +apiVersion: v1 +kind: Secret +metadata: + name: "{{ template "svc.name" . }}" + namespace: {{ .Release.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ template "svc.name" . }}" + app: "{{ template "svc.name" . }}" +type: Opaque +data: + POSTGRES_USER: {{ .Values.secret.dbuser | b64enc }} + POSTGRES_PASSWORD: {{ .Values.secret.dbpassword | b64enc }} + {{- if .Values.secret.s3user }} + S3_LOGIN: {{ .Values.secret.s3user | b64enc }} + {{- end }} + {{- if .Values.secret.s3password }} + S3_PASS: {{ .Values.secret.s3password | b64enc }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/users/chart/templates/service.yaml b/users/chart/templates/service.yaml index 54d84a04c..8fa8eab1d 100644 --- a/users/chart/templates/service.yaml +++ b/users/chart/templates/service.yaml @@ -2,18 +2,18 @@ apiVersion: v1 kind: Service metadata: labels: - app: users - name: users + app: "{{ template "svc.name" . }}" + name: "{{ template "svc.name" . }}" namespace: {{ .Release.Namespace }} spec: ipFamilies: - IPv4 ipFamilyPolicy: SingleStack ports: - - port: 80 - protocol: TCP - targetPort: 8080 + - port: {{ default .Values.global.servicePort .Values.servicePort }} + protocol: TCP + targetPort: 8080 selector: - app: users + app: "{{ template "svc.name" . 
}}" sessionAffinity: None - type: ClusterIP + type: "{{ default .Values.global.serviceType .Values.serviceType }}" \ No newline at end of file diff --git a/users/chart/values-c.yaml b/users/chart/values-c.yaml new file mode 100644 index 000000000..43c75475b --- /dev/null +++ b/users/chart/values-c.yaml @@ -0,0 +1,66 @@ +extraEnvs: [] + +host: example.com + +image: + registry: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc + tag: latest + +imagePullPolicy: Always + +podAnnotations: + sidecar.istio.io/inject: "false" +labels: + app: users + +replicaCount: 1 + +resources: + requests: + memory: "200Mi" + cpu: "200m" + limits: + memory: "1000Mi" + cpu: "400m" + +rbac: + serviceAccountName: null + automountToken: false + +schedulerName: default-scheduler + +s3: + endpoint: "http://minio" + +db: + host: "postgres-postgresql" + port: "5432" + name: "users" + +keycloak: + externalUrl: "http://example.com" + internalUrl: "http://bagerdoc-keycloack" + +nameOverride: "" + +affinity: {} +nodeSelector: {} +tolerations: [] + +secret: + enabled: true + dbuser: "postgres" + dbpassword: "postgres" + s3user: "serviceuser" + s3password: "12345678" + +securityContext: {} + +servicePort: 80 +serviceType: ClusterIP + +updateStrategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate \ No newline at end of file diff --git a/users/chart/values.yaml b/users/chart/values.yaml deleted file mode 100644 index f67f9d9a2..000000000 --- a/users/chart/values.yaml +++ /dev/null @@ -1,2 +0,0 @@ -image: - registry: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc diff --git a/users/docker-compose.yml b/users/docker-compose.yml index 9b7bf88bd..04cc96140 100644 --- a/users/docker-compose.yml +++ b/users/docker-compose.yml @@ -32,7 +32,8 @@ services: build: . environment: KEYCLOAK_ENDPOINT: "http://keycloak:8080" - MINIO_URI: "minio:9000" + S3_CREDENTIALS_PROVIDER: "minio" + S3_ENDPOINT: "minio:9000" ports: - "8000:8000" depends_on: diff --git a/users/requirements.txt b/users/requirements.txt index ab74b9e0e..574e6115f 100644 --- a/users/requirements.txt +++ b/users/requirements.txt @@ -3,7 +3,6 @@ pydantic==1.8.2 aiohttp==3.7.4.post0 requests==2.26.0 uvicorn==0.15.0 -pytest==6.2.5 python-dotenv==0.19.1 python-multipart==0.0.5 minio==7.1.0 diff --git a/users/src/config.py b/users/src/config.py index b21beaea7..f56795777 100644 --- a/users/src/config.py +++ b/users/src/config.py @@ -18,10 +18,13 @@ "ADMIN_CLIENT_SECRET", "5aaae332-b24d-45b6-b2ea-f5949f0c95ae" ) -# Minio settings. -MINIO_URI = os.getenv("MINIO_URI", "") -MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "minioadmin") -MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "minioadmin") +# S3 settings +S3_CREDENTIALS_PROVIDER = os.getenv("S3_CREDENTIALS_PROVIDER") +S3_PREFIX = os.getenv("S3_PREFIX", "") +S3_ENDPOINT = os.getenv("S3_ENDPOINT") +S3_ACCESS_KEY = os.getenv("S3_ACCESS_KEY") +S3_SECRET_KEY = os.getenv("S3_SECRET_KEY") +AWS_PROFILE = os.getenv("AWS_PROFILE") # app settings. 
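secret.yaml and secret2.yaml b64-encode the credentials supplied in values-c.yaml. The encoding is plain standard base64 (not encryption), so the defaults above round-trip as follows:

```python
import base64

# Default credentials from users/chart/values-c.yaml, as Kubernetes stores them:
print(base64.b64encode(b"postgres").decode())     # cG9zdGdyZXM=
print(base64.b64encode(b"serviceuser").decode())  # c2VydmljZXVzZXI=
print(base64.b64decode("cG9zdGdyZXM=").decode())  # postgres
```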
ROOT_PATH = os.getenv("ROOT_PATH", "") diff --git a/users/src/keycloak/query.py b/users/src/keycloak/query.py index 05e066a62..a5f753f28 100644 --- a/users/src/keycloak/query.py +++ b/users/src/keycloak/query.py @@ -5,7 +5,7 @@ import src.keycloak.resources as resources import src.keycloak.schemas as schemas from fastapi import HTTPException, status -from src.logger import logger +from src import logger class AuthData(TypedDict): @@ -131,8 +131,9 @@ async def get_user(realm: str, token: str, user_id: str) -> schemas.User: url = resources.user_uri.substitute(realm=realm, id=user_id) method = "GET" headers = create_bearer_header(token) - logger.info( - "Sending request to Keycloak REST API url= %s to get user_info by user_id= %s. Deprecating endpoint", + logger.Logger.info( + "Sending request to Keycloak url: %s to get user_info user_id: %s. " + "Deprecating endpoint", url, user_id, ) @@ -145,7 +146,6 @@ async def get_user(realm: str, token: str, user_id: str) -> schemas.User: async def introspect_token(token: str) -> Token_Data: """Introspects token data by sending request to Keycloak REST API""" url = resources.token_introspection_uri.substitute(realm="master") - print(url) method = "POST" headers = create_bearer_header(token) payload = { @@ -153,10 +153,9 @@ async def introspect_token(token: str) -> Token_Data: "client_id": "BadgerDoc", "client_secret": config.BADGERDOC_CLIENT_SECRET, } - logger.info( - f"Sending request to Keycloak REST API url= %s to get user_info by data from token= %s", + logger.Logger.info( + "Sending request to Keycloak url: %s to get user_info", url, - token, ) try: async with aiohttp.request( @@ -172,12 +171,10 @@ async def introspect_token(token: str) -> Token_Data: ) return data_to_return except aiohttp.ClientConnectionError as e: - logger.error( - f"Exception while sending request to Keycloak REST API: %s", e - ) + logger.Logger.error("Exception while sending request to Keycloak: %s", e) raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail=f"Exception while sending request to Keycloak REST API: {e}", + detail=f"Exception while sending request to Keycloak: {e}", ) @@ -279,8 +276,8 @@ async def get_master_realm_auth_data() -> AuthData: "grant_type": "password", } url = resources.token_uri.substitute(realm="master") - logger.info( - f"Sending request to Keycloak REST API url= %s to get admin authentification data with payload= %s", + logger.Logger.info( + "Sending request to Keycloak url: %s to get admin auth data, " "payload: %s", url, payload, ) @@ -298,12 +295,10 @@ async def get_master_realm_auth_data() -> AuthData: return data_to_return except aiohttp.ClientConnectionError as e: - logger.error( - f"Exception while sending request to Keycloak REST API: %s", e - ) + logger.Logger.error("Exception while sending request to Keycloak: %s", e) raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail=f"Exception while sending request to Keycloak REST API: {e}", + detail=f"Exception while sending request to Keycloak: {e}", ) @@ -312,10 +307,9 @@ async def get_identity_providers_data( ) -> Any: """Get all data about Identity Providers set in Keycloak""" headers = {"Authorization": f"Bearer {master_realm_access_token}"} - # url = "http://dev2.gcov.ru/auth/admin/realms/master/identity-provider/instances" url = resources.identity_providers_uri.substitute(realm="master") - logger.info( - "Sending request to Keycloak REST API %s to get identity providers data", + logger.Logger.info( + "Sending request to Keycloak url: %s to 
get identity providers data", url, ) try: @@ -328,10 +322,8 @@ async def get_identity_providers_data( return await resp.json() except aiohttp.ClientConnectionError as e: - logger.error( - f"Exception while sending request to Keycloak REST API: %s", e - ) + logger.Logger.error("Exception while sending request to Keycloak: %s", e) raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail=f"Exception while sending request to Keycloak REST API: {e}", + detail=f"Exception while sending request to Keycloak: {e}", ) diff --git a/users/src/logger.py b/users/src/logger.py index 6c6a5c9fd..a736eb903 100644 --- a/users/src/logger.py +++ b/users/src/logger.py @@ -1,7 +1,10 @@ import logging -_log_format = f"%(asctime)s - [%(levelname)s] - %(name)s - (%(filename)s).%(funcName)s(%(lineno)d) - %(message)s" +_log_format = ( + "%(asctime)s - [%(levelname)s] - %(name)s - " + "(%(filename)s).%(funcName)s(%(lineno)d) - %(message)s" +) _datefmt = "%d-%b-%y %H:%M:%S" logging.basicConfig(level=logging.INFO, format=_log_format, datefmt=_datefmt) -logger = logging.getLogger(__name__) +Logger = logging.getLogger(__name__) diff --git a/users/src/main.py b/users/src/main.py index f3ea40132..0af871526 100644 --- a/users/src/main.py +++ b/users/src/main.py @@ -6,29 +6,25 @@ import src.keycloak.query as kc_query import src.keycloak.schemas as kc_schemas import src.keycloak.utils as kc_utils -import src.minio_storage as ms -import src.utils as utils from aiohttp.web_exceptions import HTTPException as AIOHTTPException from apscheduler.schedulers.background import BackgroundScheduler from email_validator import EmailNotValidError, validate_email from fastapi import Depends, FastAPI, Header, HTTPException, Query, Request from fastapi.responses import JSONResponse from fastapi.security import OAuth2PasswordRequestForm +from src import s3, utils from src.config import ( KEYCLOAK_ROLE_ADMIN, KEYCLOAK_USERS_PUBLIC_KEY, ROOT_PATH, ) from src.schemas import Users - -# TODO: move response messages to somewhere. -from src.utils import delete_file_after_7_days from tenant_dependency import TenantData, get_tenant_info from urllib3.exceptions import MaxRetryError app = FastAPI(title="users", root_path=ROOT_PATH, version="0.1.2") realm = conf.KEYCLOAK_REALM -minio_client = ms.get_minio_client() +minio_client = s3.get_minio_client() tenant = get_tenant_info( KEYCLOAK_USERS_PUBLIC_KEY, algorithm="RS256", debug=True @@ -173,12 +169,13 @@ async def get_tenants( async def create_tenant( tenant: str = Query(..., regex="^[a-z0-9][a-z0-9\\.\\-]{1,61}[a-z0-9]$"), token: TenantData = Depends(tenant), + bucket: str = Depends(utils.get_bucket_name), current_tenant: Optional[str] = Header(None, alias="X-Current-Tenant"), ) -> Dict[str, str]: """Create new tenant.""" check_authorization(token, KEYCLOAK_ROLE_ADMIN) try: - ms.create_bucket(minio_client, tenant) + s3.create_bucket(minio_client, bucket) except MaxRetryError: raise HTTPException( status_code=503, detail="Cannot connect to the Minio." 
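create_tenant now receives the bucket name through a FastAPI dependency, keeping the prefix logic out of the handler. A minimal self-contained sketch of the wiring (route, prefix value, and response shape are illustrative, not the real endpoint):

```python
from fastapi import Depends, FastAPI, Query

app = FastAPI()
S3_PREFIX = "prod"  # illustrative deployment prefix

def get_bucket_name(tenant: str = Query(...)) -> str:
    # Same rule as users/src/utils.get_bucket_name further down in this diff.
    return f"{S3_PREFIX}-{tenant}" if S3_PREFIX else tenant

@app.post("/tenants")
async def create_tenant(
    tenant: str = Query(...),
    bucket: str = Depends(get_bucket_name),
) -> dict:
    # The real endpoint creates the Keycloak group and then the S3 bucket.
    return {"tenant": tenant, "bucket": bucket}
```

Because the dependency and the handler share the `tenant` query parameter, FastAPI resolves it once and both see the same value.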
@@ -243,7 +240,9 @@ async def get_users_by_filter( if filters.get("role") is not None: users_list = await kc_query.get_users_by_role( - token=token.token, realm=realm, role=filters.get("role").value # type: ignore + token=token.token, + realm=realm, + role=filters.get("role").value, # type: ignore ) else: users_list = await kc_query.get_users_v2( @@ -280,5 +279,10 @@ async def get_idp_names_and_SSOauth_links() -> Dict[str, List[Dict[str, str]]]: @app.on_event("startup") def periodic() -> None: scheduler = BackgroundScheduler() - scheduler.add_job(delete_file_after_7_days, "cron", hour="*/1") + scheduler.add_job( + utils.delete_file_after_7_days, + kwargs={"client": minio_client}, + trigger="cron", + hour="*/1", + ) scheduler.start() diff --git a/users/src/minio_storage.py b/users/src/minio_storage.py deleted file mode 100644 index 50251888d..000000000 --- a/users/src/minio_storage.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import Optional - -import src.config as config -from minio import Minio - - -def get_minio_client() -> Optional[Minio]: - """Return Minio client if URI is provided via config.py.""" - if not config.MINIO_URI: - return None - return Minio( - endpoint=config.MINIO_URI, - access_key=config.MINIO_ACCESS_KEY, - secret_key=config.MINIO_SECRET_KEY, - secure=False, - ) - - -def create_bucket( - client: Minio, - bucket_name: str, - location: str = "us-east-1", - object_lock: bool = False, -) -> None: - """Create minio bucket.""" - if not client.bucket_exists(bucket_name): - client.make_bucket(bucket_name, location, object_lock) diff --git a/users/src/s3.py b/users/src/s3.py new file mode 100644 index 000000000..3f138b541 --- /dev/null +++ b/users/src/s3.py @@ -0,0 +1,62 @@ +import enum +from typing import Any, Dict, Optional + +from minio import Minio, credentials +from src import config, logger + + +class S3Providers(str, enum.Enum): + MINIO = "minio" + AWS_IAM = "aws_iam" + AWS_ENV = "aws_env" + AWS_CONF = "aws_config" + + +def get_minio_config( + s3_provider: S3Providers, + endpoint: Optional[str], + access_key: Optional[str], + secret_key: Optional[str], + **kwargs: Optional[str], +) -> Dict[str, Any]: + minio_config = {"endpoint": endpoint, "secure": False} + if s3_provider == S3Providers.MINIO: + minio_config["access_key"] = access_key + minio_config["secret_key"] = secret_key + elif s3_provider == S3Providers.AWS_IAM: + minio_config["credentials"] = credentials.IamAwsProvider() + elif s3_provider == S3Providers.AWS_ENV: + minio_config["credentials"] = credentials.EnvAWSProvider() + elif s3_provider == S3Providers.AWS_CONF: + minio_config["credentials"] = credentials.AWSConfigProvider( + profile=kwargs.get("aws_profile") + ) + return minio_config + + +def get_minio_client() -> Minio: + """Return Minio client configured by S3_CREDENTIALS_PROVIDER.""" + s3_provider = S3Providers(config.S3_CREDENTIALS_PROVIDER) + logger.Logger.debug("S3_CREDENTIALS_PROVIDER is set to %s", s3_provider) + minio_config = get_minio_config( + s3_provider=s3_provider, + endpoint=config.S3_ENDPOINT, + access_key=config.S3_ACCESS_KEY, + secret_key=config.S3_SECRET_KEY, + aws_profile=config.AWS_PROFILE, + ) + return Minio(**minio_config) + + +def create_bucket( + client: Minio, + bucket_name: str, + location: str = "us-east-1", + object_lock: bool = False, +) -> None: + """Create minio bucket.""" + if not client.bucket_exists(bucket_name): + logger.Logger.debug( + "Creating new bucket, name=%s, location=%s", bucket_name, location + ) + client.make_bucket(bucket_name, location, object_lock)
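The new users/src/s3.py above replaces the deleted minio_storage.py: rather than always building the client from static keys, it switches on the S3_CREDENTIALS_PROVIDER setting. A short usage sketch of the exported helpers; the endpoint and profile values are invented for illustration, and the import assumes the code runs inside the users service:

    from minio import Minio
    from src import s3  # the module introduced in this patch

    # "aws_config" attaches an AWSConfigProvider, which reads credentials for
    # the named profile from the AWS shared credentials file. Note that
    # get_minio_config pins secure=False for every provider.
    cfg = s3.get_minio_config(
        s3_provider=s3.S3Providers.AWS_CONF,
        endpoint="s3.eu-central-1.amazonaws.com",  # illustrative endpoint
        access_key=None,  # static keys are only used by the "minio" provider
        secret_key=None,
        aws_profile="badgerdoc",  # illustrative profile name
    )
    client = Minio(**cfg)  # get_minio_client() does the same from src.config
    s3.create_bucket(client, "stage-acme")  # no-op if the bucket exists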
diff --git a/users/src/utils.py b/users/src/utils.py index 38a07b671..d7b980f84 100644 --- a/users/src/utils.py +++ b/users/src/utils.py @@ -1,10 +1,8 @@ from datetime import datetime, timedelta, timezone from typing import Any, Dict, List, Optional -import src.minio_storage as ms -from src.config import KEYCLOAK_ENDPOINT - -minio_client = ms.get_minio_client() +from minio import Minio +from src import config def extract_idp_data_needed( @@ -15,9 +13,11 @@ for alias in IDP_aliases: IDP_info = { "Alias": alias, - "Auth link": f"{KEYCLOAK_ENDPOINT}/auth/realms/master/protocol/openid-connect/auth?" + "Auth link": f"{config.KEYCLOAK_ENDPOINT}" + f"/auth/realms/master/protocol/openid-connect/auth?" f"client_id=BadgerDoc&response_type=token&" - f"redirect_uri={KEYCLOAK_ENDPOINT}/login&kc_idp_hint={alias}", + f"redirect_uri={config.KEYCLOAK_ENDPOINT}" + f"/login&kc_idp_hint={alias}", } IDPs_info.append(IDP_info) @@ -25,17 +25,19 @@ def delete_file_after_7_days( - days: Optional[int] = 7, prefix: Optional[str] = "coco/" + client: Minio, days: Optional[int] = 7, prefix: Optional[str] = "coco/" ) -> None: """Check files from all buckets with input prefix and delete files with old last modified""" - buckets = minio_client.list_buckets() + buckets = client.list_buckets() delta = timedelta(days=days) today = datetime.now(timezone.utc) for bucket in buckets: - files = minio_client.list_objects( - bucket.name, recursive=True, prefix=prefix - ) + files = client.list_objects(bucket.name, recursive=True, prefix=prefix) for file in files: if file.last_modified + delta <= today: - minio_client.remove_object(bucket.name, file.object_name) + client.remove_object(bucket.name, file.object_name) + + +def get_bucket_name(tenant: str) -> str: + return f"{config.S3_PREFIX}-{tenant}" if config.S3_PREFIX else tenant diff --git a/users/tests/test_main.py b/users/tests/test_main.py index e97d4b880..e34462a31 100644 --- a/users/tests/test_main.py +++ b/users/tests/test_main.py @@ -1,18 +1,13 @@ -from unittest.mock import patch, Mock +from contextlib import contextmanager +from unittest.mock import patch + import pytest -from fastapi.testclient import TestClient -from fastapi.security import OAuth2PasswordBearer -from src.main import ( - app, - tenant, - check_authorization, - get_user_info_from_token_introspection, -) -import src.keycloak.schemas as kc_schemas -from tenant_dependency import get_tenant_info, TenantData from fastapi import HTTPException -from contextlib import contextmanager +from fastapi.testclient import TestClient +from tenant_dependency import TenantData +import src.keycloak.schemas as kc_schemas +from src.main import app, check_authorization, tenant client = TestClient(app) @@ -269,7 +264,7 @@ def test_get_tenants_status_code(self, mock_groups): @patch("src.keycloak.query.create_group", return_value=None) -@patch("src.minio_storage.create_bucket", return_value=None) +@patch("src.s3.create_bucket", return_value=None) class TestCreateTenant: def test_create_tenant_body(self, mock_group, mock_bucket): response = client.post("/tenants?tenant=tenant") @@ -818,7 +813,7 @@ def test_get_idp_names_and_SSOauth_links( "Identity Providers Info": [ { "Alias": "EPAM_SSO", - "Auth link": "http://dev2.badgerdoc.com/auth/realms/master/protocol/openid-connect/auth?client_id=BadgerDoc&response_type=token&redirect_uri=http://dev2.badgerdoc.com/login&kc_idp_hint=EPAM_SSO", + "Auth link":
"http://dev2.badgerdoc.com/auth/realms/master/protocol/openid-connect/auth?client_id=BadgerDoc&response_type=token&redirect_uri=http://dev2.badgerdoc.com/login&kc_idp_hint=EPAM_SSO", # noqa: E501 } ] } diff --git a/users/tests/test_utils.py b/users/tests/test_utils.py index e86c8c327..04b445465 100644 --- a/users/tests/test_utils.py +++ b/users/tests/test_utils.py @@ -1,4 +1,7 @@ -import src.utils as utils +from unittest.mock import patch + +import pytest +from src import utils def test_extract_idp_data_needed(): @@ -18,10 +21,10 @@ def test_extract_idp_data_needed(): "firstBrokerLoginFlowAlias": "first broker login", "config": { "validateSignature": "true", - "signingCertificate": "MIIFKzCCBBOgAwIBAgITZQAHGmQ7vhbktV0OvQANAAcaZDANBgkqhkiG9w0BAQsFADA/MRMwEQYKCZImiZPyLGQBGRYDY29tMRQwEgYKCZImiZPyLGQBGRYEZXBhbTESMBAGA1UEAxMJSXNzdWluZ0NBMB4XDTIxMDEyMjEyNTMxNFoXDTIzMDEyMjEyNTMxNFowIjEgMB4GA1UEAxMXYWNjZXNzLXN0YWdpbmcuZXBhbS5jb20wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDHV62Dz3FotIJodYXj4O9yxOpog6lK3IgTWPITQhGMsNM1GXmKETD33xN8xXQraQAJN01X7c2TYo8TMmHt4aNr0//I0ketykjqxYbl34mN3L2lG/ieKwO0PATvSi5P/w34e21CrbRdrM3cDqXYZpLln4Mg5EKfxrpgDxFSXMO3eg2G54THOqKtDikwQ58MZi+9m5f50mb68QBzNiwl/+FNea4SDqRw2qQQRZf4VJaTuK88vskbDaawXUclBph2dOS/KgTOIGWceNHj37/v9yrKc3H0MYgLSDrntRsySiqqQgZPkqRxTWG8Em3dEoLUmfXzwQ/rOlgQc7zacJc+nA/pAgMBAAGjggI7MIICNzALBgNVHQ8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwIGCCsGAQUFBwMBMHgGCSqGSIb3DQEJDwRrMGkwDgYIKoZIhvcNAwICAgCAMA4GCCqGSIb3DQMEAgIAgDALBglghkgBZQMEASowCwYJYIZIAWUDBAEtMAsGCWCGSAFlAwQBAjALBglghkgBZQMEAQUwBwYFKw4DAgcwCgYIKoZIhvcNAwcwHQYDVR0OBBYEFFT7J7MbpKx3XSgJT4ECNDRAt19LMCIGA1UdEQQbMBmCF2FjY2Vzcy1zdGFnaW5nLmVwYW0uY29tMB8GA1UdIwQYMBaAFMgdWInjbFrMllgz54ufaSO3FRX+MEAGA1UdHwQ5MDcwNaAzoDGGL2h0dHA6Ly9jYS5lcGFtLmNvbS9jZXJ0ZW5yb2xsL0lzc3VpbmdDQSgxMikuY3JsMIGABggrBgEFBQcBAQR0MHIwRwYIKwYBBQUHMAKGO2h0dHA6Ly9jYS5lcGFtLmNvbS9DZXJ0RW5yb2xsL2NhLmVwYW0uY29tX0lzc3VpbmdDQSgxMykuY3J0MCcGCCsGAQUFBzABhhtodHRwOi8vcm9vdGNhLmVwYW0uY29tL29jc3AwPQYJKwYBBAGCNxUHBDAwLgYmKwYBBAGCNxUIi7drhfuEV4eNmT2Gpb0pgoyZUIEngoefI4f4k1ICAWQCASMwJwYJKwYBBAGCNxUKBBowGDAKBggrBgEFBQcDAjAKBggrBgEFBQcDATANBgkqhkiG9w0BAQsFAAOCAQEAch8BgaLUPxX8yKWEveCgbCjPgZZENUY1YbOcSXv87/v/iHh/wuSBkzIyfYyMRH+pecyYO2ohr02xdBNxXwPUOOWY6ETx4b/eqGs+clp7kgBMfYoIlSx39j4bKxU0gjF0jt1aeLPLj88mj45fIRA3WNue8yRD+T/E+dvxr14cvk7bIA+9LziDGmUnsJpeOacfSSNlsMNGKBv46DpQZ4lydSubnOgAR2MIfJhnTVaISNXzttjSAcpAwZXKPk7LmfuPHobCr/8v2yZZa4rXw0C+6qPCJSlSyO/fB84KlgnsHlU7RFFbZ4kzlMEi4FGmgKohHU080s6/1MvEQWsgZvuSdw==,MIICmTCCAYECBgFt93DFLjANBgkqhkiG9w0BAQsFADAQMQ4wDAYDVQQDDAVwbHVzeDAeFw0xOTEwMjMwNzA1MjVaFw0yOTEwMjMwNzA3MDVaMBAxDjAMBgNVBAMMBXBsdXN4MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAr2ul4mwYBN/h0yxZG6OGGrusOmoW33IwLiHrw26AL8/r9xtG5N9GsHLd+29zviTbzxBPQdYG5s22jpngA/QEDWtxm5Qrbqeu2wSr56aXfeoyfbyBHIMnlh46gB/N6vSuD/4hR2VJrY/UayzbTdhENMP3gpsdiV4wu/Ttjz51KGcdivjChCcjn8W9Yc8r3kHPr7AB9+vde4znWSEeXNBk8yfSdNI/HeAxAnBXzMcaTYKaQJjtpFIKnSlhGdE9X4erisJlNvTv0Wx3/6RSeHOqGifMEQVUsDkCsLeOec++XdfGWkpO98vCr6fXwg1i4/x7CDa56D37GQkGPiR6g/fqEQIDAQABMA0GCSqGSIb3DQEBCwUAA4IBAQCJItbwx4MGGG+mnwxCO4fsJmL/igBYOAOlOR4mjUzSuqlYCVwZYgC+URztuTd6fhAu5ndOTKe66Qk4yT9VMBXKkmaAheyUWdUyxKkoWzMf9JrQUtb+T0q2WtSBtz9naDZJrzuMwo7wLjzpdD0dA4ciQ7W/yylNR+QvgZPJav5w7RYV7GkXmmHkNYPl17gW3CQbXW1Gm4BHdExUky5S2zN99dzMuVKB+QCO9pNEnyM2tA1boPahJPIO2xxZIkTCE6m4wqeVs5oe3PNP+61XRniQMyC5NcCtUX7yxUmqe9HSR0f7vYl/0nlhNnEN8Xvmn2rk9xbFOghHwV/sHTtOjXKU", + "signingCertificate": 
"MIIFKzCCBBOgAwIBAgITZQAHGmQ7vhbktV0OvQANAAcaZDANBgkqhkiG9w0BAQsFADA/MRMwEQYKCZImiZPyLGQBGRYDY29tMRQwEgYKCZImiZPyLGQBGRYEZXBhbTESMBAGA1UEAxMJSXNzdWluZ0NBMB4XDTIxMDEyMjEyNTMxNFoXDTIzMDEyMjEyNTMxNFowIjEgMB4GA1UEAxMXYWNjZXNzLXN0YWdpbmcuZXBhbS5jb20wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDHV62Dz3FotIJodYXj4O9yxOpog6lK3IgTWPITQhGMsNM1GXmKETD33xN8xXQraQAJN01X7c2TYo8TMmHt4aNr0//I0ketykjqxYbl34mN3L2lG/ieKwO0PATvSi5P/w34e21CrbRdrM3cDqXYZpLln4Mg5EKfxrpgDxFSXMO3eg2G54THOqKtDikwQ58MZi+9m5f50mb68QBzNiwl/+FNea4SDqRw2qQQRZf4VJaTuK88vskbDaawXUclBph2dOS/KgTOIGWceNHj37/v9yrKc3H0MYgLSDrntRsySiqqQgZPkqRxTWG8Em3dEoLUmfXzwQ/rOlgQc7zacJc+nA/pAgMBAAGjggI7MIICNzALBgNVHQ8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwIGCCsGAQUFBwMBMHgGCSqGSIb3DQEJDwRrMGkwDgYIKoZIhvcNAwICAgCAMA4GCCqGSIb3DQMEAgIAgDALBglghkgBZQMEASowCwYJYIZIAWUDBAEtMAsGCWCGSAFlAwQBAjALBglghkgBZQMEAQUwBwYFKw4DAgcwCgYIKoZIhvcNAwcwHQYDVR0OBBYEFFT7J7MbpKx3XSgJT4ECNDRAt19LMCIGA1UdEQQbMBmCF2FjY2Vzcy1zdGFnaW5nLmVwYW0uY29tMB8GA1UdIwQYMBaAFMgdWInjbFrMllgz54ufaSO3FRX+MEAGA1UdHwQ5MDcwNaAzoDGGL2h0dHA6Ly9jYS5lcGFtLmNvbS9jZXJ0ZW5yb2xsL0lzc3VpbmdDQSgxMikuY3JsMIGABggrBgEFBQcBAQR0MHIwRwYIKwYBBQUHMAKGO2h0dHA6Ly9jYS5lcGFtLmNvbS9DZXJ0RW5yb2xsL2NhLmVwYW0uY29tX0lzc3VpbmdDQSgxMykuY3J0MCcGCCsGAQUFBzABhhtodHRwOi8vcm9vdGNhLmVwYW0uY29tL29jc3AwPQYJKwYBBAGCNxUHBDAwLgYmKwYBBAGCNxUIi7drhfuEV4eNmT2Gpb0pgoyZUIEngoefI4f4k1ICAWQCASMwJwYJKwYBBAGCNxUKBBowGDAKBggrBgEFBQcDAjAKBggrBgEFBQcDATANBgkqhkiG9w0BAQsFAAOCAQEAch8BgaLUPxX8yKWEveCgbCjPgZZENUY1YbOcSXv87/v/iHh/wuSBkzIyfYyMRH+pecyYO2ohr02xdBNxXwPUOOWY6ETx4b/eqGs+clp7kgBMfYoIlSx39j4bKxU0gjF0jt1aeLPLj88mj45fIRA3WNue8yRD+T/E+dvxr14cvk7bIA+9LziDGmUnsJpeOacfSSNlsMNGKBv46DpQZ4lydSubnOgAR2MIfJhnTVaISNXzttjSAcpAwZXKPk7LmfuPHobCr/8v2yZZa4rXw0C+6qPCJSlSyO/fB84KlgnsHlU7RFFbZ4kzlMEi4FGmgKohHU080s6/1MvEQWsgZvuSdw==,MIICmTCCAYECBgFt93DFLjANBgkqhkiG9w0BAQsFADAQMQ4wDAYDVQQDDAVwbHVzeDAeFw0xOTEwMjMwNzA1MjVaFw0yOTEwMjMwNzA3MDVaMBAxDjAMBgNVBAMMBXBsdXN4MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAr2ul4mwYBN/h0yxZG6OGGrusOmoW33IwLiHrw26AL8/r9xtG5N9GsHLd+29zviTbzxBPQdYG5s22jpngA/QEDWtxm5Qrbqeu2wSr56aXfeoyfbyBHIMnlh46gB/N6vSuD/4hR2VJrY/UayzbTdhENMP3gpsdiV4wu/Ttjz51KGcdivjChCcjn8W9Yc8r3kHPr7AB9+vde4znWSEeXNBk8yfSdNI/HeAxAnBXzMcaTYKaQJjtpFIKnSlhGdE9X4erisJlNvTv0Wx3/6RSeHOqGifMEQVUsDkCsLeOec++XdfGWkpO98vCr6fXwg1i4/x7CDa56D37GQkGPiR6g/fqEQIDAQABMA0GCSqGSIb3DQEBCwUAA4IBAQCJItbwx4MGGG+mnwxCO4fsJmL/igBYOAOlOR4mjUzSuqlYCVwZYgC+URztuTd6fhAu5ndOTKe66Qk4yT9VMBXKkmaAheyUWdUyxKkoWzMf9JrQUtb+T0q2WtSBtz9naDZJrzuMwo7wLjzpdD0dA4ciQ7W/yylNR+QvgZPJav5w7RYV7GkXmmHkNYPl17gW3CQbXW1Gm4BHdExUky5S2zN99dzMuVKB+QCO9pNEnyM2tA1boPahJPIO2xxZIkTCE6m4wqeVs5oe3PNP+61XRniQMyC5NcCtUX7yxUmqe9HSR0f7vYl/0nlhNnEN8Xvmn2rk9xbFOghHwV/sHTtOjXKU", # noqa: E501 "postBindingLogout": "true", "postBindingResponse": "true", - "nameIDPolicyFormat": "urn:oasis:names:tc:SAML:1.1:nameid-format:emailAddress", + "nameIDPolicyFormat": "urn:oasis:names:tc:SAML:1.1:nameid-format:emailAddress", # noqa: E501 "entityId": "http://dev2.badgerdoc.com/auth/realms/master", "xmlSigKeyInfoKeyNameTransformer": "KEY_ID", "signatureAlgorithm": "RSA_SHA256", @@ -32,7 +35,7 @@ def test_extract_idp_data_needed(): "authnContextComparisonType": "exact", "postBindingAuthnRequest": "true", "wantAuthnRequestsSigned": "true", - "singleSignOnServiceUrl": "https://access-staging.epam.com/auth/realms/plusx/protocol/saml", + "singleSignOnServiceUrl": "https://access-staging.epam.com/auth/realms/plusx/protocol/saml", # noqa: E501 "addExtensionsElementWithKeyInfo": "false", "principalType": "SUBJECT", }, @@ -41,6 +44,14 @@ def 
test_extract_idp_data_needed(): assert utils.extract_idp_data_needed(mocked_data_to_convert) == [ { "Alias": "EPAM_SSO", - "Auth link": "http://dev2.badgerdoc.com/auth/realms/master/protocol/openid-connect/auth?client_id=BadgerDoc&response_type=token&redirect_uri=http://dev2.badgerdoc.com/login&kc_idp_hint=EPAM_SSO", + "Auth link": "http://dev2.badgerdoc.com/auth/realms/master/protocol/openid-connect/auth?client_id=BadgerDoc&response_type=token&redirect_uri=http://dev2.badgerdoc.com/login&kc_idp_hint=EPAM_SSO", # noqa: E501 } ] + + +@pytest.mark.parametrize( + ("prefix", "expected"), (("", "tenant"), ("prefix", "prefix-tenant")) +) +def test_bucket_dependency(prefix: str, expected: str) -> None: + with patch("src.config.S3_PREFIX", prefix): + assert utils.get_bucket_name("tenant") == expected diff --git a/web/chart/templates/deployment.yaml b/web/chart/templates/deployment.yaml index 87c202c77..182ec550c 100644 --- a/web/chart/templates/deployment.yaml +++ b/web/chart/templates/deployment.yaml @@ -4,38 +4,66 @@ metadata: annotations: deployment.kubernetes.io/revision: "1" labels: - app: badgerdoc-ui - name: badgerdoc-ui + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . }}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + name: {{ template "svc.name" . }} namespace: {{ .Release.Namespace }} spec: progressDeadlineSeconds: 600 - replicas: 1 + replicas: {{ default .Values.global.replicaCount .Values.replicaCount }} revisionHistoryLimit: 10 selector: matchLabels: - app: badgerdoc-ui + app: {{ template "svc.name" . }} strategy: - rollingUpdate: - maxSurge: 25% - maxUnavailable: 25% - type: RollingUpdate +{{ .Values.updateStrategy | default .Values.global.updateStrategy | toYaml | indent 4 }} template: metadata: - annotations: - sidecar.istio.io/inject: "false" - creationTimestamp: null labels: - app: badgerdoc-ui + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "svc.name" . }}" + {{- if .Values.labels }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.global.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + annotations: + {{- if .Values.podAnnotations }} + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.global.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} spec: + affinity: +{{ .Values.affinity | default .Values.global.affinity | toYaml | indent 8 }} + nodeSelector: +{{ .Values.nodeSelector | default .Values.global.nodeSelector | toYaml | indent 8 }} + tolerations: +{{ .Values.tolerations | default .Values.global.tolerations | toYaml | indent 8 }} containers: - - image: {{ .Values.image.registry }}/badgerdoc_ui:{{ .Values.image.tag }} - imagePullPolicy: IfNotPresent - name: badgerdoc-ui - resources: {} + - image: "{{ .Values.imageName }}:{{ default .Values.global.imageTag .Values.imageTag }}" + imagePullPolicy: "{{ default .Values.global.imagePullPolicy .Values.imagePullPolicy }}" + name: {{ template "svc.name" .
}} + resources: +{{ .Values.resources | default .Values.global.resources | toYaml | indent 10 }} terminationMessagePath: /dev/termination-log terminationMessagePolicy: File dnsPolicy: ClusterFirst restartPolicy: Always - schedulerName: default-scheduler - securityContext: {} + schedulerName: {{ default .Values.global.schedulerName .Values.schedulerName }} + securityContext: +{{ toYaml .Values.securityContext | indent 8 }} terminationGracePeriodSeconds: 30 diff --git a/web/chart/templates/mapping.yaml b/web/chart/templates/mapping.yaml index 17f546b2c..c84a262fe 100644 --- a/web/chart/templates/mapping.yaml +++ b/web/chart/templates/mapping.yaml @@ -1,14 +1,14 @@ apiVersion: getambassador.io/v2 kind: Mapping metadata: - name: badgerdoc-ui + name: {{ template "svc.name" . }} namespace: {{ .Release.Namespace }} spec: - host: {{ .Release.Namespace }}.gcov.ru + host: {{ default .Values.host .Values.global.host }} connect_timeout_ms: 30000 idle_timeout_ms: 50000 prefix: / - service: badgerdoc-ui:80 + service: {{ template "svc.name" . }}:80 timeout_ms: 30000 keepalive: interval: 10 diff --git a/web/chart/templates/service.yaml b/web/chart/templates/service.yaml index 3e51d8d97..5f9a213ee 100644 --- a/web/chart/templates/service.yaml +++ b/web/chart/templates/service.yaml @@ -2,19 +2,18 @@ apiVersion: v1 kind: Service metadata: labels: - app: badgerdoc-ui - name: badgerdoc-ui + app: "{{ template "svc.name" . }}" + name: "{{ template "svc.name" . }}" namespace: {{ .Release.Namespace }} spec: ipFamilies: - IPv4 ipFamilyPolicy: SingleStack ports: - - port: 80 - protocol: TCP - targetPort: 3000 + - port: {{ default .Values.global.servicePort .Values.servicePort }} + protocol: TCP + targetPort: 3000 selector: - app: badgerdoc-ui + app: "{{ template "svc.name" . }}" sessionAffinity: None - type: ClusterIP - + type: "{{ default .Values.global.serviceType .Values.serviceType }}" \ No newline at end of file diff --git a/web/chart/values-c.yaml b/web/chart/values-c.yaml new file mode 100644 index 000000000..162ae40a8 --- /dev/null +++ b/web/chart/values-c.yaml @@ -0,0 +1,35 @@ +extraEnvs: [] + +image: + registry: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc + tag: latest + +imagePullPolicy: Always + +podAnnotations: + sidecar.istio.io/inject: "false" +labels: + app: badgerdoc-ui + +replicaCount: 1 + +resources: {} + +schedulerName: default-scheduler + +nameOverride: "" + +affinity: {} +nodeSelector: {} +tolerations: [] + +securityContext: {} + +servicePort: 80 +serviceType: ClusterIP + +updateStrategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate \ No newline at end of file diff --git a/web/chart/values.yaml b/web/chart/values.yaml deleted file mode 100644 index f67f9d9a2..000000000 --- a/web/chart/values.yaml +++ /dev/null @@ -1,2 +0,0 @@ -image: - registry: 818863528939.dkr.ecr.eu-central-1.amazonaws.com/badgerdoc diff --git a/web/src/components/dataset/dataset-choose-form.tsx b/web/src/components/dataset/dataset-choose-form.tsx index 0f355da79..6f1060706 100644 --- a/web/src/components/dataset/dataset-choose-form.tsx +++ b/web/src/components/dataset/dataset-choose-form.tsx @@ -68,7 +68,7 @@ export const DatasetChooseForm: FC = ({ onChooseDataset, ...modalProps } dataSource={dataSource} title={(

[JSX title markup stripped during extraction; the element renders the text "Datasets"]
) as unknown as HTMLElement} dataset={selectedDataset} - > + /> diff --git a/web/src/components/dataset/dataset-row/dataset-row.tsx index 9ac6037a6..315474602 100644 --- a/web/src/components/dataset/dataset-row/dataset-row.tsx +++ b/web/src/components/dataset/dataset-row/dataset-row.tsx @@ -1,4 +1,4 @@ -import { Button, FlexRow, LinkButton, FlexSpacer, ErrorNotification, Text } from '@epam/loveship'; +import { FlexRow, LinkButton, FlexSpacer, ErrorNotification, Text } from '@epam/loveship'; import { useUuiContext, INotification } from '@epam/uui'; import { useDeleteDatasetMutation } from 'api/hooks/datasets'; diff --git a/web/src/connectors/documents-card-connector/documents-card-connector.tsx b/web/src/connectors/documents-card-connector/documents-card-connector.tsx index 57ec523ef..d8d863f30 100644 --- a/web/src/connectors/documents-card-connector/documents-card-connector.tsx +++ b/web/src/connectors/documents-card-connector/documents-card-connector.tsx @@ -8,11 +8,11 @@ import { Operators } from '../../api/typings'; import { Job } from '../../api/typings/jobs'; import { DocumentsSearch } from 'shared/contexts/documents-search'; -type DocumentsTableConnectorProps = { +type DocumentsCardConnectorProps = { onFilesSelect?: (files: number[]) => void; }; -export const DocumentsCardConnector: FC<DocumentsTableConnectorProps> = ({ onFilesSelect }) => { +export const DocumentsCardConnector: FC<DocumentsCardConnectorProps> = ({ onFilesSelect }) => { const { pageConfig, onPageChange, totalCount, onTotalCountChange } = usePageTable('category'); const { query, facetFilter, documentsSort } = useContext(DocumentsSearch); const [jobs, setJobs] = useState(); diff --git a/web/src/connectors/documents-sidebar-connector/documents-sidebar-connector.tsx b/web/src/connectors/documents-sidebar-connector/documents-sidebar-connector.tsx index 155983d53..fbea564f7 100644 --- a/web/src/connectors/documents-sidebar-connector/documents-sidebar-connector.tsx +++ b/web/src/connectors/documents-sidebar-connector/documents-sidebar-connector.tsx @@ -6,7 +6,6 @@ import styles from './documents-sidebar-connector.module.scss'; import { Filter, PagedResponse, - Pipeline, QueryHookParamsType, QueryHookType, SortingDirection @@ -41,14 +40,14 @@ export const DocumentsSidebarConnector = ({ filters }: DocumentsSidebarConnectorProps) => { const [searchText, setSearchText] = useState(''); - const [sortConfig, setSortConfig] = useState({ - field: sortField, - direction: SortingDirection.ASC - }); const [page, setPageNum] = useState(1); const [items, setItems] = useState([]); const [canLoad, setCanLoad] = useState(false); + const sortConfig = { + field: sortField, + direction: SortingDirection.ASC + }; const { data } = useEntitiesHook({ searchText, sortConfig, page, size, filters }, {}); useEffect(() => { diff --git a/web/src/connectors/documents-table-connector/documents-table-connector.tsx b/web/src/connectors/documents-table-connector/documents-table-connector.tsx index ec81ce17b..6ea0cbdeb 100644 --- a/web/src/connectors/documents-table-connector/documents-table-connector.tsx +++ b/web/src/connectors/documents-table-connector/documents-table-connector.tsx @@ -1,4 +1,4 @@ -import React, { useEffect, useMemo, useRef, useState, useContext, Fragment } from 'react'; +import React, { useEffect, useMemo, useRef, useState, useContext } from 'react'; import styles from './documents-table-connector.module.scss'; import { Button, @@ -34,7 +34,7 @@ import { ReactComponent as DeleteIcon } from '@epam/assets/icons/common/action-d import {
DatasetChooseForm, DatasetWithFiles } from '../../components'; import { getError } from '../../shared/helpers/get-error'; import { useAddFilesToDatasetMutation } from '../../api/hooks/datasets'; -import { getFiltersFromStorage, saveFiltersToStorage } from '../../shared/helpers/set-filters'; +import { saveFiltersToStorage } from '../../shared/helpers/set-filters'; type DocumentsTableConnectorProps = { dataset?: Dataset | null | undefined; @@ -107,9 +107,8 @@ export const DocumentsTableConnector: React.FC = ( }, [selectedFiles]); useEffect(() => { - const localFilters = getFiltersFromStorage('documents'); - if (localFilters) setFilters(localFilters); - }, []); + onTableValueChange({ ...tableValue, filter: dataset ? {} : undefined }); + }, [dataset]); useEffect(() => { let filtersToSet: Filter[] = []; @@ -145,14 +144,24 @@ export const DocumentsTableConnector: React.FC = ( operator: Operators.EQ, value: dataset.id }); + saveFiltersToStorage(filtersToSet, 'documents'); + setFilters(filtersToSet); } if (tableValue.filter) { saveFiltersToStorage(filtersToSet, 'documents'); - setFilters(filtersToSet); } }, [tableValue.filter, dataset]); - const { data: files, isFetching } = useDocuments( + + useEffect(() => { + refetch(); + }, [filters]); + + const { + data: files, + isFetching, + refetch + } = useDocuments( { page: pageConfig.page, size: pageConfig.pageSize, @@ -187,11 +196,6 @@ export const DocumentsTableConnector: React.FC = ( filters ); - const getCardSort = () => ({ - direction: 'desc', - field: documentsSort - }); - const view = dataSource.useView(tableValue, onTableValueChange, { getRowOptions: () => ({ checkbox: { isVisible: true }, diff --git a/web/src/connectors/login-connector/login-connector.tsx b/web/src/connectors/login-connector/login-connector.tsx index 26c819065..eb8e118cb 100644 --- a/web/src/connectors/login-connector/login-connector.tsx +++ b/web/src/connectors/login-connector/login-connector.tsx @@ -66,7 +66,7 @@ export const LoginConnector = ({ onSuccess = noop, onError = noop }: LoginConnec -