Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
7692922
feat: add azure blob storage support
khyurri Jul 25, 2024
2c45004
feat: add azure blob storage support to badgerdoc_storage
khyurri Jul 26, 2024
66ba7aa
test(annotation): cover row to dict function (#887)
djo753 Aug 12, 2024
19824f0
test(annotation): cover main.py functions (#888)
djo753 Aug 12, 2024
e607abe
test(jobs): cover services.py functions (#892)
Negoje Aug 13, 2024
dcb72a8
test: Add parameterized test cases for validate_user_actions function…
BogicevicMilan Aug 13, 2024
20dd6c8
test(annotation): cover construct_annotated_doc (#896)
djo753 Aug 14, 2024
f274af4
test(annotation): cover tasks/services.py (#900)
BogicevicMilan Aug 14, 2024
22a4d2c
test(annotation): cover jobs.services functions (#904)
Negoje Aug 19, 2024
d27615f
test(annotation): cover upload json and pages to minio (#897)
djo753 Aug 19, 2024
d02df0e
test(annotation): cover distribution/main.py (#893)
UkiStefanovic Aug 19, 2024
b45d1db
test(annotation): cover update_pages_array and check_if_kafka_message…
djo753 Aug 20, 2024
b1380fb
test(annotation): cover mark validated pages (#902)
djo753 Aug 22, 2024
8921fc4
test(annotation): cover jobs.services.py functions (#907)
Negoje Aug 22, 2024
0264d90
test(annotation): cover distribute_tasks_extensively (#909)
UkiStefanovic Aug 22, 2024
15e0dee
test(annotation): cover load functions (#908)
djo753 Aug 23, 2024
7f8b995
test(annotation): cover check null fields (#912)
djo753 Aug 23, 2024
2d103f0
test(annotation): cover accumulate pages info (#913)
djo753 Aug 23, 2024
988df96
feat: remove convert microservice
khyurri Aug 23, 2024
49f5e5c
test(annotation): cover construct document links (#914)
djo753 Aug 23, 2024
cd4739d
test(annotation): cover distribute_whole_files (#918)
UkiStefanovic Aug 23, 2024
f6e9e2b
test(annotation): cover create_tasks (#919)
UkiStefanovic Aug 23, 2024
96395cd
test(annotation): cover distribute (#916)
UkiStefanovic Aug 23, 2024
61046ab
test(annotation): cover jobs.services.py functions (#917)
Negoje Aug 23, 2024
2af9e95
test(annotation): cover module distribution (#920)
UkiStefanovic Aug 26, 2024
5f19b33
refactor(annotation): code from PR 922 (#939)
BogicevicMilan Aug 27, 2024
56ee49c
test(annotation): cover tasks status validation functions (#925)
BogicevicMilan Aug 27, 2024
5d443ab
test(annotation): cover jobs.services.py functions (#924)
Negoje Aug 27, 2024
bab4281
test(annotation): agreement metrics (#945)
BogicevicMilan Aug 27, 2024
db1e590
test(annotation): cover accum_annotation functions (#929)
BogicevicMilan Aug 27, 2024
daaec46
test(annotation): cover id and annotation tasks (#930)
BogicevicMilan Aug 27, 2024
4b241c3
test(annotation): cover ids and links functions (#931)
BogicevicMilan Aug 28, 2024
70839ef
test(annotation): cover common objs functions (#932)
BogicevicMilan Aug 28, 2024
705b55a
test(annotation): cover tasks without ids (#933)
BogicevicMilan Aug 29, 2024
f5121ba
remove convert from docker-compose
khyurri Aug 29, 2024
691f2cc
test(annotation): validation function (#949)
MilanBogicevic8 Aug 29, 2024
3e09815
test(annotation): task annotation pages (#951)
MilanBogicevic8 Aug 29, 2024
18c461a
fix(tasks): add LRU class, remove lru-dict dependency (#940)
djo753 Aug 30, 2024
ea74f02
test(annotation): csv and lru functions (#953)
MilanBogicevic8 Aug 30, 2024
a15fc81
fix(annotation): remove kafka dependency (#943)
UkiStefanovic Aug 30, 2024
39ce681
fix(annotation): rewrite jobs_communication sync to async http commun…
Negoje Aug 30, 2024
219c9e8
feat: Add datasets tab with new table (#944)
aazaliyaa Sep 2, 2024
bf182b0
feat: add ci to badgerdoc_storage (#946)
khyurri Sep 2, 2024
3ef0c39
doc: add minio and azure blob storage configuration documentation
khyurri Sep 2, 2024
4a319b9
doc: remove Set up Azure Blob Storage section
khyurri Sep 2, 2024
c5be26e
feat: rename ci workflows and jobs (#956)
khyurri Sep 3, 2024
2e8902e
feat: add badgerdoc common ci (#957)
khyurri Sep 3, 2024
db71173
feat: badgerdoc ci build only in case of PR into main (#958)
khyurri Sep 3, 2024
c515181
feat: jobs add datasets (#955)
ervandagadzhanyan Sep 3, 2024
631043e
fix: annotation test hotfix (#959)
khyurri Sep 4, 2024
5b569a1
fix: fix KeyError (#960)
ervandagadzhanyan Sep 5, 2024
c5325da
fix: content type blob storage hotfix (#962)
khyurri Sep 5, 2024
9f69e54
feat: editable document label. (#961)
aazaliyaa Sep 5, 2024
e6fa4b1
fix error handling in files_data_to_pipeline_arg
ervandagadzhanyan Sep 6, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 26 additions & 30 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# !!!! Important !!!!
# This .env file is for running local installation only

################################################
# General configuration
################################################

LOG_LEVEL=DEBUG

Expand All @@ -13,47 +12,43 @@ POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DB=badgerdoc

# S3 configuration
# You should repeat aws creds in both of sections
# because minio lib doesn't use env vars

S3_PROVIDER=minio
################################################
# Storage provider configuration
################################################

# Boto configuration
# Can be one of: minio, azure, s3
STORAGE_PROVIDER=minio

AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_DEFAULT_REGION=
# Minio configuration
# This MINIO_PUBLIC_HOST must be accessible from your browser
MINIO_PUBLIC_HOST=http://badgerdoc-minio:9000

# Minio dev configuration
# Azure Blob Storage Configuration
AZURE_BLOB_STORAGE_CONNECTION_STRING=

S3_SECURE=false
AWS_REGION=
# S3 configuration

# TODO: We need to unify this configuration, boto3 requires with http, Minio without
# TODO: DEPRECATED S3_ENDPOINT_URL
S3_ENDPOINT_URL=http://badgerdoc-minio:9000
S3_ENDPOINT=badgerdoc-minio:9000
S3_ACCESS_KEY=minioadmin
S3_SECRET_KEY=minioadmin
S3_PREFIX=
S3_REGION=
S3_SECURE=false

MINIO_DEFAULT_BUCKETS=local

# KeyCloak dev configuration
# KeyCloak
KEYCLOAK_HOST=http://badgerdoc-keycloak:8080

KEYCLOAK_SYSTEM_USER_CLIENT=badgerdoc-internal
KEYCLOAK_SYSTEM_USER_SECRET=

KEYCLOAK_USER=admin
KEYCLOAK_PASSWORD=admin


# Gotenberg dev configuration
GOTENBERG_HOST=gotenberg

################################################
# Services
################################################

ASSETS_SERVICE_SCHEME=http
ASSETS_SERVICE_HOST=badgerdoc-assets
Expand All @@ -62,13 +57,14 @@ ASSETS_SERVICE_PORT=8080
ANNOTATION_SERVICE_SCHEME=http
ANNOTATION_SERVICE_HOST=badgerdoc-annotation
ANNOTATION_SERVICE_PORT=8080
AGREEMENT_SCORE_SERVICE_HOST=localhost:5000 # TODO: add prefix ANNOTATION_

JOBS_SERVICE_SCHEME=http
JOBS_SERVICE_HOST=badgerdoc-jobs
JOBS_SERVICE_PORT=8080
JOBS_RUN_PIPELINES_WITH_SIGNED_URL=False
# pre signed urls expire in 48 hours
S3_PRE_SIGNED_EXPIRES_HOURS=48
JOBS_SIGNED_URL_ENABLED=False
JOBS_SIGNED_URL_TTL=12
JOBS_SIGNED_URL_KEY_NAME=

TAXONOMY_SERVICE_SCHEME=http
TAXONOMY_SERVICE_HOST=badgerdoc-taxonomy
Expand All @@ -91,11 +87,12 @@ USERS_SERVICE_PORT=8080
# Web configuration

WEB_CORS=*
KAFKA_BOOTSTRAP_SERVER=badgerdoc-kafka:9092 # TODO: remove port
KAFKA_SEARCH_TOPIC=search
AGREEMENT_SCORE_SERVICE_HOST=localhost:5000 # TODO: remove port
MAX_REQ_SIZE=100M

################################################
# Pipelines
################################################

# Airflow configuration

AIRFLOW_ENABLED=True
Expand All @@ -106,7 +103,6 @@ AIRFLOW_SERVICE_HOST=airflow-webserver
AIRFLOW_SERVICE_PORT=8080
AIRFLOW_SERVICE_PATH_PREFIX=/api/v1


# Databricks configuration

DATABRICKS_ENABLED=False
Expand Down
21 changes: 11 additions & 10 deletions .github/workflows/annotation.yml
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
name: annotation precommit and test
run-name: annotation precommit and test
name: annotation
run-name: annotation
on:
push:
paths:
- 'annotation/**'
- "annotation/**"
pull_request:
paths:
- 'annotation/**'
- "annotation/**"
jobs:
annotation-pre-commit-actions:
linters:
strategy:
matrix:
python-version: [ "3.8.15" ]
python-version: ["3.8.15"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache: "pip"
- run: pip install flake8
- run: flake8 --extend-ignore=E203 annotation
annotation-build:
tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ "3.8.15" ]
python-version: ["3.8.15"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache: "pip"
- name: Install dependencies and run tests
run: |
python -m pip install --upgrade pip
Expand All @@ -41,6 +41,7 @@ jobs:
poetry install --no-root
poetry add ../lib/filter_lib
poetry add ../lib/tenants
poetry add ../lib/badgerdoc_storage
poetry run pytest
env:
POSTGRES_HOST: 127.0.0.1
Expand Down
25 changes: 13 additions & 12 deletions .github/workflows/assets.yml
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
name: assets precommit and test
run-name: assets precommit and test
name: assets
run-name: assets
on:
push:
paths:
- 'assets/**'
- '.github/workflows/assets.yml'
- "assets/**"
- ".github/workflows/assets.yml"
pull_request:
paths:
- 'assets/**'
- '.github/workflows/assets.yml'
- "assets/**"
- ".github/workflows/assets.yml"
jobs:
assets-pre-commit-actions:
linters:
strategy:
matrix:
python-version: [ "3.8.15" ]
python-version: ["3.8.15"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache: "pip"
- run: pip install flake8
- run: flake8 --extend-ignore=E203 assets
assets-build:
tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ "3.8.15" ]
python-version: ["3.8.15"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache: "pip"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand All @@ -44,6 +44,7 @@ jobs:
poetry install --no-root --no-interaction
poetry add ../lib/filter_lib
poetry add ../lib/tenants
poetry add ../lib/badgerdoc_storage
- name: Test with pytest
run: |
cd assets
Expand Down
12 changes: 0 additions & 12 deletions .github/workflows/badgerdoc-build.yml

This file was deleted.

32 changes: 32 additions & 0 deletions .github/workflows/badgerdoc-storage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# CI for the badgerdoc_storage library.
# Runs pylint, mypy, black and isort over lib/badgerdoc_storage/src on every
# push or pull request that touches the library or this workflow file.
name: badgerdoc_storage
run-name: badgerdoc_storage
on:
  push:
    paths:
      - "lib/badgerdoc_storage/**"
      # Re-run when the workflow definition itself changes. The path must
      # match this file's real location and name, otherwise the filter
      # silently never fires.
      - ".github/workflows/badgerdoc-storage.yml"
  pull_request:
    paths:
      - "lib/badgerdoc_storage/**"
      - ".github/workflows/badgerdoc-storage.yml"
jobs:
  linters:
    strategy:
      matrix:
        # Versions are quoted so they stay strings rather than YAML floats.
        python-version: ["3.8.15", "3.12"]
    runs-on: ubuntu-latest
    defaults:
      run:
        # Every `run` step below executes from the library directory.
        working-directory: ./lib/badgerdoc_storage
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
      # Poetry is pinned so the install step is reproducible.
      - run: pip install poetry==1.7.1
      - run: poetry install
      - run: poetry run pylint src/
      - run: poetry run mypy src/
      - run: poetry run black --check src/
      - run: poetry run isort --check src/
53 changes: 53 additions & 0 deletions .github/workflows/badgerdoc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Repository-wide CI: formatting checks, a full service build for pull
# requests into main, and a semantic pull-request check.
name: badgerdoc

on:
  pull_request:
    branches:
      - "**"
  push:
    branches:
      - "**"
  # Only the semantic_pr job is gated on this event (see its `if`).
  pull_request_target:
    types:
      - opened
      - edited
      - synchronize

env:
  # Quoted: an unquoted version is a YAML float, so e.g. 3.10 would be
  # read as 3.1.
  PYTHON_VERSION: "3.8"

jobs:
  # Formatting checks. This job has no `if`, so it runs for every trigger
  # declared above.
  linters:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install poetry
          poetry install --only dev
      - name: Run Isort
        run: |
          poetry run isort --diff --check-only .
      - name: Run Black
        run: |
          poetry run black --config pyproject.toml --diff --check .
  # Full build, limited to pull requests whose base branch is main.
  build:
    if: github.event_name == 'pull_request' && github.base_ref == 'main'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build services
        run: make build_badgerdoc
  # Semantic pull-request check; runs only for pull_request_target events.
  semantic_pr:
    if: github.event_name == 'pull_request_target'
    runs-on: ubuntu-latest
    steps:
      - uses: amannn/action-semantic-pull-request@v5
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
37 changes: 0 additions & 37 deletions .github/workflows/convert.yml

This file was deleted.

Loading