Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
9ac2455
unified repo interface "remove_if_exists"
steve-space Nov 20, 2025
2826e3c
updated pytest, added factory-boy dependency
steve-space Nov 20, 2025
01bddf7
added filesystem root variable for testing
steve-space Nov 20, 2025
bf5a924
added variable for elasticsearch index name (for testing purposes)
steve-space Nov 20, 2025
f5b71d2
removed weaviate profile
steve-space Nov 20, 2025
f9a1980
added weaviate postfix (for testing purposes)
steve-space Nov 20, 2025
2b1d0e6
first working conftest
steve-space Nov 26, 2025
25a725d
added new project service, removed events
steve-space Nov 26, 2025
9be5b18
updated weaviate client
steve-space Nov 26, 2025
cdc28b9
added first working versions of user and project factories
steve-space Nov 26, 2025
6a8c561
added factories
steve-space Nov 26, 2025
a32640f
added basic project endpoint tests
steve-space Nov 26, 2025
fe0ce03
added more factories
steve-space Nov 27, 2025
d0a9dad
added missing exception handlers
steve-space Dec 4, 2025
68085e4
added core endpoint tests
steve-space Dec 16, 2025
b302833
added redis repo, updated conftest, configured redis db idx for testi…
steve-space Dec 16, 2025
6ac4764
updated worker script to correctly handle sigterm, sigint events and …
steve-space Dec 16, 2025
191346a
added test user endpoint
steve-space Jan 14, 2026
e1ff3c0
added tag endpoint tests
steve-space Jan 14, 2026
20827e1
improved update tests with parametrize
steve-space Jan 27, 2026
7b39dec
moved old tests to test_old
steve-space Mar 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ UUID_NAMESPACE=
# Where to store uploaded files.
# <path_to_dats_repo>/docker/backend_repo
SHARED_FILESYSTEM_ROOT=/insert_path_to_dats_repo/docker/backend_repo
SHARED_FILESYSTEM_ROOT_TEST=/insert_path_to_dats_repo/backend/test/test_repo

# The system user is automatically created and owns automatically generated data.
SYSTEM_USER_EMAIL="SYSTEM@dats.org"
Expand Down Expand Up @@ -62,6 +63,7 @@ HF_HUB_TOKEN=replace-me-with-your-own-token
CONTENT_SERVER_PORT=13121

ES_PORT=13125
ES_INDEX_PREFIX=dats

POSTGRES_PORT=13122
POSTGRES_DB=dats
Expand All @@ -72,6 +74,7 @@ REDIS_PORT=13124
REDIS_PASSWORD=dats123

WEAVIATE_PORT=13132
WEAVIATE_COLLECTION_POSTFIX=""
WEAVIATE_GRPC_PORT=13134

RAY_HOST=localhost
Expand Down
1 change: 1 addition & 0 deletions backend/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ __pycache__/
.venv/

.env
test/test_repo
4 changes: 3 additions & 1 deletion backend/configs/development.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ filesystem:
weaviate:
host: localhost
port: ${oc.env:WEAVIATE_PORT, 13132}
collection_postfix: ${oc.env:WEAVIATE_COLLECTION_POSTFIX,""}
grpc_port: ${oc.env:WEAVIATE_GRPC_PORT, 13134}

postgres:
Expand Down Expand Up @@ -83,7 +84,7 @@ redis:
host: localhost
port: ${oc.env:REDIS_PORT, 13124}
password: ${oc.env:REDIS_PASSWORD, dats123}
rq_idx: 10
rq_idx: ${oc.env:REDIS_INDEX, 10}

logging:
max_file_size: 500 # MB
Expand All @@ -92,6 +93,7 @@ logging:
elasticsearch:
host: 127.0.0.1
port: ${oc.env:ES_PORT, 13125}
index_prefix: ${oc.env:ES_INDEX_PREFIX, dats}
use_ssl: False
verify_certs: False
sniff_on_start: False
Expand Down
4 changes: 3 additions & 1 deletion backend/configs/production.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ filesystem:
weaviate:
host: weaviate
port: 8080
collection_postfix: ${oc.env:WEAVIATE_COLLECTION_POSTFIX,""}
grpc_port: 50051

postgres:
Expand Down Expand Up @@ -83,7 +84,7 @@ redis:
host: redis
port: 6379
password: ${oc.env:REDIS_PASSWORD, dats123}
rq_idx: 10
rq_idx: ${oc.env:REDIS_INDEX, 10}

logging:
max_file_size: 500 # MB
Expand All @@ -92,6 +93,7 @@ logging:
elasticsearch:
host: elasticsearch
port: 9200
index_prefix: ${oc.env:ES_INDEX_PREFIX, dats}
use_ssl: False
verify_certs: False
sniff_on_start: False
Expand Down
5 changes: 3 additions & 2 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dependencies = [
"elasticsearch==7.17",
"email-validator==2.2.0",
"evaluate==0.4.5",
"factory-boy==3.3.3",
"fastapi==0.115.12",
"fastapi-mail==1.4.1",
"ffmpeg-python==0.2.0",
Expand Down Expand Up @@ -57,7 +58,7 @@ dependencies = [
"pydantic-core==2.27.2",
"pymupdf==1.23.4",
"pyright==1.1.385",
"pytest==7.4.3",
"pytest==9.0.1",
"pytest-order==1.2.1",
"python-jose==3.3",
"python-magic==0.4.27",
Expand Down Expand Up @@ -88,7 +89,7 @@ dependencies = [
"ujson>=5.10.0",
"umap-learn==0.5.5",
"uvicorn==0.23.2",
"weaviate-client==4.14.4",
"weaviate-client==4.18.1",
"webdriver-manager==4.0.1",
"yake==0.4.8",
]
Expand Down
12 changes: 1 addition & 11 deletions backend/src/core/doc/document_embedding_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from core.doc.document_collection import DocumentCollection
from core.doc.document_embedding_dto import DocumentObjectIdentifier
from repos.vector.embedding_crud_base import CRUDBase
from systems.event_system.events import project_deleted, source_document_deleted
from systems.event_system.events import source_document_deleted


class CRUDDocumentEmbedding(CRUDBase[DocumentObjectIdentifier, DocumentCollection]):
Expand Down Expand Up @@ -82,13 +82,3 @@ def handle_source_document_deleted(sender, sdoc_id: int, project_id: int):
crud_document_embedding.remove_by_sdoc_id(
client=client, project_id=project_id, sdoc_id=sdoc_id
)


@project_deleted.connect
def handle_project_deleted(sender, project_id: int):
from repos.vector.weaviate_repo import WeaviateRepo

with WeaviateRepo().weaviate_session() as client:
crud_document_embedding.remove_embeddings_by_project(
client=client, project_id=project_id
)
12 changes: 1 addition & 11 deletions backend/src/core/doc/image_embedding_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from core.doc.image_collection import ImageCollection
from core.doc.image_embedding_dto import ImageObjectIdentifier
from repos.vector.embedding_crud_base import CRUDBase
from systems.event_system.events import project_deleted, source_document_deleted
from systems.event_system.events import source_document_deleted


class CRUDImageEmbedding(CRUDBase[ImageObjectIdentifier, ImageCollection]):
Expand Down Expand Up @@ -82,13 +82,3 @@ def handle_source_document_deleted(sender, sdoc_id: int, project_id: int):
crud_image_embedding.remove_by_sdoc_id(
client=client, project_id=project_id, sdoc_id=sdoc_id
)


@project_deleted.connect
def handle_project_deleted(sender, project_id: int):
from repos.vector.weaviate_repo import WeaviateRepo

with WeaviateRepo().weaviate_session() as client:
crud_image_embedding.remove_embeddings_by_project(
client=client, project_id=project_id
)
21 changes: 0 additions & 21 deletions backend/src/core/doc/sdoc_elastic_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
from repos.elastic.elastic_crud_base import ElasticCrudBase
from repos.elastic.elastic_dto_base import PaginatedElasticSearchHits
from systems.event_system.events import (
project_created,
project_deleted,
source_document_deleted,
)

Expand Down Expand Up @@ -79,22 +77,3 @@ def handle_source_document_deleted(sender, sdoc_id: int, project_id: int):
id=sdoc_id,
proj_id=project_id,
)


@project_created.connect
def handle_project_created(sender, project_id: int):
from repos.elastic.elastic_repo import ElasticSearchRepo

crud_elastic_sdoc.index.create_index(
client=ElasticSearchRepo().client, proj_id=project_id
)


@project_deleted.connect
def handle_project_deleted(sender, project_id: int):
from repos.elastic.elastic_repo import ElasticSearchRepo

crud_elastic_sdoc.index.delete_index(
client=ElasticSearchRepo().client,
proj_id=project_id,
)
12 changes: 1 addition & 11 deletions backend/src/core/doc/sentence_embedding_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from core.doc.sentence_embedding_dto import SentenceObjectIdentifier
from repos.vector.embedding_crud_base import CRUDBase
from repos.vector.weaviate_models import EmbeddingSearchResult
from systems.event_system.events import project_deleted, source_document_deleted
from systems.event_system.events import source_document_deleted


class CRUDSentenceEmbedding(CRUDBase[SentenceObjectIdentifier, SentenceCollection]):
Expand Down Expand Up @@ -129,13 +129,3 @@ def handle_source_document_deleted(sender, sdoc_id: int, project_id: int):
crud_sentence_embedding.remove_by_sdoc_id(
client=client, project_id=project_id, sdoc_id=sdoc_id
)


@project_deleted.connect
def handle_project_deleted(sender, project_id: int):
from repos.vector.weaviate_repo import WeaviateRepo

with WeaviateRepo().weaviate_session() as client:
crud_sentence_embedding.remove_embeddings_by_project(
client=client, project_id=project_id
)
24 changes: 0 additions & 24 deletions backend/src/core/memo/memo_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from core.tag.tag_orm import TagORM
from repos.db.crud_base import CRUDBase
from repos.elastic.elastic_repo import ElasticSearchRepo
from systems.event_system.events import user_added_to_project


class CRUDMemo(CRUDBase[MemoORM, MemoCreateIntern, MemoUpdate]):
Expand Down Expand Up @@ -290,26 +289,3 @@ def get_memo_read_dto_from_orm(db: Session, db_obj: MemoORM) -> MemoRead:


crud_memo = CRUDMemo(MemoORM)


@user_added_to_project.connect
def user_added_to_project_handler(sender, project_id: int, user_id: int):
from uuid import uuid4

from repos.db.sql_repo import SQLRepo

with SQLRepo().db_session() as db:
crud_memo.create_for_attached_object(
db=db,
attached_object_id=project_id,
attached_object_type=AttachedObjectType.project,
create_dto=MemoCreateIntern(
uuid=str(uuid4()),
title="Project Memo",
content="",
content_json="",
starred=False,
user_id=user_id,
project_id=project_id,
),
)
21 changes: 0 additions & 21 deletions backend/src/core/memo/memo_elastic_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
from repos.elastic.elastic_crud_base import ElasticCrudBase
from repos.elastic.elastic_dto_base import PaginatedElasticSearchHits
from systems.event_system.events import (
project_created,
project_deleted,
source_document_deleted,
)

Expand Down Expand Up @@ -85,22 +83,3 @@ def search_memos_by_content_query(
def handle_source_document_deleted(sender, sdoc_id: int, project_id: int):
# TODO: Implement memo deletion logic
print("TODO! Handle source document deleted for memos")


@project_created.connect
def handle_project_created(sender, project_id: int):
from repos.elastic.elastic_repo import ElasticSearchRepo

crud_elastic_memo.index.create_index(
client=ElasticSearchRepo().client, proj_id=project_id
)


@project_deleted.connect
def handle_project_deleted(sender, project_id: int):
from repos.elastic.elastic_repo import ElasticSearchRepo

crud_elastic_memo.index.delete_index(
client=ElasticSearchRepo().client,
proj_id=project_id,
)
6 changes: 4 additions & 2 deletions backend/src/core/memo/memo_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,11 @@ def delete_by_id(
) -> MemoRead:
authz_user.assert_in_same_project_as(Crud.MEMO, memo_id)

memo = crud_memo.delete(db=db, id=memo_id)
memo = crud_memo.read(db, memo_id)
memo_dto = crud_memo.get_memo_read_dto_from_orm(db, memo)
crud_memo.delete(db=db, id=memo_id)

return crud_memo.get_memo_read_dto_from_orm(db, memo)
return memo_dto


@router.get(
Expand Down
76 changes: 2 additions & 74 deletions backend/src/core/project/project_crud.py
Original file line number Diff line number Diff line change
@@ -1,85 +1,15 @@
from fastapi.encoders import jsonable_encoder
from sqlalchemy.orm import Session

from core.code.code_crud import crud_code
from core.metadata.project_metadata_crud import crud_project_meta
from core.project.project_dto import ProjectCreate, ProjectUpdate
from core.project.project_orm import ProjectORM
from core.user.user_crud import (
ASSISTANT_FEWSHOT_ID,
ASSISTANT_TRAINED_ID,
ASSISTANT_ZEROSHOT_ID,
SYSTEM_USER_ID,
crud_user,
)
from core.user.user_orm import UserORM
from repos.db.crud_base import CRUDBase
from repos.filesystem_repo import FilesystemRepo
from systems.event_system.events import (
project_created,
project_deleted,
user_added_to_project,
)


class CRUDProject(CRUDBase[ProjectORM, ProjectCreate, ProjectUpdate]):
### CREATE OPERATIONS ###

def create(
self, db: Session, *, create_dto: ProjectCreate, creating_user_id: int
) -> ProjectORM:
# 1) create the project
dto_obj_data = jsonable_encoder(create_dto)
# noinspection PyArgumentList
db_obj = self.model(**dto_obj_data)
db.add(db_obj)
db.commit()
db.refresh(db_obj)
project_id = db_obj.id

# 2) associate the system users
self.associate_user(db=db, proj_id=project_id, user_id=SYSTEM_USER_ID)
self.associate_user(db=db, proj_id=project_id, user_id=ASSISTANT_ZEROSHOT_ID)
self.associate_user(db=db, proj_id=project_id, user_id=ASSISTANT_FEWSHOT_ID)
self.associate_user(db=db, proj_id=project_id, user_id=ASSISTANT_TRAINED_ID)

# 3) associate the user that created the project
if creating_user_id not in [
SYSTEM_USER_ID,
ASSISTANT_ZEROSHOT_ID,
ASSISTANT_FEWSHOT_ID,
ASSISTANT_TRAINED_ID,
]:
self.associate_user(db=db, proj_id=project_id, user_id=creating_user_id)

# 4) create system codes
crud_code.create_system_codes_for_project(db=db, proj_id=project_id)

# 5) create project metadata
crud_project_meta.create_project_metadata_for_project(db=db, proj_id=project_id)

# 6) create filesystem directory structure
FilesystemRepo().create_directory_structure_for_project(proj_id=project_id)

# 7) emit project created event
project_created.send(self, project_id=project_id)

return db_obj

### DELETE OPERATIONS ###

def delete(self, db: Session, *, id: int) -> ProjectORM:
# 1) delete the project and all connected data via cascading delete
proj_db_obj = super().delete(db=db, id=id)

# 2) delete the files from filesystem
FilesystemRepo().purge_project_data(proj_id=id)

# 3) Emit project deleted event
project_deleted.send(self, project_id=id)

return proj_db_obj

### OTHER OPERATIONS ###

def associate_user(self, db: Session, *, proj_id: int, user_id: int) -> UserORM:
Expand All @@ -92,15 +22,13 @@ def associate_user(self, db: Session, *, proj_id: int, user_id: int) -> UserORM:
db.add(proj_db_obj)
db.commit()

# 3) emit user associated event
user_added_to_project.send(self, project_id=proj_id, user_id=user_id)

return user_db_obj

def dissociate_user(self, db: Session, *, proj_id: int, user_id: int) -> UserORM:
# 1) read project
proj_db_obj = self.read(db=db, id=proj_id)

# remove user from project
# 2) remove user from project
user_db_obj = crud_user.read(db=db, id=user_id)
proj_db_obj.users.remove(user_db_obj)
db.add(proj_db_obj)
Expand Down
Loading