diff --git a/integration-tests/tests/pages/submission.page.ts b/integration-tests/tests/pages/submission.page.ts index c1b7a9d6e0..7bfdc58e93 100644 --- a/integration-tests/tests/pages/submission.page.ts +++ b/integration-tests/tests/pages/submission.page.ts @@ -96,7 +96,7 @@ export class SingleSequenceSubmissionPage extends SubmissionPage { await this.page.getByLabel('Author affiliations').fill(authorAffiliations); } - async fillSubmissionFormDummyOrganism({ + async fillSubmissionFormFilesOrganism({ submissionId, country, date, diff --git a/integration-tests/tests/specs/features/file-sharing.spec.ts b/integration-tests/tests/specs/features/file-sharing.spec.ts index 51364e6dea..95dfc55a3a 100644 --- a/integration-tests/tests/specs/features/file-sharing.spec.ts +++ b/integration-tests/tests/specs/features/file-sharing.spec.ts @@ -7,7 +7,7 @@ import { SearchPage } from '../../pages/search.page'; import { BulkSubmissionPage, SingleSequenceSubmissionPage } from '../../pages/submission.page'; const ORGANISM_NAME = 'Test organism (with files)'; -const ORGANISM_URL_NAME = 'dummy-organism-with-files'; +const ORGANISM_URL_NAME = 'test-organism-files'; const RAW_READS = 'raw_reads'; const METADATA_HEADERS = ['submissionId', 'country', 'date']; const COUNTRY_1 = 'Norway'; @@ -26,7 +26,7 @@ test('submit single seq w/ 2 files thru single seq submission form', async ({ void groupId; const submissionPage = new SingleSequenceSubmissionPage(page); await submissionPage.navigateToSubmissionPage(ORGANISM_NAME); - await submissionPage.fillSubmissionFormDummyOrganism({ + await submissionPage.fillSubmissionFormFilesOrganism({ submissionId: ID_1, country: COUNTRY_1, date: '2023-10-15', @@ -175,7 +175,7 @@ test('single revise seq with files via edit page', async ({ page, groupId, tmpDi // Step 1: Submit and release a sequence const submissionPage = new SingleSequenceSubmissionPage(page); await submissionPage.navigateToSubmissionPage(ORGANISM_NAME); - await submissionPage.fillSubmissionFormDummyOrganism({ + await submissionPage.fillSubmissionFormFilesOrganism({ submissionId: 'single-rev', country: COUNTRY_1, date: '2023-01-01', diff --git a/kubernetes/loculus/values.yaml b/kubernetes/loculus/values.yaml index e931f6054c..8ab4ff6f49 100644 --- a/kubernetes/loculus/values.yaml +++ b/kubernetes/loculus/values.yaml @@ -1645,7 +1645,7 @@ defaultOrganisms: sequence: "[[URL:https://corneliusroemer.github.io/seqs/artefacts/sars-cov-2/ORF9b.fasta]]" - name: S sequence: "[[URL:https://corneliusroemer.github.io/seqs/artefacts/sars-cov-2/S.fasta]]" - dummy-organism-with-files: + test-organism-files: schema: image: "https://cdn.who.int/media/images/default-source/mca/mca-covid-19/coronavirus-2.tmb-1920v.jpg?sfvrsn=4dba955c_19" organismName: "Test organism (with files)" @@ -1657,21 +1657,41 @@ defaultOrganisms: - name: raw_reads files: - name: raw_reads - metadata: *dummyMetadata + metadata: + - name: date + type: date + initiallyVisible: true + header: "Collection Details" + - name: country + initiallyVisible: true + type: string + generateIndex: true + autocomplete: true + header: "Collection Details" website: tableColumns: - country - - division - date defaultOrder: descending defaultOrderBy: date preprocessing: - version: 1 - image: ghcr.io/loculus-project/preprocessing-dummy + image: ghcr.io/loculus-project/preprocessing-nextclade args: - - "--watch" - - "--disableConsensusSequences" - referenceGenomes: [] + - "prepro" + configFile: + log_level: DEBUG + batch_size: 100 + segments: + - name: main + references: + - name: singleReference + genes: [] + referenceGenomes: + - name: main + references: + - name: singleReference + sequence: "[[URL:https://corneliusroemer.github.io/seqs/artefacts/sars-cov-2/reference.fasta]]" not-aligned-organism: enabled: true schema: diff --git a/preprocessing/nextclade/src/loculus_preprocessing/backend.py b/preprocessing/nextclade/src/loculus_preprocessing/backend.py index cdc6fe3095..711af286a9 100644 --- a/preprocessing/nextclade/src/loculus_preprocessing/backend.py +++ b/preprocessing/nextclade/src/loculus_preprocessing/backend.py @@ -101,6 +101,7 @@ def parse_ndjson(ndjson_data: str) -> Sequence[UnprocessedEntry]: unalignedNucleotideSequences=trimmed_unaligned_nucleotide_sequences if unaligned_nucleotide_sequences else {}, + files=json_object["data"].get("files"), ) entry = UnprocessedEntry( accessionVersion=f"{json_object['accession']}.{json_object['version']}", diff --git a/preprocessing/nextclade/src/loculus_preprocessing/datatypes.py b/preprocessing/nextclade/src/loculus_preprocessing/datatypes.py index 692aac2304..1da3175a85 100644 --- a/preprocessing/nextclade/src/loculus_preprocessing/datatypes.py +++ b/preprocessing/nextclade/src/loculus_preprocessing/datatypes.py @@ -79,6 +79,7 @@ class UnprocessedData: submittedAt: str # timestamp # noqa: N815 metadata: InputMetadata unalignedNucleotideSequences: dict[SequenceName, NucleotideSequence | None] # noqa: N815 + files: dict[str, list[dict[str, str]]] | None = None # External files attached to submission @dataclass diff --git a/preprocessing/nextclade/src/loculus_preprocessing/prepro.py b/preprocessing/nextclade/src/loculus_preprocessing/prepro.py index 8ccfce847c..ef4f2eae0d 100644 --- a/preprocessing/nextclade/src/loculus_preprocessing/prepro.py +++ b/preprocessing/nextclade/src/loculus_preprocessing/prepro.py @@ -268,6 +268,14 @@ def processed_entry_no_alignment( # noqa: PLR0913, PLR0917 nucleotide_insertions: dict[SequenceName, list[NucleotideInsertion]] = {} amino_acid_insertions: dict[GeneName, list[AminoAcidInsertion]] = {} + # Convert files format from backend (list of dicts) to FileIdAndName objects + files_output = None + if unprocessed.files: + files_output = { + category: [FileIdAndName(fileId=f["fileId"], name=f["name"]) for f in file_list] + for category, file_list in unprocessed.files.items() + } + return SubmissionData( processed_entry=ProcessedEntry( accession=accession_from_str(accession_version), @@ -280,6 +288,7 @@ def processed_entry_no_alignment( # noqa: PLR0913, PLR0917 alignedAminoAcidSequences=aligned_aminoacid_sequences, aminoAcidInsertions=amino_acid_insertions, sequenceNameToFastaId=sequenceNameToFastaId, + files=files_output, ), errors=errors, warnings=warnings,