From e7e8e87e07c0bd65885e8352f1b7b8e37bdbf51d Mon Sep 17 00:00:00 2001 From: theosanderson-agent Date: Thu, 26 Mar 2026 11:42:45 +0000 Subject: [PATCH 1/3] refactor(backend): move revocation versionComment from dedicated column to metadata Store revocation versionComment in originalData metadata instead of the dedicated version_comment column, unifying how versionComment is handled for both revisions and revocations. - When creating a revocation, store versionComment in originalData.metadata and compress using the compression service - Update the database view to automatically construct joint_metadata for revocations from originalData, making it pipeline-version-independent - Remove versionCommentColumn from SequenceEntriesTable and SequenceEntriesView - Remove versionComment field from RawProcessedData (now comes through processedData metadata via the view) - Remove conditional versionComment addition in ReleasedDataModel - Add migration V1.26 to move existing version_comment values into originalData metadata and drop the column Closes #3135 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/model/ReleasedDataModel.kt | 8 -- .../submission/SubmissionDatabaseService.kt | 34 ++++-- .../dbtables/SequenceEntriesTable.kt | 1 - .../dbtables/SequenceEntriesView.kt | 1 - ...1.26__move_version_comment_to_metadata.sql | 101 ++++++++++++++++++ .../utils/EarliestReleaseDateFinderTest.kt | 1 - 6 files changed, 125 insertions(+), 21 deletions(-) create mode 100644 backend/src/main/resources/db/migration/V1.26__move_version_comment_to_metadata.sql diff --git a/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt index 2bbd9ca26c..1d1d89a5ea 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt @@ -165,14 +165,6 @@ open class ReleasedDataModel( ) }, ) + - conditionalMetadata( - rawProcessedData.isRevocation, - { - mapOf( - "versionComment" to TextNode(rawProcessedData.versionComment), - ) - }, - ) + conditionalMetadata( earliestReleaseDate != null, { diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt index 462e7a5cba..b1d3a834c5 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt @@ -52,6 +52,7 @@ import org.loculus.backend.api.FileIdAndNameAndReadUrl import org.loculus.backend.api.GeneticSequence import org.loculus.backend.api.GetSequenceResponse import org.loculus.backend.api.Organism +import org.loculus.backend.api.OriginalData import org.loculus.backend.api.OriginalDataWithFileUrls import org.loculus.backend.api.PreprocessingStatus.IN_PROCESSING import org.loculus.backend.api.PreprocessingStatus.PROCESSED @@ -736,7 +737,6 @@ class SubmissionDatabaseService( SequenceEntriesView.accessionColumn, SequenceEntriesView.versionColumn, SequenceEntriesView.isRevocationColumn, - SequenceEntriesView.versionCommentColumn, SequenceEntriesView.jointDataColumn, SequenceEntriesView.submitterColumn, SequenceEntriesView.groupIdColumn, @@ -779,7 +779,6 @@ class SubmissionDatabaseService( DataUseTermsType.fromString(it[DataUseTermsTable.dataUseTermsTypeColumn]), it[DataUseTermsTable.restrictedUntilColumn], ), - versionComment = it[SequenceEntriesView.versionCommentColumn], dataUseTermsChangeDate = it[DataUseTermsTable.changeDateColumn], ) } @@ -955,16 +954,14 @@ class SubmissionDatabaseService( SequenceEntriesTable.insert( SequenceEntriesTable.select( - SequenceEntriesTable.accessionColumn, SequenceEntriesTable.versionColumn.plus(1), - when (versionComment) { - null -> Op.nullOp() - else -> stringParam(versionComment) - }, + SequenceEntriesTable.accessionColumn, + SequenceEntriesTable.versionColumn.plus(1), SequenceEntriesTable.submissionIdColumn, stringParam(authenticatedUser.username), SequenceEntriesTable.groupIdColumn, dateTimeParam(dateProvider.getCurrentDateTime()), - booleanParam(true), SequenceEntriesTable.organismColumn, + booleanParam(true), + SequenceEntriesTable.organismColumn, ).where { ( SequenceEntriesTable.accessionColumn inList @@ -975,7 +972,6 @@ class SubmissionDatabaseService( columns = listOf( SequenceEntriesTable.accessionColumn, SequenceEntriesTable.versionColumn, - SequenceEntriesTable.versionCommentColumn, SequenceEntriesTable.submissionIdColumn, SequenceEntriesTable.submitterColumn, SequenceEntriesTable.groupIdColumn, @@ -985,6 +981,25 @@ class SubmissionDatabaseService( ), ) + if (versionComment != null) { + val originalData = compressionService.compressSequencesInOriginalData( + OriginalData( + metadata = mapOf("versionComment" to versionComment), + unalignedNucleotideSequences = emptyMap(), + ), + organism, + ) + SequenceEntriesTable.update( + where = { + (SequenceEntriesTable.accessionColumn inList accessions) and + SequenceEntriesTable.isMaxVersion and + (SequenceEntriesTable.isRevocationColumn eq true) + }, + ) { + it[originalDataColumn] = originalData + } + } + auditLogger.log( authenticatedUser.username, "Revoked ${accessions.size} sequences: " + @@ -1525,7 +1540,6 @@ data class RawProcessedData( override val accession: Accession, override val version: Version, val isRevocation: Boolean, - val versionComment: String?, val submitter: String, val groupId: Int, val groupName: String, diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/dbtables/SequenceEntriesTable.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/dbtables/SequenceEntriesTable.kt index 494be8492f..5d787bd0ed 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/dbtables/SequenceEntriesTable.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/dbtables/SequenceEntriesTable.kt @@ -21,7 +21,6 @@ object SequenceEntriesTable : Table(SEQUENCE_ENTRIES_TABLE_NAME) { val accessionColumn = varchar("accession", 255) val versionColumn = long("version") - val versionCommentColumn = varchar("version_comment", 255).nullable() val organismColumn = varchar("organism", 255) val submissionIdColumn = varchar("submission_id", 255) val submitterColumn = varchar("submitter", 255) diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/dbtables/SequenceEntriesView.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/dbtables/SequenceEntriesView.kt index 3d53b88486..b88d1bf1cb 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/dbtables/SequenceEntriesView.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/dbtables/SequenceEntriesView.kt @@ -43,7 +43,6 @@ object SequenceEntriesView : Table(SEQUENCE_ENTRIES_VIEW_NAME) { val statusColumn = varchar("status", 255) val processingResultColumn = varchar("processing_result", 255).nullable() val isRevocationColumn = bool("is_revocation").default(false) - val versionCommentColumn = varchar("version_comment", 255).nullable() val errorsColumn = jacksonSerializableJsonb>("errors").nullable() val warningsColumn = jacksonSerializableJsonb>("warnings").nullable() val pipelineVersionColumn = long("pipeline_version").nullable() diff --git a/backend/src/main/resources/db/migration/V1.26__move_version_comment_to_metadata.sql b/backend/src/main/resources/db/migration/V1.26__move_version_comment_to_metadata.sql new file mode 100644 index 0000000000..a202d0b1a2 --- /dev/null +++ b/backend/src/main/resources/db/migration/V1.26__move_version_comment_to_metadata.sql @@ -0,0 +1,101 @@ +-- Moves revocation versionComment from the dedicated version_comment column +-- into the original_data JSONB metadata, and updates the view to automatically +-- construct joint_metadata for revocations from original_data. +-- This unifies how versionComment is stored for both revisions and revocations. + +-- Step 1: For existing revocations with non-null version_comment and no original_data, +-- create original_data with versionComment in metadata +UPDATE sequence_entries +SET original_data = jsonb_build_object( + 'metadata', jsonb_build_object('versionComment', version_comment), + 'unalignedNucleotideSequences', '{}'::jsonb +) +WHERE is_revocation = true AND version_comment IS NOT NULL AND original_data IS NULL; + +-- For revocations that already have original_data (unlikely but safe), +-- merge versionComment into existing metadata +UPDATE sequence_entries +SET original_data = jsonb_set( + original_data, + '{metadata}', + COALESCE(original_data -> 'metadata', '{}'::jsonb) || jsonb_build_object('versionComment', version_comment) +) +WHERE is_revocation = true AND version_comment IS NOT NULL AND original_data IS NOT NULL; + +-- Step 2: Drop the view (must be done before dropping the column) +DROP VIEW IF EXISTS sequence_entries_view; + +-- Step 3: Drop the version_comment column +ALTER TABLE sequence_entries DROP COLUMN version_comment; + +-- Step 4: Recreate the view without version_comment. +-- For revocations, joint_metadata is constructed from original_data +-- so versionComment survives pipeline version changes. +CREATE VIEW sequence_entries_view AS +SELECT + se.accession, + se.version, + se.organism, + se.submission_id, + se.submitter, + se.approver, + se.group_id, + se.submitted_at, + se.released_at, + se.is_revocation, + se.original_data, + sepd.started_processing_at, + sepd.finished_processing_at, + sepd.processed_data, + CASE + WHEN se.is_revocation AND se.original_data IS NOT NULL THEN + jsonb_build_object( + 'metadata', COALESCE(se.original_data -> 'metadata', '{}'::jsonb), + 'unalignedNucleotideSequences', '{}'::jsonb, + 'alignedNucleotideSequences', '{}'::jsonb, + 'nucleotideInsertions', '{}'::jsonb, + 'alignedAminoAcidSequences', '{}'::jsonb, + 'aminoAcidInsertions', '{}'::jsonb, + 'files', 'null'::jsonb + ) + WHEN se.is_revocation THEN NULL + WHEN aem.external_metadata IS NULL THEN sepd.processed_data + ELSE sepd.processed_data || + jsonb_build_object('metadata', (sepd.processed_data -> 'metadata') || aem.external_metadata) + END AS joint_metadata, + CASE + WHEN se.is_revocation THEN cpp.version + ELSE sepd.pipeline_version + END AS pipeline_version, + sepd.errors, + sepd.warnings, + CASE + WHEN se.released_at IS NOT NULL THEN 'APPROVED_FOR_RELEASE' + WHEN se.is_revocation THEN 'PROCESSED' + WHEN sepd.processing_status = 'IN_PROCESSING' THEN 'IN_PROCESSING' + WHEN sepd.processing_status = 'PROCESSED' THEN 'PROCESSED' + ELSE 'RECEIVED' + END AS status, + CASE + WHEN sepd.processing_status = 'IN_PROCESSING' THEN NULL + WHEN sepd.errors IS NOT NULL AND jsonb_array_length(sepd.errors) > 0 THEN 'HAS_ERRORS' + WHEN sepd.warnings IS NOT NULL AND jsonb_array_length(sepd.warnings) > 0 THEN 'HAS_WARNINGS' + ELSE 'NO_ISSUES' + END AS processing_result +FROM sequence_entries se +LEFT JOIN current_processing_pipeline cpp + ON se.organism = cpp.organism +LEFT JOIN sequence_entries_preprocessed_data sepd + ON se.accession = sepd.accession + AND se.version = sepd.version + AND sepd.pipeline_version = cpp.version +LEFT JOIN ( + SELECT + em.accession, + em.version, + jsonb_merge_agg(em.external_metadata) AS external_metadata + FROM external_metadata em + GROUP BY em.accession, em.version +) aem + ON aem.accession = se.accession + AND aem.version = se.version; diff --git a/backend/src/test/kotlin/org/loculus/backend/utils/EarliestReleaseDateFinderTest.kt b/backend/src/test/kotlin/org/loculus/backend/utils/EarliestReleaseDateFinderTest.kt index 5c623a693f..06a88dbe16 100644 --- a/backend/src/test/kotlin/org/loculus/backend/utils/EarliestReleaseDateFinderTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/utils/EarliestReleaseDateFinderTest.kt @@ -66,7 +66,6 @@ fun row( files = null, ), isRevocation = false, - versionComment = null, submitter = "foo", submissionId = "foo", submittedAtTimestamp = releasedAt, From 97b55366725b97e22640538a31c9d91effc442b6 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Thu, 26 Mar 2026 12:30:37 +0000 Subject: [PATCH 2/3] Update schema documentation based on migration changes --- backend/docs/db/schema.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/docs/db/schema.sql b/backend/docs/db/schema.sql index 1303346292..78b79a679e 100644 --- a/backend/docs/db/schema.sql +++ b/backend/docs/db/schema.sql @@ -4,8 +4,8 @@ \restrict dummy --- Dumped from database version 15.15 (Debian 15.15-1.pgdg13+1) --- Dumped by pg_dump version 16.11 (Debian 16.11-1.pgdg13+1) +-- Dumped from database version 15.17 (Debian 15.17-1.pgdg13+1) +-- Dumped by pg_dump version 16.13 (Debian 16.13-1.pgdg13+1) SET statement_timeout = 0; SET lock_timeout = 0; @@ -426,8 +426,7 @@ CREATE TABLE public.sequence_entries ( submitted_at timestamp without time zone NOT NULL, released_at timestamp without time zone, is_revocation boolean DEFAULT false NOT NULL, - original_data jsonb, - version_comment text + original_data jsonb ); @@ -468,11 +467,12 @@ CREATE VIEW public.sequence_entries_view AS se.released_at, se.is_revocation, se.original_data, - se.version_comment, sepd.started_processing_at, sepd.finished_processing_at, sepd.processed_data, CASE + WHEN (se.is_revocation AND (se.original_data IS NOT NULL)) THEN jsonb_build_object('metadata', COALESCE((se.original_data -> 'metadata'::text), '{}'::jsonb), 'unalignedNucleotideSequences', '{}'::jsonb, 'alignedNucleotideSequences', '{}'::jsonb, 'nucleotideInsertions', '{}'::jsonb, 'alignedAminoAcidSequences', '{}'::jsonb, 'aminoAcidInsertions', '{}'::jsonb, 'files', 'null'::jsonb) + WHEN se.is_revocation THEN NULL::jsonb WHEN (aem.external_metadata IS NULL) THEN sepd.processed_data ELSE (sepd.processed_data || jsonb_build_object('metadata', ((sepd.processed_data -> 'metadata'::text) || aem.external_metadata))) END AS joint_metadata, From 30a12fcd2a87a39afa938039ed2677e9b142a964 Mon Sep 17 00:00:00 2001 From: anna-parker <50943381+anna-parker@users.noreply.github.com> Date: Thu, 26 Mar 2026 13:41:11 +0100 Subject: [PATCH 3/3] Trigger workflow / empty commit