From 1f3106342777c983aa5f5d9f35dee9432e2e339c Mon Sep 17 00:00:00 2001 From: James R Kane Date: Fri, 5 Dec 2025 06:53:38 -0600 Subject: [PATCH 01/31] Adding initial PDS registration work --- app/api/PDSRegistrationEndpoints.scala | 33 ++++++ .../PDSRegistrationController.scala | 41 +++++++ app/models/PDSRegistration.scala | 19 ++++ app/models/dal/MetadataSchema.scala | 27 +++++ app/modules/PDSRegistrationModule.scala | 13 +++ .../PDSRegistrationRepository.scala | 44 +++++++ app/services/ATProtocolClient.scala | 92 +++++++++++++++ app/services/PDSRegistrationService.scala | 107 ++++++++++++++++++ conf/application.conf | 19 +++- conf/evolutions/metadata/1.sql | 20 ++++ conf/routes | 2 + documents/register.mermaid | 38 +++++++ 12 files changed, 452 insertions(+), 3 deletions(-) create mode 100644 app/api/PDSRegistrationEndpoints.scala create mode 100644 app/controllers/PDSRegistrationController.scala create mode 100644 app/models/PDSRegistration.scala create mode 100644 app/models/dal/MetadataSchema.scala create mode 100644 app/modules/PDSRegistrationModule.scala create mode 100644 app/repositories/PDSRegistrationRepository.scala create mode 100644 app/services/ATProtocolClient.scala create mode 100644 app/services/PDSRegistrationService.scala create mode 100644 conf/evolutions/metadata/1.sql create mode 100755 documents/register.mermaid diff --git a/app/api/PDSRegistrationEndpoints.scala b/app/api/PDSRegistrationEndpoints.scala new file mode 100644 index 0000000..4dd2cc8 --- /dev/null +++ b/app/api/PDSRegistrationEndpoints.scala @@ -0,0 +1,33 @@ +package api + +import models.PDSRegistration +import play.api.libs.json.{Format, Json} +import sttp.tapir.* +import sttp.tapir.generic.auto.* +import sttp.tapir.json.play.* + +// --- DTOs (Data Transfer Objects) --- +case class PdsRegistrationRequest( + did: String, + handle: String, + pdsUrl: String, + rToken: String +) + +object PdsRegistrationRequest { + implicit val format: Format[PdsRegistrationRequest] = 
Json.format[PdsRegistrationRequest] +} + +object PDSRegistrationEndpoints { + + val registerPdsEndpoint: PublicEndpoint[PdsRegistrationRequest, String, PDSRegistration, Any] = + endpoint.post + .in("api" / "registerPDS") + .name("Register PDS") + .description("Registers a new PDS (Personal Data Server) with the system.") + .in(jsonBody[PdsRegistrationRequest]) + .out(jsonBody[PDSRegistration]) + .errorOut(stringBody) + + val all = List(registerPdsEndpoint) +} diff --git a/app/controllers/PDSRegistrationController.scala b/app/controllers/PDSRegistrationController.scala new file mode 100644 index 0000000..102f1ea --- /dev/null +++ b/app/controllers/PDSRegistrationController.scala @@ -0,0 +1,41 @@ +package controllers + +import api.PdsRegistrationRequest +import play.api.libs.json.{JsError, JsSuccess, Json} +import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} +import services.PDSRegistrationService + +import javax.inject.{Inject, Singleton} +import scala.concurrent.{ExecutionContext, Future} + +@Singleton +class PDSRegistrationController @Inject()( + val controllerComponents: ControllerComponents, + pdsRegistrationService: PDSRegistrationService +)(implicit ec: ExecutionContext) extends BaseController { + + /** + * Handles the registration of a new Personal Data Server (PDS). + * Expects a JSON body containing PdsRegistrationRequest. + * + * @return An `Action` that processes the registration request. 
+ */ + def registerPDS(): Action[play.api.libs.json.JsValue] = Action.async(parse.json) { implicit request => + request.body.validate[PdsRegistrationRequest] match { + case JsSuccess(pdsRegistrationRequest, _) => + pdsRegistrationService.registerPDS( + pdsRegistrationRequest.did, + pdsRegistrationRequest.handle, + pdsRegistrationRequest.pdsUrl, + pdsRegistrationRequest.rToken + ).map { + case Right(pdsRegistration) => + Ok(Json.toJson(pdsRegistration)) + case Left(errorMessage) => + BadRequest(Json.obj("error" -> errorMessage)) + } + case JsError(errors) => + Future.successful(BadRequest(Json.obj("error" -> "Invalid JSON body", "details" -> JsError.toJson(errors)))) + } + } +} diff --git a/app/models/PDSRegistration.scala b/app/models/PDSRegistration.scala new file mode 100644 index 0000000..95fcc2d --- /dev/null +++ b/app/models/PDSRegistration.scala @@ -0,0 +1,19 @@ +package models + +import java.time.ZonedDateTime +import play.api.libs.json.{Format, Json} // Import Play-JSON classes + +case class PDSRegistration( + did: String, + pdsUrl: String, + handle: String, + lastCommitCid: Option[String], + lastCommitSeq: Option[Long], + cursor: Long = 0L, + createdAt: ZonedDateTime, + updatedAt: ZonedDateTime +) + +object PDSRegistration { + implicit val format: Format[PDSRegistration] = Json.format[PDSRegistration] +} diff --git a/app/models/dal/MetadataSchema.scala b/app/models/dal/MetadataSchema.scala new file mode 100644 index 0000000..477aad8 --- /dev/null +++ b/app/models/dal/MetadataSchema.scala @@ -0,0 +1,27 @@ +package models.dal + +import models.PDSRegistration +import models.dal.MyPostgresProfile.api.* +import slick.lifted.ProvenShape + +import java.time.ZonedDateTime + +object MetadataSchema { + + class PDSRegistrationsTable(tag: Tag) extends Table[PDSRegistration](tag, "pds_registrations") { + def did = column[String]("did", O.PrimaryKey) + def pdsUrl = column[String]("pds_url") + def handle = column[String]("handle") + def lastCommitCid = 
column[Option[String]]("last_commit_cid") + def lastCommitSeq = column[Option[Long]]("last_commit_seq") + def cursor = column[Long]("cursor") + def createdAt = column[ZonedDateTime]("created_at") + def updatedAt = column[ZonedDateTime]("updated_at") + + def * : ProvenShape[PDSRegistration] = ( + did, pdsUrl, handle, lastCommitCid, lastCommitSeq, cursor, createdAt, updatedAt + ) <> ((PDSRegistration.apply _).tupled, PDSRegistration.unapply) + } + + val pdsRegistrations = TableQuery[PDSRegistrationsTable] +} diff --git a/app/modules/PDSRegistrationModule.scala b/app/modules/PDSRegistrationModule.scala new file mode 100644 index 0000000..b7f23be --- /dev/null +++ b/app/modules/PDSRegistrationModule.scala @@ -0,0 +1,13 @@ +package modules + +import com.google.inject.AbstractModule +import repositories.PDSRegistrationRepository +import services.{ATProtocolClient, PDSRegistrationService} + +class PDSRegistrationModule extends AbstractModule { + override def configure(): Unit = { + bind(classOf[ATProtocolClient]).asEagerSingleton() + bind(classOf[PDSRegistrationRepository]).asEagerSingleton() + bind(classOf[PDSRegistrationService]).asEagerSingleton() + } +} diff --git a/app/repositories/PDSRegistrationRepository.scala b/app/repositories/PDSRegistrationRepository.scala new file mode 100644 index 0000000..e9922d8 --- /dev/null +++ b/app/repositories/PDSRegistrationRepository.scala @@ -0,0 +1,44 @@ +package repositories + +import models.PDSRegistration +import models.dal.MetadataSchema.pdsRegistrations +import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} +import slick.jdbc.JdbcProfile + +import java.time.ZonedDateTime +import javax.inject.{Inject, Singleton} +import scala.concurrent.{ExecutionContext, Future} + +@Singleton +class PDSRegistrationRepository @Inject()( + @javax.inject.Named("metadata") protected val dbConfigProvider: DatabaseConfigProvider +)(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[JdbcProfile] { + + import 
profile.api._ + + def create(pdsRegistration: PDSRegistration): Future[PDSRegistration] = db.run { + pdsRegistrations += pdsRegistration + }.map(_ => pdsRegistration) + + def findByDid(did: String): Future[Option[PDSRegistration]] = db.run { + pdsRegistrations.filter(_.did === did).result.headOption + } + + def findByHandle(handle: String): Future[Option[PDSRegistration]] = db.run { + pdsRegistrations.filter(_.handle === handle).result.headOption + } + + def updateCursor(did: String, lastCommitCid: String, newCursor: Long): Future[Int] = db.run { + pdsRegistrations.filter(_.did === did) + .map(reg => (reg.lastCommitCid, reg.cursor, reg.updatedAt)) + .update((Some(lastCommitCid), newCursor, ZonedDateTime.now())) + } + + def listAll: Future[Seq[PDSRegistration]] = db.run { + pdsRegistrations.result + } + + def delete(did: String): Future[Int] = db.run { + pdsRegistrations.filter(_.did === did).delete + } +} diff --git a/app/services/ATProtocolClient.scala b/app/services/ATProtocolClient.scala new file mode 100644 index 0000000..aaf24a6 --- /dev/null +++ b/app/services/ATProtocolClient.scala @@ -0,0 +1,92 @@ +package services + +import com.google.inject.Inject +import play.api.libs.json.{JsError, JsSuccess, Json} +import play.api.libs.ws.WSClient +import play.api.{Configuration, Logging} + +import scala.concurrent.duration.* // Import for FiniteDuration + +import scala.concurrent.{ExecutionContext, Future} + +/** + * Service to interact with the AT Protocol for PDS (Personal Data Server) operations. + * + * This client provides methods to resolve DIDs to PDS endpoints and verify repository commits. + * + * @param ws The `WSClient` used for making HTTP requests. + * @param configuration Play configuration for settings like timeouts. + * @param ec The execution context for asynchronous operations. 
+ */ +class ATProtocolClient @Inject()( + ws: WSClient, + configuration: Configuration +)(implicit ec: ExecutionContext) extends Logging { + + private val timeout: FiniteDuration = configuration.getOptional[Int]("atproto.client.timeout").getOrElse(5000).millis + + /** + * Resolves a DID to its associated PDS endpoint URL. + * This typically involves querying a DID resolver or a well-known endpoint on the PDS itself. + * + * @param did The Decentralized Identifier (DID) to resolve. + * @return A Future containing the PDS URL if resolved, otherwise None. + */ + def resolveHandle(handle: String): Future[Option[String]] = { + // This is a simplified resolution. In a real scenario, this would involve a DID resolver service. + // For now, we assume the handle can directly be used to construct a potential PDS URL for verification. + // Or, more accurately, the PDS_URL is provided by the client, and this step is more about DID Document verification. + // Based on the mermaid diagram, R_Edge verifies identity via resolveHandle. + // The ScalaApp receives DID, R_Token, PDS_URL. So, we verify the PDS_URL against the DID. + Future.successful(None) // Placeholder for actual implementation + } + + /** + * Verifies a PDS and retrieves its latest commit information using the provided authentication token. + * + * @param pdsUrl The base URL of the PDS. + * @param repoDid The DID of the repository on the PDS. + * @param authToken The authentication token (JWT) for accessing the PDS. + * @return A Future containing `Option[LatestCommitResponse]` if successful, otherwise None. 
+ */ + def getLatestCommit(pdsUrl: String, repoDid: String, authToken: String): Future[Option[LatestCommitResponse]] = { + val url = s"$pdsUrl/xrpc/com.atproto.repo.getCommit" // ATProto spec uses getCommit for this info + + ws.url(url) + .addQueryStringParameters("repo" -> repoDid) + .withHttpHeaders("Authorization" -> s"Bearer $authToken") + .withRequestTimeout(timeout) + .get() + .map { response => + if (response.status == 200) { + Json.fromJson[LatestCommitResponse](response.json) match { + case JsSuccess(value, _) => Some(value) + case JsError(errors) => + logger.error(s"Failed to parse getLatestCommit response from $pdsUrl for $repoDid: $errors") + None + } + } else { + logger.warn(s"Failed to get latest commit from $pdsUrl for $repoDid. Status: ${response.status}, Body: ${response.body}") + None + } + } + .recover { + case e: Exception => + logger.error(s"Error calling getLatestCommit on $pdsUrl for $repoDid: ${e.getMessage}", e) + None + } + } +} + +// Define case class for the expected response from com.atproto.repo.getCommit +// This is a simplified representation. The actual response might be more complex. +// Based on AT Protocol spec, getCommit returns 'cid', 'rev', 'seq' etc. 
+case class LatestCommitResponse( + cid: String, // The CID of the latest commit + rev: String, // The repository revision + seq: Long // The sequence number of the latest commit +) + +object LatestCommitResponse { + implicit val format: play.api.libs.json.Format[LatestCommitResponse] = Json.format[LatestCommitResponse] +} diff --git a/app/services/PDSRegistrationService.scala b/app/services/PDSRegistrationService.scala new file mode 100644 index 0000000..10bcc09 --- /dev/null +++ b/app/services/PDSRegistrationService.scala @@ -0,0 +1,107 @@ +package services + +import models.PDSRegistration +import play.api.Logging + +import javax.inject.{Inject, Singleton} +import repositories.PDSRegistrationRepository +import java.time.ZonedDateTime // Import ZonedDateTime + +import scala.concurrent.{ExecutionContext, Future} + +@Singleton +class PDSRegistrationService @Inject()( + atProtocolClient: ATProtocolClient, + pdsRegistrationRepository: PDSRegistrationRepository +)(implicit ec: ExecutionContext) extends Logging { + + /** + * Registers a new PDS, performing server-side verification with the AT Protocol. + * + * @param did The Decentralized Identifier (DID) of the PDS. + * @param handle The handle associated with the PDS. + * @param pdsUrl The base URL of the PDS. + * @param rToken The AT Protocol authentication token provided by the Researcher Edge App. + * @return A Future indicating success or failure of the registration. + */ + def registerPDS(did: String, handle: String, pdsUrl: String, rToken: String): Future[Either[String, PDSRegistration]] = { + // 1. Check if PDS already registered + pdsRegistrationRepository.findByDid(did).flatMap { + case Some(existingRegistration) => + Future.successful(Left(s"PDS with DID $did is already registered.")) + case None => + // 2. Perform server-side verification with the AT Protocol + atProtocolClient.getLatestCommit(pdsUrl, did, rToken).flatMap { + case Some(commitResponse) => + // 3. 
Validation: Confirm DID is valid and PDS is responsive (implicitly done by successful commit fetch) + // 4. Write New DID Record + val newRegistration = PDSRegistration( + did = did, + pdsUrl = pdsUrl, + handle = handle, + lastCommitCid = Some(commitResponse.cid), + lastCommitSeq = Some(commitResponse.seq), + cursor = 0L, + createdAt = ZonedDateTime.now(), + updatedAt = ZonedDateTime.now() + ) + pdsRegistrationRepository.create(newRegistration).map(Right(_)) + case None => + Future.successful(Left(s"Failed to verify PDS $pdsUrl for DID $did. Could not get latest commit.")) + } + } recover { + case e: Exception => + logger.error(s"Error during PDS registration for DID $did: ${e.getMessage}", e) + Left("An unexpected error occurred during PDS registration.") + } + } + + /** + * Retrieves a PDS registration by its DID. + */ + def getPDSByDid(did: String): Future[Option[PDSRegistration]] = { + pdsRegistrationRepository.findByDid(did) + } + + /** + * Retrieves a PDS registration by its handle. + */ + def getPDSByHandle(handle: String): Future[Option[PDSRegistration]] = { + pdsRegistrationRepository.findByHandle(handle) + } + + /** + * Lists all registered PDS entries. + */ + def listAllPDS(): Future[Seq[PDSRegistration]] = { + pdsRegistrationRepository.listAll + } + + /** + * Updates the cursor (last commit CID and sequence) for a registered PDS. + */ + def updatePDSCursor(did: String, lastCommitCid: String, newCursor: Long): Future[Either[String, Unit]] = { + pdsRegistrationRepository.updateCursor(did, lastCommitCid, newCursor).map { affectedRows => + if (affectedRows > 0) Right(()) + else Left(s"PDS with DID $did not found or cursor update failed.") + } recover { + case e: Exception => + logger.error(s"Error updating PDS cursor for DID $did: ${e.getMessage}", e) + Left("An unexpected error occurred during PDS cursor update.") + } + } + + /** + * Deletes a PDS registration. 
+ */ + def deletePDS(did: String): Future[Either[String, Unit]] = { + pdsRegistrationRepository.delete(did).map { affectedRows => + if (affectedRows > 0) Right(()) + else Left(s"PDS with DID $did not found or deletion failed.") + } recover { + case e: Exception => + logger.error(s"Error deleting PDS for DID $did: ${e.getMessage}", e) + Left("An unexpected error occurred during PDS deletion.") + } + } +} diff --git a/conf/application.conf b/conf/application.conf index bb33481..3ff1d1e 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -16,6 +16,7 @@ play.modules.enabled += "modules.RecaptchaModule" play.modules.enabled += "modules.StartupModule" play.modules.enabled += "modules.ApplicationModule" play.modules.enabled += "modules.ApiSecurityModule" +play.modules.enabled += "modules.PDSRegistrationModule" play.cache.caffeine { # Default cache configuration @@ -57,9 +58,21 @@ slick.dbs.default { # Local Development Credentials username="decodingus_user" # Overridden in production on startup - password="decodingus_password" - } -} + password="decodingus_password" + } + + slick.dbs.metadata { + profile="slick.jdbc.PostgresProfile$" + db { + numThreads = 32 + queueSize = 5000 + + driver="org.postgresql.Driver" + url="jdbc:postgresql://localhost:5432/decodingus_db" # Temporarily pointing to the same DB + username="decodingus_user" + password="decodingus_password" + } + }} contact { recipient.email = "your-email@domain.com" diff --git a/conf/evolutions/metadata/1.sql b/conf/evolutions/metadata/1.sql new file mode 100644 index 0000000..16d7dfb --- /dev/null +++ b/conf/evolutions/metadata/1.sql @@ -0,0 +1,20 @@ +# PDS Registrations schema +# --- !Ups + +CREATE TABLE pds_registrations ( + did TEXT PRIMARY KEY, + pds_url TEXT NOT NULL, + handle TEXT NOT NULL, + last_commit_cid TEXT, + last_commit_seq BIGINT DEFAULT 0, + cursor BIGINT NOT NULL DEFAULT 0, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE 
INDEX pds_registrations_handle_idx ON pds_registrations (handle); +CREATE INDEX pds_registrations_last_commit_cid_idx ON pds_registrations (last_commit_cid); + +# --- !Downs + +DROP TABLE IF EXISTS pds_registrations; \ No newline at end of file diff --git a/conf/routes b/conf/routes index fbcd384..6353713 100644 --- a/conf/routes +++ b/conf/routes @@ -89,6 +89,8 @@ PATCH /api/private/sequencing-labs/:id DELETE /api/private/sequencing-labs/:id controllers.SequencingLabAdminController.delete(id: Int) # --- API Routes (Handled by Tapir, including Swagger UI) --- +POST /api/registerPDS controllers.PDSRegistrationController.registerPDS() + # Delegate all requests starting with /api to the Tapir-based ApiRouter -> /api controllers.ApiRouter diff --git a/documents/register.mermaid b/documents/register.mermaid new file mode 100755 index 0000000..e42590e --- /dev/null +++ b/documents/register.mermaid @@ -0,0 +1,38 @@ +sequenceDiagram + participant R_Edge as "Researcher (JVM Edge App)" + participant R_PDS as "Researcher's PDS" + participant ScalaApp as "App Server (Scala/Play)" + participant MetadataDB as "T4 Metadata DB (DID Registry)" + + title PDS Registration and Sync Setup + + R_Edge->>R_PDS: 1. Login: com.atproto.server.createSession(handle, password) + activate R_PDS + R_PDS-->>R_Edge: 2. Response: Auth Token (R_Token), DID (did:plc:XYZ) + deactivate R_PDS + + R_Edge->>R_PDS: 3. Verify Identity: com.atproto.identity.resolveHandle + activate R_PDS + R_PDS-->>R_Edge: 4. Response: DID Document (Confirms PDS Endpoint) + deactivate R_PDS + + R_Edge->>ScalaApp: 5. Registration Request: POST /api/registerPDS(DID, R_Token, PDS_URL) + activate ScalaApp + + ScalaApp->>R_PDS: 6. *Server-Side Verification*: com.atproto.repo.getLatestCommit (Using R_Token) + activate R_PDS + R_PDS-->>ScalaApp: 7. Response: Latest Commit CID, Repo Root + deactivate R_PDS + + ScalaApp->>ScalaApp: 8. Validation: Confirm DID is valid and PDS is responsive + + ScalaApp->>MetadataDB: 9. 
Write New DID Record: INSERT(DID, PDS_URL, Initial_Cursor=0) + activate MetadataDB + MetadataDB-->>ScalaApp: 10. Success + deactivate MetadataDB + + ScalaApp-->>R_Edge: 11. Final Response: Registration Success + deactivate ScalaApp + + ScalaApp->>ScalaApp: 12. Trigger Internal Notification (e.g., Pekko Pub/Sub) + Note over ScalaApp, MetadataDB: Rust Sync Cluster detects new entry in Metadata DB (next poll) and begins monitoring. From b58b864cc11471e73186a09c8c9922d9a8e242e5 Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 5 Dec 2025 06:56:02 -0600 Subject: [PATCH 02/31] Add flow diagram for sync --- documents/BGS-Firehouse-Sync.mermaid | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 documents/BGS-Firehouse-Sync.mermaid diff --git a/documents/BGS-Firehouse-Sync.mermaid b/documents/BGS-Firehouse-Sync.mermaid new file mode 100644 index 0000000..93f297e --- /dev/null +++ b/documents/BGS-Firehouse-Sync.mermaid @@ -0,0 +1,25 @@ +sequenceDiagram + participant AS as App Server (Play View) + participant PS1 as Participant PDS 1 + participant PS2 as Participant PDS 2 + participant DBR as Internal DID Registry + participant IMB as Internal Message Bus (Kafka/Akka) + + title Custom BGS/Firehose Sync Flow + + AS->>DBR: 1. Get List of Active DIDs and Sync Cursors + + loop Sync all DIDs in Registry + AS->>PS1: 2a. Sync Request: com.atproto.sync.getLatestCommit(did:...) + PS1-->>AS: 3a. Commit Response (Root CID) + + AS->>PS1: 4a. Fetch Blocks: com.atproto.sync.getRepo(did:..., since: Cursor) + PS1-->>AS: 5a. Response: CAR file containing new records/diff + + AS->>AS: 6. Verify, Decode, Filter for "app.citizen.report" + + AS->>IMB: 7. Publish Event: Decoded Citizen Report + AS->>DBR: 8. Update Cursor (New last synced sequence number) + end + + AS->>AS: 9. 
App View Consumes IMB Topic (The Custom Firehose) \ No newline at end of file From 71263665ca94c65c4455baf3de4147cf32b961ae Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 5 Dec 2025 07:24:17 -0600 Subject: [PATCH 03/31] Fixing config for new metadata db --- .../PDSRegistrationRepository.scala | 3 ++- conf/application.conf | 27 ++++++++++--------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/app/repositories/PDSRegistrationRepository.scala b/app/repositories/PDSRegistrationRepository.scala index e9922d8..3c513b3 100644 --- a/app/repositories/PDSRegistrationRepository.scala +++ b/app/repositories/PDSRegistrationRepository.scala @@ -4,6 +4,7 @@ import models.PDSRegistration import models.dal.MetadataSchema.pdsRegistrations import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} import slick.jdbc.JdbcProfile +import play.db.NamedDatabase import java.time.ZonedDateTime import javax.inject.{Inject, Singleton} @@ -11,7 +12,7 @@ import scala.concurrent.{ExecutionContext, Future} @Singleton class PDSRegistrationRepository @Inject()( - @javax.inject.Named("metadata") protected val dbConfigProvider: DatabaseConfigProvider + @NamedDatabase("metadata") protected val dbConfigProvider: DatabaseConfigProvider )(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[JdbcProfile] { import profile.api._ diff --git a/conf/application.conf b/conf/application.conf index 3ff1d1e..6ec6385 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -60,19 +60,20 @@ slick.dbs.default { # Overridden in production on startup password="decodingus_password" } - - slick.dbs.metadata { - profile="slick.jdbc.PostgresProfile$" - db { - numThreads = 32 - queueSize = 5000 - - driver="org.postgresql.Driver" - url="jdbc:postgresql://localhost:5432/decodingus_db" # Temporarily pointing to the same DB - username="decodingus_user" - password="decodingus_password" - } - }} +} + +slick.dbs.metadata { + profile="slick.jdbc.PostgresProfile$" + db { + 
numThreads = 32 + queueSize = 5000 + + driver="org.postgresql.Driver" + url="jdbc:postgresql://localhost:5432/decodingus_metadata" # Temporarily pointing to the same DB + username="decodingus_user" + password="decodingus_password" + } +} contact { recipient.email = "your-email@domain.com" From 6287ba6f7b7ce456d35ae856e2836229ca8135f8 Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 5 Dec 2025 07:59:56 -0600 Subject: [PATCH 04/31] feat: Support Atmosphere BGS integration with Citizen/PDS linking - Added BGS Integration Plan document. - Updated to include and . - Updated to find donors by DID + Identifier. - Updated to auto-provision or link Specimen Donors for Atmosphere samples. --- app/models/api/ExternalBiosampleRequest.scala | 7 +- .../SpecimanDonorRepository.scala | 8 ++ app/services/ExternalBiosampleService.scala | 28 ++++- documents/BGS_Integration_Plan.md | 107 ++++++++++++++++++ 4 files changed, 148 insertions(+), 2 deletions(-) create mode 100644 documents/BGS_Integration_Plan.md diff --git a/app/models/api/ExternalBiosampleRequest.scala b/app/models/api/ExternalBiosampleRequest.scala index bdedb5c..1e95cb3 100644 --- a/app/models/api/ExternalBiosampleRequest.scala +++ b/app/models/api/ExternalBiosampleRequest.scala @@ -1,6 +1,6 @@ package models.api -import models.domain.genomics.BiologicalSex +import models.domain.genomics.{BiologicalSex, BiosampleType} import play.api.libs.json.{Json, OFormat} import java.time.LocalDateTime @@ -17,6 +17,8 @@ import java.time.LocalDateTime * @param sex Optional biological sex information for the biosample. * @param latitude Optional geographical latitude information related to the biosample. * @param longitude Optional geographical longitude information related to the biosample. + * @param citizenDid Optional decentralized identifier (DID) for linking to a citizen/PDS user. + * @param donorType Optional type of the donor (e.g., Citizen, PGP, Standard). 
* @param publication Optional publication information related to the biosample, represented by the `PublicationInfo` structure. * @param sequenceData Information regarding the sequencing data associated with the biosample, represented by the `SequenceDataInfo` structure. */ @@ -29,6 +31,9 @@ case class ExternalBiosampleRequest( sex: Option[BiologicalSex], latitude: Option[Double], longitude: Option[Double], + citizenDid: Option[String], + donorIdentifier: Option[String], + donorType: Option[BiosampleType], publication: Option[PublicationInfo], sequenceData: SequenceDataInfo ) diff --git a/app/repositories/SpecimanDonorRepository.scala b/app/repositories/SpecimanDonorRepository.scala index 04dc839..19feffc 100644 --- a/app/repositories/SpecimanDonorRepository.scala +++ b/app/repositories/SpecimanDonorRepository.scala @@ -21,6 +21,7 @@ trait SpecimenDonorRepository { def findByBiobankAndType(biobank: String, donorType: BiosampleType): Future[Seq[SpecimenDonor]] def deleteMany(ids: Seq[Int]): Future[Int] def transferBiosamples(fromDonorIds: Seq[Int], toDonorId: Int): Future[Int] + def findByDidAndIdentifier(did: String, identifier: String): Future[Option[SpecimenDonor]] } @@ -40,6 +41,13 @@ class SpecimenDonorRepositoryImpl @Inject()( db.run(donorsTable.filter(_.id === id).result.headOption) } + override def findByDidAndIdentifier(did: String, identifier: String): Future[Option[SpecimenDonor]] = { + db.run(donorsTable + .filter(d => d.citizenBiosampleDid === did && d.donorIdentifier === identifier) + .result.headOption + ) + } + override def create(donor: SpecimenDonor): Future[SpecimenDonor] = { val insertQuery = (donorsTable returning donorsTable.map(_.id) into ((d, id) => d.copy(id = Some(id)))) diff --git a/app/services/ExternalBiosampleService.scala b/app/services/ExternalBiosampleService.scala index d3983fd..efd61c5 100644 --- a/app/services/ExternalBiosampleService.scala +++ b/app/services/ExternalBiosampleService.scala @@ -91,9 +91,35 @@ class 
ExternalBiosampleService @Inject()( request.sex.isDefined || request.latitude.isDefined || request.longitude.isDefined } + def handleCitizenDonor(): Future[Option[Int]] = { + (request.citizenDid, request.donorIdentifier) match { + case (Some(did), Some(identifier)) => + specimenDonorRepository.findByDidAndIdentifier(did, identifier).flatMap { + case Some(existingDonor) => Future.successful(existingDonor.id) + case None => + val newDonor = SpecimenDonor( + donorIdentifier = identifier, + originBiobank = request.centerName, + donorType = request.donorType.getOrElse(BiosampleType.Citizen), + sex = request.sex, + geocoord = None, // Coordinates handled separately if needed, or could be passed here + pgpParticipantId = None, + citizenBiosampleDid = Some(did), + dateRangeStart = None, + dateRangeEnd = None + ) + specimenDonorRepository.create(newDonor).map(_.id) + } + case _ => Future.successful(None) + } + } + (for { geocoord <- validateCoordinates(request.latitude, request.longitude) - donorId <- if (shouldCreateDonor) { + citizenDonorId <- handleCitizenDonor() + donorId <- if (citizenDonorId.isDefined) { + Future.successful(citizenDonorId) + } else if (shouldCreateDonor) { createSpecimenDonor(geocoord).map(donor => Some(donor.id.get)) } else { Future.successful(None) diff --git a/documents/BGS_Integration_Plan.md b/documents/BGS_Integration_Plan.md new file mode 100644 index 0000000..ff2fb78 --- /dev/null +++ b/documents/BGS_Integration_Plan.md @@ -0,0 +1,107 @@ +### Architecture Overview + +For the MVP, we will utilize a **Secure REST API** pattern. The BGS server will act as an authenticated API client, pushing operational data directly to the `decodingus` backend. + +* **Integration Point:** `POST /api/private/external/biosamples` +* **Controller:** `app/controllers/ExternalBiosampleController.scala` +* **Data Model:** `app/models/api/ExternalBiosampleRequest.scala` +* **Security:** API Key authentication via `X-API-Key` header. + +### 2. 
Atmosphere / Citizen Sample Linking + +For **Atmosphere** (Citizen) biosamples, the system must correctly place the sample within a hierarchy: +1. **Researcher/User (PDS Owner):** Identified by `citizenDid`. A single researcher may manage multiple donors. +2. **Specimen Donor:** Identified by a unique `donorIdentifier` within the Researcher's context. A single donor may have multiple biosamples (e.g., different tissues, or different sequencing technologies like Short-read vs. HiFi). +3. **Biosample/Sequencing Data:** The actual data being uploaded. + +* **Linkage Keys:** + * `citizenDid`: Identifies the Researcher/PDS. + * `donorIdentifier`: Identifies the specific biological source (person) *within* that Researcher's collection. + +* **SpecimenDonor Resolution Logic:** + The system attempts to find a `SpecimenDonor` matching **BOTH** the `citizenDid` and the `donorIdentifier`. + * **If Found:** The new Biosample is linked to this *existing* Specimen Donor. + * *Benefit:* This aggregates multiple datasets (e.g., WGS + HiFi) under the same physical donor. + * **If Not Found:** A new `SpecimenDonor` record is created. + * `citizenBiosampleDid` = `citizenDid` + * `donorIdentifier` = `donorIdentifier` + * `donorType` = "Citizen" + +**Revised Sequence Diagram:** + +```mermaid +sequenceDiagram + participant BGS as BGS Server + participant API as ExternalBiosampleController + participant Service as ExternalBiosampleService + participant Repo as SpecimenDonorRepository + participant DB as Database + + BGS->>API: POST /biosamples + Note right of BGS: citizenDid="did:123"
donorIdentifier="Subject-A" + + API->>Service: createBiosample(req) + + alt citizenDid is present + Service->>Repo: findByDidAndIdentifier("did:123", "Subject-A") + Repo->>DB: SELECT * FROM specimen_donors
WHERE citizen_biosample_did = 'did:123'
AND donor_identifier = 'Subject-A' + + alt Donor Exists (e.g., adding HiFi to existing Subject-A) + DB-->>Service: Returns Donor(id=55) + else Donor Missing (New Subject) + Service->>Repo: create(SpecimenDonor{did="did:123", id="Subject-A"}) + Repo-->>Service: Returns New Donor(id=99) + end + end + + Service->>Repo: createBiosample(donorId=55 or 99) + Service-->>API: Returns UUID + API-->>BGS: 201 Created +``` + +### 1. Data Payload Specification + +**JSON Structure:** +```json +{ + "sampleAccession": "BGS-UUID-12345", + "sourceSystem": "BGS_MVP", + "description": "Processed by BGS Node 1", + "centerName": "DecodingUs Ops", + "sex": "Male", + "citizenDid": "did:plc:u76f5w...", + "donorIdentifier": "Subject-001", + "donorType": "Citizen", + "sequenceData": { + // ... (same as before) + } +} +``` + +### 4. Rust Implementation (BGS Side) + +**Suggested Rust Structs:** + +```rust +use serde::Serialize; + +#[derive(Serialize)] +struct ExternalBiosampleRequest { + sampleAccession: String, + sourceSystem: String, + description: String, + centerName: String, + citizenDid: Option, + donorIdentifier: Option, // New: Identifies specific donor + donorType: Option, + sequenceData: SequenceDataInfo, + // ... +} +``` + +### Next Steps +1. **Provision Key:** Ensure a valid API key is set in your AWS Secrets Manager (for prod) or `application.conf` (if configured for dev overrides). +2. **Deploy BGS:** Configure the BGS MVP node with the `decodingus` URL and the API Key. +3. **Verify:** Send a test payload from the BGS node (with and without `citizenDid`) and verify: + * Data appears in `biosamples` and `sequence_libraries`. + * `SpecimenDonor` is correctly linked or created for Atmosphere samples. 
\ No newline at end of file From f950171598d24093edaf6b704304dd4e786a8c15 Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 5 Dec 2025 08:15:45 -0600 Subject: [PATCH 05/31] Update the integration plan and add lexicon draft --- documents/Atmosphere_Lexicon.md | 168 ++++++++++++++++++++++++++++++ documents/BGS_Integration_Plan.md | 31 +++++- 2 files changed, 196 insertions(+), 3 deletions(-) create mode 100644 documents/Atmosphere_Lexicon.md diff --git a/documents/Atmosphere_Lexicon.md b/documents/Atmosphere_Lexicon.md new file mode 100644 index 0000000..1537d62 --- /dev/null +++ b/documents/Atmosphere_Lexicon.md @@ -0,0 +1,168 @@ +# Atmosphere Lexicon Design + +To support the "Atmosphere" integration within the AT Protocol (Bluesky) ecosystem, we define a specific Lexicon (schema) for Genomic Data. This allows `decodingus` to interact with the global network of Personal Data Stores (PDS) using standard XRPC methods, effectively turning genomic metadata into a portable, user-owned record type. + +## Namespace: `com.decodingus.atmosphere` + +This namespace covers the genomic operational data generated by BGS nodes and owned by Citizens. + +--- + +### 1. Biosample Record (`com.decodingus.atmosphere.biosample`) + +This record represents a single biological sample processed by a BGS node. It maps directly to the `ExternalBiosampleRequest` used in the MVP REST API. + +**NSID:** `com.decodingus.atmosphere.biosample` + +```json +{ + "lexicon": 1, + "id": "com.decodingus.atmosphere.biosample", + "defs": { + "main": { + "type": "record", + "description": "A record representing a biological sample and its associated sequencing metadata.", + "key": "tid", + "record": { + "type": "object", + "required": ["sampleAccession", "donorIdentifier", "centerName", "sequenceData"], + "properties": { + "sampleAccession": { + "type": "string", + "description": "Unique identifier for the sample (e.g., UUID from BGS)." 
+ }, + "donorIdentifier": { + "type": "string", + "description": "Identifier for the specimen donor within the user's context." + }, + "description": { + "type": "string", + "description": "Human-readable description of the sample." + }, + "centerName": { + "type": "string", + "description": "The name of the Sequencing Center or BGS Node." + }, + "sex": { + "type": "string", + "description": "Biological sex of the donor.", + "knownValues": ["Male", "Female", "Other", "Unknown"] + }, + "sequenceData": { + "type": "ref", + "ref": "#sequenceData", + "description": "Technical details about the sequencing run." + }, + "createdAt": { + "type": "string", + "format": "datetime" + } + } + } + }, + "sequenceData": { + "type": "object", + "description": "Technical metrics regarding the sequencing output.", + "required": ["platformName", "testType", "files"], + "properties": { + "platformName": { + "type": "string", + "description": "Sequencing platform (e.g., ILLUMINA, PACBIO)." + }, + "testType": { + "type": "string", + "description": "Type of test (e.g., WGS, EXOME)." + }, + "reads": { + "type": "integer", + "description": "Total number of reads." + }, + "readLength": { + "type": "integer", + "description": "Average read length." + }, + "coverage": { + "type": "float", + "description": "Estimated sequencing coverage (e.g., 30.5)." + }, + "files": { + "type": "array", + "items": { + "type": "ref", + "ref": "#fileInfo" + } + } + } + }, + "fileInfo": { + "type": "object", + "description": "Metadata about a specific data file (FASTQ, BAM, etc.).", + "required": ["fileName", "fileFormat", "location"], + "properties": { + "fileName": { + "type": "string" + }, + "fileSizeBytes": { + "type": "integer" + }, + "fileFormat": { + "type": "string", + "knownValues": ["FASTQ", "BAM", "CRAM", "VCF"] + }, + "checksum": { + "type": "string", + "description": "SHA-256 or similar checksum." 
+ }, + "location": { + "type": "string", + "format": "uri", + "description": "The URI where the file is stored (e.g., s3://..., ipfs://...)." + } + } + } + } +} +``` + +## Integration Strategy + +In the "Atmosphere" model, this Lexicon bridges the gap between the MVP REST API and the decentralized future: + +1. **MVP (Current):** The BGS Node (Rust) constructs a JSON payload matching the `ExternalBiosampleRequest`. It pushes this to `decodingus` via REST. +2. **Phase 2 (Hybrid):** The BGS Node uses this Lexicon structure to construct the payload. `decodingus` accepts the payload and internally treats it as a valid ATP Record, potentially signing it on behalf of the user or the lab. +3. **Phase 3 (Full Atmosphere):** + * The User grants the BGS Node "Write Access" to their PDS (repo). + * The BGS Node writes a `com.decodingus.atmosphere.biosample` record directly to the User's PDS. + * `decodingus` (acting as an AppView) ingests this record from the ATP Firehose, indexing it for the tree visualization. + +## Mapping to Scala Models + +| Lexicon Field | Scala Field (`ExternalBiosampleRequest`) | +| :--- | :--- | +| `sampleAccession` | `sampleAccession` | +| `donorIdentifier` | `donorIdentifier` | +| `centerName` | `centerName` | +| `sequenceData` | `sequenceData` (Mapped Object) | +| `files` | `files` (Mapped Object) | + +## Lifecycle Management (AppView Logic) + +As an AppView, `decodingus` subscribes to the AT Protocol Firehose to maintain a synchronized state of the genomic network. + +### 1. The Firehose Event Stream +We listen for `com.atproto.sync.subscribeRepos` events containing operations for the collection `com.decodingus.atmosphere.biosample`. + +### 2. Event Handling Strategy + +| Event Action | Description | DecodingUs Logic | +| :--- | :--- | :--- | +| **Create** | User creates a new record. | 1. Extract `citizenDid` (Repo DID) and record body.
2. Invoke `ExternalBiosampleService.create`.
3. Store the `at_uri` (e.g., `at://did.../collection/rkey`) and `at_cid` in the `biosamples` table. | +| **Update** | User modifies an existing record. | 1. Lookup `Biosample` by `at_uri`.
2. Compare `at_cid` to confirm the content actually changed (a CID is a content hash, not a version number; ordering must come from the firehose event sequence).
3. Update mutable fields (description, metrics, file URLs).
4. Update `at_cid`. | +| **Delete** | User removes a record. | 1. Lookup `Biosample` by `at_uri`.
2. Perform **Soft Delete** (mark as archived/hidden).
3. Remove from active visualization trees.
*Hard deletes are avoided to preserve scientific lineage unless legally required (GDPR).* | + +### 3. Schema Requirements +To support robust syncing, the internal `biosamples` table requires tracking fields: + +* **`at_uri` (String, Unique):** The canonical decentralized address of the record. Used for lookups during Update/Delete. +* **`at_cid` (String):** The content identifier (hash) of the current version. Used for optimistic locking and preventing replay attacks/race conditions. + diff --git a/documents/BGS_Integration_Plan.md b/documents/BGS_Integration_Plan.md index ff2fb78..726afe7 100644 --- a/documents/BGS_Integration_Plan.md +++ b/documents/BGS_Integration_Plan.md @@ -99,9 +99,34 @@ struct ExternalBiosampleRequest { } ``` + // Handle 201 Created or errors + Ok(()) +} +``` + +### 5. Integration Roadmap + +The integration strategy evolves through three distinct phases, moving from a simple direct connection to a robust, decentralized architecture. + +#### Phase 1: Direct REST API (Current / MVP) +* **Mechanism:** Synchronous HTTP POST. +* **Flow:** `BGS Server` -> `DecodingUs Controller` -> `Service` -> `DB`. +* **Pros:** Simplest to implement; immediate feedback on success/failure. +* **Cons:** Tightly coupled; requires BGS to handle retries if DecodingUs is down. + +#### Phase 2: Asynchronous Ingestion (Kafka) +* **Mechanism:** Message Queue. +* **Flow:** `BGS Server` -> `Kafka Topic` -> `DecodingUs Consumer` -> `Service` -> `DB`. +* **Change:** BGS replaces the HTTP Client with a Kafka Producer. DecodingUs adds a Kafka Consumer service. +* **Pros:** Decoupled; handles bursts of traffic; high resilience. + +#### Phase 3: Decentralized AppView (Atmosphere) +* **Mechanism:** AT Protocol Firehose. +* **Flow:** `BGS Server` -> `Researcher PDS` -> `AT Proto Relay` -> `DecodingUs Firehose Consumer` -> `Service` -> `DB`. +* **Change:** BGS writes directly to the user's PDS using the `com.decodingus.atmosphere.biosample` Lexicon. 
DecodingUs becomes a passive indexer. +* **Pros:** True user data ownership; interoperability with other AT Protocol apps. + ### Next Steps 1. **Provision Key:** Ensure a valid API key is set in your AWS Secrets Manager (for prod) or `application.conf` (if configured for dev overrides). 2. **Deploy BGS:** Configure the BGS MVP node with the `decodingus` URL and the API Key. -3. **Verify:** Send a test payload from the BGS node (with and without `citizenDid`) and verify: - * Data appears in `biosamples` and `sequence_libraries`. - * `SpecimenDonor` is correctly linked or created for Atmosphere samples. \ No newline at end of file +3. **Verify:** Send a test payload from the BGS node and verify the data appears in the `biosamples` and `sequence_libraries` tables. \ No newline at end of file From 0632d90c85608979bb1e8d0a6e58568262c251d9 Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 5 Dec 2025 08:20:08 -0600 Subject: [PATCH 06/31] feat: Implement Upsert logic for external biosample submission - Modified update method to include more fields. - Added to for efficient sequence file cleanup. - Implemented in to handle full replacement of sequencing data. - Refactored to perform an upsert (update or insert) based on , including Specimen Donor and full sequence data replacement. 
--- app/repositories/BiosampleRepository.scala | 8 +++ app/repositories/SequenceFileRepository.scala | 12 ++++ app/services/BiosampleDataService.scala | 30 +++++++++ app/services/ExternalBiosampleService.scala | 62 ++++++++++++++----- 4 files changed, 95 insertions(+), 17 deletions(-) diff --git a/app/repositories/BiosampleRepository.scala b/app/repositories/BiosampleRepository.scala index 018798e..b8ed5ef 100644 --- a/app/repositories/BiosampleRepository.scala +++ b/app/repositories/BiosampleRepository.scala @@ -234,10 +234,18 @@ class BiosampleRepositoryImpl @Inject()( .filter(_.id === id) .map(b => ( b.alias, + b.description, + b.centerName, + b.specimenDonorId, + b.sourcePlatform, b.locked )) .update(( biosample.alias, + biosample.description, + biosample.centerName, + biosample.specimenDonorId, + biosample.sourcePlatform, biosample.locked )) .map(_ > 0) diff --git a/app/repositories/SequenceFileRepository.scala b/app/repositories/SequenceFileRepository.scala index fedffb9..11c19f7 100644 --- a/app/repositories/SequenceFileRepository.scala +++ b/app/repositories/SequenceFileRepository.scala @@ -48,6 +48,14 @@ trait SequenceFileRepository { * @return a future containing a sequence of files */ def findByLibraryId(libraryId: Int): Future[Seq[SequenceFile]] + + /** + * Deletes all sequence files associated with a library. 
+ * + * @param libraryId the ID of the sequence library + * @return a future containing the number of deleted files + */ + def deleteByLibraryId(libraryId: Int): Future[Int] } @Singleton @@ -106,6 +114,10 @@ class SequenceFileRepositoryImpl @Inject()( db.run(sequenceFiles.filter(_.id === id).delete.map(_ > 0)) } + override def deleteByLibraryId(libraryId: Int): Future[Int] = { + db.run(sequenceFiles.filter(_.libraryId === libraryId).delete) + } + override def findByLibraryId(libraryId: Int): Future[Seq[SequenceFile]] = { db.run(sequenceFiles.filter(_.libraryId === libraryId).result) } diff --git a/app/services/BiosampleDataService.scala b/app/services/BiosampleDataService.scala index fd3f5ae..8fdc9b4 100644 --- a/app/services/BiosampleDataService.scala +++ b/app/services/BiosampleDataService.scala @@ -53,6 +53,36 @@ class BiosampleDataService @Inject()( createSequenceData(sampleGuid, data) } + /** + * Replaces the sequencing data for a specific sample. + * + * This method first removes all existing sequencing libraries and their associated files + * for the given sample GUID, and then adds the new sequencing data. + * + * @param sampleGuid The unique identifier of the sample to update. + * @param data The new metadata and details about the sequencing data. + * @return A `Future` representing the asynchronous completion of the operation. + */ + def replaceSequenceData(sampleGuid: UUID, data: SequenceDataInfo): Future[Unit] = { + for { + // 1. Find all existing libraries + libraries <- sequenceLibraryRepository.findBySampleGuid(sampleGuid) + + // 2. Delete files for each library + _ <- Future.sequence(libraries.map { lib => + sequenceFileRepository.deleteByLibraryId(lib.id.get) + }) + + // 3. Delete the libraries themselves + _ <- Future.sequence(libraries.map { lib => + sequenceLibraryRepository.delete(lib.id.get) + }) + + // 4. 
Create new sequence data + _ <- createSequenceData(sampleGuid, data) + } yield () + } + /** * Associates a publication with a specific biosample identified by its unique GUID. If the publication * does not already exist in the repository, it is created. Optionally, original haplogroup information diff --git a/app/services/ExternalBiosampleService.scala b/app/services/ExternalBiosampleService.scala index efd61c5..98f425e 100644 --- a/app/services/ExternalBiosampleService.scala +++ b/app/services/ExternalBiosampleService.scala @@ -58,33 +58,43 @@ class ExternalBiosampleService @Inject()( centerName = request.centerName, specimenDonorId = donorId, locked = false, - sourcePlatform = None + sourcePlatform = Some(request.sourceSystem) ) - // Check for existing accession first - biosampleRepository.findByAccession(request.sampleAccession).flatMap { - case Some(_) => Future.failed(DuplicateAccessionException(request.sampleAccession)) - case None => biosampleRepository.create(biosample) - } + biosampleRepository.create(biosample) + } + + def updateBiosample(existingBiosample: Biosample, donorId: Option[Int]) = { + val updatedBiosample = existingBiosample.copy( + description = request.description, + alias = request.alias, + centerName = request.centerName, + specimenDonorId = donorId, + sourcePlatform = Some(request.sourceSystem) + ) + biosampleRepository.update(updatedBiosample).map(_ => existingBiosample.sampleGuid) } - def handleDataAssociation() = { + def handleDataAssociation(guid: UUID, isUpdate: Boolean) = { val publicationFuture = request.publication - .map(pub => biosampleDataService.linkPublication(sampleGuid, pub) + .map(pub => biosampleDataService.linkPublication(guid, pub) .recoverWith { case e => Future.failed(PublicationLinkageException(e.getMessage)) }) .getOrElse(Future.successful(())) - val sequenceDataFuture = biosampleDataService.addSequenceData(sampleGuid, request.sequenceData) - .recoverWith { case e => - 
Future.failed(SequenceDataValidationException(e.getMessage)) - } + val sequenceDataFuture = if (isUpdate) { + biosampleDataService.replaceSequenceData(guid, request.sequenceData) + } else { + biosampleDataService.addSequenceData(guid, request.sequenceData) + } for { _ <- publicationFuture - _ <- sequenceDataFuture - } yield sampleGuid + _ <- sequenceDataFuture.recoverWith { case e => + Future.failed(SequenceDataValidationException(e.getMessage)) + } + } yield guid } def shouldCreateDonor: Boolean = { @@ -124,12 +134,30 @@ class ExternalBiosampleService @Inject()( } else { Future.successful(None) } - biosample <- createBiosample(donorId) - guid <- handleDataAssociation() + + // Check for existing accession + existing <- biosampleRepository.findByAccession(request.sampleAccession) + + guid <- existing match { + case Some((existingBiosample, _)) => + // Update existing + for { + guid <- updateBiosample(existingBiosample, donorId) + _ <- handleDataAssociation(guid, isUpdate = true) + } yield guid + + case None => + // Create new + for { + created <- createBiosample(donorId) + guid <- handleDataAssociation(created.sampleGuid, isUpdate = false) + } yield guid + } + } yield guid).recoverWith { case e: BiosampleServiceException => Future.failed(e) case e: Exception => Future.failed(new RuntimeException( - s"Failed to create biosample: ${e.getMessage}", e)) + s"Failed to process biosample: ${e.getMessage}", e)) } } } \ No newline at end of file From b03cfcb4ae5fae0fbd62c78906cd64de1a937e9f Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 5 Dec 2025 08:24:51 -0600 Subject: [PATCH 07/31] feat: Implement DELETE endpoint for external biosamples - Added method to for direct deletion by ID. - Added to and for cascading deletions. - Implemented in to orchestrate a hard delete of a biosample and all its dependent data. - Added to to provide service-level deletion by accession. - Exposed a endpoint in for BGS server initiated deletion. 
--- .../ExternalBiosampleController.scala | 31 +++++++++++-- ...iosampleOriginalHaplogroupRepository.scala | 12 +++++ app/repositories/BiosampleRepository.scala | 39 +++++++++++----- .../PublicationBiosampleRepository.scala | 12 +++++ app/services/BiosampleDataService.scala | 44 +++++++++++++++---- app/services/ExternalBiosampleService.scala | 15 +++++++ 6 files changed, 130 insertions(+), 23 deletions(-) diff --git a/app/controllers/ExternalBiosampleController.scala b/app/controllers/ExternalBiosampleController.scala index 3412c43..ec591a4 100644 --- a/app/controllers/ExternalBiosampleController.scala +++ b/app/controllers/ExternalBiosampleController.scala @@ -88,7 +88,30 @@ class ExternalBiosampleController @Inject()( "error" -> "Internal server error", "message" -> "An unexpected error occurred while processing the request" )) - } - } - -} \ No newline at end of file + } + } + + /** + * Handles an HTTP request to delete an external biosample by its accession. + * + * This method processes a request to delete a biosample identified by its unique accession. + * Upon successful deletion, it returns a `204 No Content` HTTP response. + * If the biosample is not found, it returns a `404 Not Found` response. + * + * @param accession The unique accession of the biosample to be deleted. + * @return An asynchronous `Action` that responds with `204 No Content`, `404 Not Found`, + * or `500 Internal Server Error` in case of an unexpected error. 
+ */ + def delete(accession: String): Action[AnyContent] = secureApi.async { + externalBiosampleService.deleteBiosample(accession).map { + case true => NoContent + case false => NotFound(Json.obj("error" -> "Biosample not found", "message" -> s"Biosample with accession '$accession' not found.")) + }.recover { + case e: Exception => + InternalServerError(Json.obj( + "error" -> "Internal server error", + "message" -> s"An unexpected error occurred while attempting to delete biosample with accession '$accession': ${e.getMessage}" + )) + } + } + } \ No newline at end of file diff --git a/app/repositories/BiosampleOriginalHaplogroupRepository.scala b/app/repositories/BiosampleOriginalHaplogroupRepository.scala index 82dceb3..e5b7d88 100644 --- a/app/repositories/BiosampleOriginalHaplogroupRepository.scala +++ b/app/repositories/BiosampleOriginalHaplogroupRepository.scala @@ -63,6 +63,14 @@ trait BiosampleOriginalHaplogroupRepository { * @return a future containing an optional haplogroup assignment */ def findByBiosampleAndPublication(biosampleId: Int, publicationId: Int): Future[Option[BiosampleOriginalHaplogroup]] + + /** + * Deletes all `BiosampleOriginalHaplogroup` entries associated with the specified biosample ID. + * + * @param biosampleId The unique identifier of the biosample for which associated entries are to be deleted. + * @return A `Future` containing the number of deleted rows. 
+ */ + def deleteByBiosampleId(biosampleId: Int): Future[Int] } @Singleton @@ -107,6 +115,10 @@ class BiosampleOriginalHaplogroupRepositoryImpl @Inject()( db.run(haplogroups.filter(_.id === id).delete.map(_ > 0)) } + override def deleteByBiosampleId(biosampleId: Int): Future[Int] = { + db.run(haplogroups.filter(_.biosampleId === biosampleId).delete) + } + override def findByBiosampleAndPublication( biosampleId: Int, publicationId: Int diff --git a/app/repositories/BiosampleRepository.scala b/app/repositories/BiosampleRepository.scala index b8ed5ef..680be42 100644 --- a/app/repositories/BiosampleRepository.scala +++ b/app/repositories/BiosampleRepository.scala @@ -114,6 +114,8 @@ trait BiosampleRepository { def findByGuid(guid: UUID): Future[Option[(Biosample, Option[SpecimenDonor])]] def getAllGeoLocations: Future[Seq[(Point, Int)]] + + def delete(id: Int): Future[Boolean] } @Singleton @@ -424,15 +426,32 @@ class BiosampleRepositoryImpl @Inject()( getBiosampleWithDonor(biosamplesTable.filter(_.sampleGuid === guid)) } - def getAllGeoLocations: Future[Seq[(Point, Int)]] = { - val query = specimenDonorsTable - .filter(_.geocoord.isDefined) - .groupBy(_.geocoord) - .map { case (point, group) => - (point.asColumnOf[Point], group.length) - } + def getAllGeoLocations: Future[Seq[(Point, Int)]] = { - db.run(query.result) - } + val query = specimenDonorsTable + + .filter(_.geocoord.isDefined) + + .groupBy(_.geocoord) + + .map { case (point, group) => + + (point.asColumnOf[Point], group.length) + + } + + + + db.run(query.result) + + } + + + + override def delete(id: Int): Future[Boolean] = { + + db.run(biosamplesTable.filter(_.id === id).delete.map(_ > 0)) + + } -} \ No newline at end of file + } \ No newline at end of file diff --git a/app/repositories/PublicationBiosampleRepository.scala b/app/repositories/PublicationBiosampleRepository.scala index 4aad962..ad82aeb 100644 --- a/app/repositories/PublicationBiosampleRepository.scala +++ 
b/app/repositories/PublicationBiosampleRepository.scala @@ -38,6 +38,14 @@ trait PublicationBiosampleRepository { * @return A `Future` containing a sequence of `PublicationBiosample` objects associated with the given biosample ID. */ def findByBiosampleId(biosampleId: Int): Future[Seq[PublicationBiosample]] + + /** + * Deletes all `PublicationBiosample` entries associated with the specified biosample ID. + * + * @param biosampleId The unique identifier of the biosample for which associated entries are to be deleted. + * @return A `Future` containing the number of deleted rows. + */ + def deleteByBiosampleId(biosampleId: Int): Future[Int] } class PublicationBiosampleRepositoryImpl @Inject()(protected val dbConfigProvider: DatabaseConfigProvider)(implicit ec: ExecutionContext) @@ -78,4 +86,8 @@ class PublicationBiosampleRepositoryImpl @Inject()(protected val dbConfigProvide override def findByBiosampleId(biosampleId: Int): Future[Seq[PublicationBiosample]] = { db.run(publicationBiosamples.filter(_.biosampleId === biosampleId).result) } + + override def deleteByBiosampleId(biosampleId: Int): Future[Int] = { + db.run(publicationBiosamples.filter(_.biosampleId === biosampleId).delete) + } } diff --git a/app/services/BiosampleDataService.scala b/app/services/BiosampleDataService.scala index 8fdc9b4..3b88bc5 100644 --- a/app/services/BiosampleDataService.scala +++ b/app/services/BiosampleDataService.scala @@ -140,15 +140,41 @@ class BiosampleDataService @Inject()( originalYHaplogroup = haplogroupInfo.yHaplogroup, originalMtHaplogroup = haplogroupInfo.mtHaplogroup, notes = haplogroupInfo.notes - )) - }.getOrElse(Future.successful(())) - } yield () - } - - - private def createSequenceData(sampleGuid: UUID, data: SequenceDataInfo): Future[Unit] = { - val library = SequenceLibrary( - id = None, + }) + .getOrElse(Future.successful(())) + } yield () + } + + /** + * Fully deletes a biosample and all its associated data (publication links, + * original haplogroups, sequence 
libraries, sequence files, file locations, and checksums). + * + * @param biosampleId The internal ID of the biosample to delete. + * @param sampleGuid The GUID of the biosample to delete. + * @return A `Future` that completes when all associated data and the biosample itself have been deleted. + */ + def fullyDeleteBiosampleAndDependencies(biosampleId: Int, sampleGuid: UUID): Future[Unit] = { + for { + // 1. Delete associated publication links + _ <- publicationBiosampleRepository.deleteByBiosampleId(biosampleId) + // 2. Delete associated original haplogroup records + _ <- biosampleOriginalHaplogroupRepository.deleteByBiosampleId(biosampleId) + // 3. Find and delete all sequence libraries and their files + libraries <- sequenceLibraryRepository.findBySampleGuid(sampleGuid) + _ <- Future.sequence(libraries.map { lib => + for { + _ <- sequenceFileRepository.deleteByLibraryId(lib.id.get) // Deletes files, locations, checksums (if cascading) + _ <- sequenceLibraryRepository.delete(lib.id.get) // Deletes the library + } yield () + }) + // 4. Delete the biosample itself + _ <- biosampleRepository.delete(biosampleId) + } yield () + } + + private def createSequenceData(sampleGuid: UUID, data: SequenceDataInfo): Future[Unit] = { + val library = SequenceLibrary( + id = None, sampleGuid = sampleGuid, lab = data.platformName, testType = data.testType, diff --git a/app/services/ExternalBiosampleService.scala b/app/services/ExternalBiosampleService.scala index 98f425e..85502e8 100644 --- a/app/services/ExternalBiosampleService.scala +++ b/app/services/ExternalBiosampleService.scala @@ -160,4 +160,19 @@ class ExternalBiosampleService @Inject()( s"Failed to process biosample: ${e.getMessage}", e)) } } + + /** + * Deletes a biosample and all its associated data by its sample accession. + * + * @param accession The sample accession of the biosample to delete. + * @return A `Future` containing `true` if the biosample was found and deleted, `false` otherwise. 
+ */ + def deleteBiosample(accession: String): Future[Boolean] = { + biosampleRepository.findByAccession(accession).flatMap { + case Some((biosample, _)) => + biosampleDataService.fullyDeleteBiosampleAndDependencies(biosample.id.get, biosample.sampleGuid).map(_ => true) + case None => + Future.successful(false) + } + } } \ No newline at end of file From 77ddafb694a9e52de8dab508d5c9596fbf0a3aff Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 5 Dec 2025 08:41:00 -0600 Subject: [PATCH 08/31] feat: Enhance security for DELETE biosample endpoint - Updated endpoint in to require parameter. - Updated to verify that the biosample belongs to the provided before deletion. - Prevents ID collision and ensures ownership verification for BGS-initiated deletions. --- app/controllers/ExternalBiosampleController.scala | 10 ++++++---- app/services/ExternalBiosampleService.scala | 11 ++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/app/controllers/ExternalBiosampleController.scala b/app/controllers/ExternalBiosampleController.scala index ec591a4..93b495e 100644 --- a/app/controllers/ExternalBiosampleController.scala +++ b/app/controllers/ExternalBiosampleController.scala @@ -95,17 +95,19 @@ class ExternalBiosampleController @Inject()( * Handles an HTTP request to delete an external biosample by its accession. * * This method processes a request to delete a biosample identified by its unique accession. + * The request must include the `citizenDid` to verify ownership and prevent collisions. * Upon successful deletion, it returns a `204 No Content` HTTP response. - * If the biosample is not found, it returns a `404 Not Found` response. + * If the biosample is not found or the DID does not match, it returns a `404 Not Found` response. * * @param accession The unique accession of the biosample to be deleted. + * @param citizenDid The DID of the citizen who owns the biosample. 
* @return An asynchronous `Action` that responds with `204 No Content`, `404 Not Found`, * or `500 Internal Server Error` in case of an unexpected error. */ - def delete(accession: String): Action[AnyContent] = secureApi.async { - externalBiosampleService.deleteBiosample(accession).map { + def delete(accession: String, citizenDid: String): Action[AnyContent] = secureApi.async { + externalBiosampleService.deleteBiosample(accession, citizenDid).map { case true => NoContent - case false => NotFound(Json.obj("error" -> "Biosample not found", "message" -> s"Biosample with accession '$accession' not found.")) + case false => NotFound(Json.obj("error" -> "Biosample not found", "message" -> s"Biosample with accession '$accession' and DID '$citizenDid' not found or mismatch.")) }.recover { case e: Exception => InternalServerError(Json.obj( diff --git a/app/services/ExternalBiosampleService.scala b/app/services/ExternalBiosampleService.scala index 85502e8..e2ab34f 100644 --- a/app/services/ExternalBiosampleService.scala +++ b/app/services/ExternalBiosampleService.scala @@ -162,16 +162,17 @@ class ExternalBiosampleService @Inject()( } /** - * Deletes a biosample and all its associated data by its sample accession. + * Deletes a biosample and all its associated data by its sample accession and owner DID. * * @param accession The sample accession of the biosample to delete. - * @return A `Future` containing `true` if the biosample was found and deleted, `false` otherwise. + * @param citizenDid The DID of the citizen who owns the biosample. + * @return A `Future` containing `true` if the biosample was found, owned by the DID, and deleted; `false` otherwise. 
*/ - def deleteBiosample(accession: String): Future[Boolean] = { + def deleteBiosample(accession: String, citizenDid: String): Future[Boolean] = { biosampleRepository.findByAccession(accession).flatMap { - case Some((biosample, _)) => + case Some((biosample, Some(donor))) if donor.citizenBiosampleDid.contains(citizenDid) => biosampleDataService.fullyDeleteBiosampleAndDependencies(biosample.id.get, biosample.sampleGuid).map(_ => true) - case None => + case _ => Future.successful(false) } } From 02e95847a5364187d9c828209b6fa41f8391e33d Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 5 Dec 2025 08:47:48 -0600 Subject: [PATCH 09/31] fix: Compilation errors in BGS integration code - Fixed syntax errors in (mismatched braces/parentheses). - Added missing import in . - Verified successful compilation with [info] welcome to sbt 1.10.11 (Red Hat, Inc. Java 21.0.9) [info] loading global plugins from /home/jkane/.sbt/1.0/plugins [info] loading settings for project decodingus-build from plugins.sbt... [info] loading project definition from /home/jkane/devel/scala3/decodingus/project [info] loading settings for project root from build.sbt... [info] __ __ [info] \ \ ____ / /____ _ __ __ [info] \ \ / __ \ / // __ `// / / / [info] / / / /_/ // // /_/ // /_/ / [info] /_/ / .___//_/ \__,_/ \__, / [info] /_/ /____/ [info] [info] Version 3.0.9 running Java 21.0.9 [info] [info] Play is run entirely by the community. Please consider contributing and/or donating: [info] https://www.playframework.com/sponsors [info] [info] Executing in batch mode. For better performance use sbt's shell [success] Total time: 0 s, completed Dec 5, 2025, 8:47:48 AM. 
--- app/controllers/ExternalBiosampleController.scala | 2 +- app/services/BiosampleDataService.scala | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/app/controllers/ExternalBiosampleController.scala b/app/controllers/ExternalBiosampleController.scala index 93b495e..7d733d4 100644 --- a/app/controllers/ExternalBiosampleController.scala +++ b/app/controllers/ExternalBiosampleController.scala @@ -4,7 +4,7 @@ import actions.ApiSecurityAction import jakarta.inject.{Inject, Singleton} import models.api.ExternalBiosampleRequest import play.api.libs.json.{Json, OFormat} -import play.api.mvc.{Action, BaseController, ControllerComponents} +import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} import services.{BiosampleServiceException, DuplicateAccessionException, ExternalBiosampleService, InvalidCoordinatesException, PublicationLinkageException, SequenceDataValidationException} import scala.concurrent.ExecutionContext diff --git a/app/services/BiosampleDataService.scala b/app/services/BiosampleDataService.scala index 3b88bc5..f1268d1 100644 --- a/app/services/BiosampleDataService.scala +++ b/app/services/BiosampleDataService.scala @@ -140,10 +140,11 @@ class BiosampleDataService @Inject()( originalYHaplogroup = haplogroupInfo.yHaplogroup, originalMtHaplogroup = haplogroupInfo.mtHaplogroup, notes = haplogroupInfo.notes - }) - .getOrElse(Future.successful(())) - } yield () - } + )) + }.getOrElse(Future.successful(())) + } yield () + } + /** * Fully deletes a biosample and all its associated data (publication links, From 9442e0d5cb6a04a06357816960e8da88ea5000e6 Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 5 Dec 2025 10:21:00 -0600 Subject: [PATCH 10/31] Lexicon updates --- documents/Atmosphere_Lexicon.md | 346 +++++++++++++++++++++++++++++--- 1 file changed, 318 insertions(+), 28 deletions(-) diff --git a/documents/Atmosphere_Lexicon.md b/documents/Atmosphere_Lexicon.md index 1537d62..7d2b552 100644 --- 
a/documents/Atmosphere_Lexicon.md +++ b/documents/Atmosphere_Lexicon.md @@ -8,9 +8,61 @@ This namespace covers the genomic operational data generated by BGS nodes and ow --- -### 1. Biosample Record (`com.decodingus.atmosphere.biosample`) +### 1. Workspace Record (`com.decodingus.atmosphere.workspace`) -This record represents a single biological sample processed by a BGS node. It maps directly to the `ExternalBiosampleRequest` used in the MVP REST API. +This record serves as the root container for a Researcher's PDS, aggregating biosample records and defined research projects. + +**NSID:** `com.decodingus.atmosphere.workspace` + +```json +{ + "lexicon": 1, + "id": "com.decodingus.atmosphere.workspace", + "defs": { + "main": { + "type": "record", + "description": "The root container for a Researcher's workspace, holding a pool of biosamples and defined projects.", + "key": "tid", + "record": { + "type": "object", + "required": ["samples", "projects"], + "properties": { + "samples": { + "type": "array", + "description": "The pool of all biosamples managed in this workspace.", + "items": { + "type": "ref", + "ref": "#biosample" + } + }, + "projects": { + "type": "array", + "description": "Research projects grouping specific biosamples.", + "items": { + "type": "ref", + "ref": "#project" + } + } + } + } + }, + "biosample": { + "type": "ref", + "ref": "com.decodingus.atmosphere.biosample#main" + }, + "project": { + "type": "ref", + "ref": "com.decodingus.atmosphere.project#main" + } + } +} +``` + +--- + +### 2. Biosample Record (`com.decodingus.atmosphere.biosample`) + +This record represents a single biological sample processed by a BGS node. It maps directly to the `ExternalBiosampleRequest` used in the MVP REST API, but with enhanced detail. **NSID:** `com.decodingus.atmosphere.biosample` @@ -49,9 +101,17 @@ This record represents a single biological sample processed by a BGS node. 
It ma "knownValues": ["Male", "Female", "Other", "Unknown"] }, "sequenceData": { + "type": "array", + "description": "List of sequencing data entries, allowing for multiple alignments (e.g., GRCh38, chm13v2.0) or runs.", + "items": { + "type": "ref", + "ref": "#sequenceData" + } + }, + "haplogroups": { "type": "ref", - "ref": "#sequenceData", - "description": "Technical details about the sequencing run." + "ref": "#haplogroupAssignments", + "description": "Y-DNA and mtDNA haplogroup assignments derived from the sequencing data." }, "createdAt": { "type": "string", @@ -60,20 +120,83 @@ This record represents a single biological sample processed by a BGS node. It ma } } }, + "haplogroupAssignments": { + "type": "object", + "description": "Container for paternal (Y-DNA) and maternal (mtDNA) haplogroup classifications.", + "properties": { + "yDna": { + "type": "ref", + "ref": "#haplogroupResult", + "description": "The predicted Y-chromosome haplogroup (Paternal)." + }, + "mtDna": { + "type": "ref", + "ref": "#haplogroupResult", + "description": "The predicted Mitochondrial haplogroup (Maternal)." + } + } + }, + "haplogroupResult": { + "type": "object", + "description": "Detailed scoring and classification result for a haplogroup.", + "required": ["haplogroupName", "score"], + "properties": { + "haplogroupName": { + "type": "string", + "description": "The assigned haplogroup nomenclature (e.g., R-M269, H1a)." + }, + "score": { + "type": "float", + "description": "Confidence score of the assignment." + }, + "matchingSnps": { + "type": "integer", + "description": "Count of SNPs matching the defining mutations for this haplogroup." + }, + "mismatchingSnps": { + "type": "integer", + "description": "Count of SNPs that contradict the assignment." + }, + "ancestralMatches": { + "type": "integer", + "description": "Count of ancestral state matches." + }, + "treeDepth": { + "type": "integer", + "description": "The depth of the assigned node in the phylogenetic tree." 
+ }, + "lineagePath": { + "type": "array", + "description": "The path from root to the assigned haplogroup (e.g., A -> ... -> R -> ... -> R-M269).", + "items": { + "type": "string" + } + } + } + }, "sequenceData": { "type": "object", - "description": "Technical metrics regarding the sequencing output.", + "description": "Raw sequencing run details and associated alignments.", "required": ["platformName", "testType", "files"], "properties": { "platformName": { "type": "string", "description": "Sequencing platform (e.g., ILLUMINA, PACBIO)." }, + "instrumentModel": { + "type": "string", + "description": "Specific instrument model (e.g., NovaSeq 6000)." + }, "testType": { "type": "string", "description": "Type of test (e.g., WGS, EXOME)." }, - "reads": { + "libraryLayout": { + "type": "string", + "description": "Paired-end or Single-end.", + "knownValues": ["PAIRED", "SINGLE"] + }, + "totalReads": { "type": "integer", "description": "Total number of reads." }, @@ -81,16 +204,133 @@ This record represents a single biological sample processed by a BGS node. It ma "type": "integer", "description": "Average read length." }, - "coverage": { + "meanInsertSize": { "type": "float", - "description": "Estimated sequencing coverage (e.g., 30.5)." + "description": "Mean insert size of the library." + }, + "files": { + "type": "array", + "description": "Raw data files (e.g., FASTQs).", + "items": { + "type": "ref", + "ref": "#fileInfo" + } + }, + "alignments": { + "type": "array", + "description": "List of alignments performed on this sequencing run.", + "items": { + "type": "ref", + "ref": "#alignmentData" + } + } + } + }, + "alignmentData": { + "type": "object", + "description": "Details of a specific alignment (e.g., to GRCh38).", + "required": ["referenceBuild", "aligner", "metrics"], + "properties": { + "referenceBuild": { + "type": "string", + "description": "Reference genome build (e.g., hg38, GRCh38)." 
+ }, + "aligner": { + "type": "string", + "description": "Tool used for alignment (e.g., BWA-MEM)." }, "files": { "type": "array", + "description": "Aligned data files (e.g., BAM, CRAM, VCF).", "items": { "type": "ref", "ref": "#fileInfo" } + }, + "metrics": { + "type": "ref", + "ref": "#alignmentMetrics" + } + } + }, + "alignmentMetrics": { + "type": "object", + "description": "Quality control metrics for the alignment.", + "properties": { + "genomeTerritory": { + "type": "integer", + "description": "The total number of bases in the reference genome territory." + }, + "meanCoverage": { + "type": "float", + "description": "The mean coverage across the genome territory." + }, + "medianCoverage": { + "type": "float" + }, + "sdCoverage": { + "type": "float", + "description": "Standard deviation of coverage." + }, + "pctExcDupe": { + "type": "float", + "description": "Percentage of reads excluded due to duplication." + }, + "pctExcMapq": { + "type": "float", + "description": "Percentage of reads excluded due to low mapping quality." + }, + "pct10x": { + "type": "float", + "description": "Percentage of genome with at least 10x coverage." + }, + "pct20x": { + "type": "float", + "description": "Percentage of genome with at least 20x coverage." + }, + "pct30x": { + "type": "float", + "description": "Percentage of genome with at least 30x coverage." + }, + "hetSnpSensitivity": { + "type": "float", + "description": "Sensitivity for detecting heterozygous SNPs." + }, + "contigs": { + "type": "array", + "description": "Per-contig coverage statistics.", + "items": { + "type": "ref", + "ref": "#contigMetrics" + } + } + } + }, + "contigMetrics": { + "type": "object", + "description": "Coverage analysis for a specific contig (chromosome).", + "required": ["contigName", "callableBases"], + "properties": { + "contigName": { + "type": "string", + "description": "Name of the contig (e.g., chr1, 1)." 
+ }, + "callableBases": { + "type": "integer", + "description": "Number of bases deemed callable." + }, + "meanCoverage": { + "type": "float" + }, + "poorMappingQuality": { + "type": "integer", + "description": "Number of bases with poor mapping quality." + }, + "lowCoverage": { + "type": "integer" + }, + "noCoverage": { + "type": "integer" } } }, @@ -124,45 +364,95 @@ This record represents a single biological sample processed by a BGS node. It ma } ``` +--- + +### 3. Project Record (`com.decodingus.atmosphere.project`) + +This record defines a research project that aggregates multiple biosamples within a Researcher's PDS. + +**NSID:** `com.decodingus.atmosphere.project` + +```json +{ + "lexicon": 1, + "id": "com.decodingus.atmosphere.project", + "defs": { + "main": { + "type": "record", + "description": "A genealogy or research project that aggregates multiple biosamples.", + "key": "tid", + "record": { + "type": "object", + "required": ["projectName", "administrator", "members"], + "properties": { + "projectName": { + "type": "string", + "description": "Name of the project (e.g., 'Smith Surname Project')." + }, + "description": { + "type": "string", + "description": "Goals and scope of the research." + }, + "administrator": { + "type": "string", + "description": "The DID or identifier of the researcher managing this project." + }, + "members": { + "type": "array", + "description": "List of biosamples associated with this project. References are relative RKeys within the same PDS.", + "items": { + "type": "string", + "description": "RKey of a biosample record in this PDS." + } + } + } + } + } + } +} +``` + ## Integration Strategy -In the "Atmosphere" model, this Lexicon bridges the gap between the MVP REST API and the decentralized future: +In the "Atmosphere" model, this Lexicon defines the data structures for decentralized, user-owned genomic records: -1. **MVP (Current):** The BGS Node (Rust) constructs a JSON payload matching the `ExternalBiosampleRequest`. 
It pushes this to `decodingus` via REST. -2. **Phase 2 (Hybrid):** The BGS Node uses this Lexicon structure to construct the payload. `decodingus` accepts the payload and internally treats it as a valid ATP Record, potentially signing it on behalf of the user or the lab. -3. **Phase 3 (Full Atmosphere):** - * The User grants the BGS Node "Write Access" to their PDS (repo). - * The BGS Node writes a `com.decodingus.atmosphere.biosample` record directly to the User's PDS. - * `decodingus` (acting as an AppView) ingests this record from the ATP Firehose, indexing it for the tree visualization. +1. **MVP (Current):** The BGS Node (Rust) constructs a JSON payload matching the `ExternalBiosampleRequest` (a simplified subset of this Lexicon) and pushes it to `decodingus` via REST. +2. **Phase 2 (Hybrid - Kafka):** The BGS Node uses this Lexicon structure (or a derived internal representation) to send messages to Kafka. `decodingus` consumes from Kafka and processes a compatible subset. +3. **Phase 3 (Full Atmosphere - AppView):** + * The Researcher's Edge App (Java) or the BGS Node (authorized by the user) constructs records fully compliant with this Lexicon. + * These records are written directly to the User's PDS. + * `decodingus` (acting as an AppView) subscribes to the ATP Firehose, ingesting these records and indexing them. -## Mapping to Scala Models +## Mapping to `decodingus` Backend (Phase 3 Considerations) -| Lexicon Field | Scala Field (`ExternalBiosampleRequest`) | -| :--- | :--- | -| `sampleAccession` | `sampleAccession` | -| `donorIdentifier` | `donorIdentifier` | -| `centerName` | `centerName` | -| `sequenceData` | `sequenceData` (Mapped Object) | -| `files` | `files` (Mapped Object) | +To fully leverage this Lexicon, `decodingus` will need to evolve its internal data model and services: + +* **`Biosample`:** Fields like `description`, `centerName`, `sex`, `sampleAccession`, `donorIdentifier` map directly. `createdAt` will come from the record metadata. 
+* **`SequenceLibrary`:** `platformName`, `instrumentModel` (new), `testType`, `libraryLayout` (new), `totalReads` (new - `reads`), `readLength`, `meanInsertSize` (new - `insertSize`). +* **`SequenceFile`:** `fileInfo` maps directly (`fileName`, `fileSizeBytes`, `fileFormat`, `checksum`, `location`). +* **`Alignment` (New Entity):** `alignmentData` will likely require new tables/models to store `referenceBuild`, `aligner`, and the associated `files`. +* **`AlignmentMetrics` (New Entity):** `alignmentMetrics` will be a significant addition, requiring new tables and potentially a dedicated service to store and query these detailed QC statistics. +* **`Haplogroups` (Enhanced):** The detailed `haplogroupResult` (score, SNPs, lineage path) can replace or enrich our existing `BiosampleOriginalHaplogroup` model, allowing us to store the *evidence* for haplogroup calls. +* **`Project` (New Entity):** Will require new tables (`projects`, `project_members`) and services. +* **`Workspace`:** This `record` might not have a direct mapping in `decodingus` as it's a PDS-level container, but its `samples` and `projects` references will drive our indexing. ## Lifecycle Management (AppView Logic) As an AppView, `decodingus` subscribes to the AT Protocol Firehose to maintain a synchronized state of the genomic network. ### 1. The Firehose Event Stream -We listen for `com.atproto.sync.subscribeRepos` events containing operations for the collection `com.decodingus.atmosphere.biosample`. +We listen for `com.atproto.sync.subscribeRepos` events containing operations for the collection `com.decodingus.atmosphere.biosample` (and potentially `com.decodingus.atmosphere.project` and `com.decodingus.atmosphere.workspace` in the future). ### 2. Event Handling Strategy | Event Action | Description | DecodingUs Logic | | :--- | :--- | :--- | -| **Create** | User creates a new record. | 1. Extract `citizenDid` (Repo DID) and record body.
2. Invoke `ExternalBiosampleService.create`.
3. Store the `at_uri` (e.g., `at://did.../collection/rkey`) and `at_cid` in the `biosamples` table. | -| **Update** | User modifies an existing record. | 1. Lookup `Biosample` by `at_uri`.
2. Compare `at_cid` to ensure strictly newer version.
3. Update mutable fields (description, metrics, file URLs).
4. Update `at_cid`. | +| **Create** | User creates a new record. | 1. Extract `citizenDid` (Repo DID) and record body.
2. Map Lexicon record to `ExternalBiosampleRequest` (or directly to internal models).
3. Invoke `ExternalBiosampleService.create` (or an equivalent AppView ingestion service).
4. Store the `at_uri` (e.g., `at://did.../collection/rkey`) and `at_cid` in the `biosamples` table. | +| **Update** | User modifies an existing record. | 1. Lookup `Biosample` by `at_uri`.
2. Compare `at_cid` to ensure strictly newer version.
3. Map Lexicon record to internal models.
4. Update mutable fields (description, metrics, file URLs, haplogroups).
5. Update `at_cid`. | | **Delete** | User removes a record. | 1. Lookup `Biosample` by `at_uri`.
2. Perform **Soft Delete** (mark as archived/hidden).
3. Remove from active visualization trees.
*Hard deletes are avoided to preserve scientific lineage unless legally required (GDPR).* | ### 3. Schema Requirements To support robust syncing, the internal `biosamples` table requires tracking fields: * **`at_uri` (String, Unique):** The canonical decentralized address of the record. Used for lookups during Update/Delete. -* **`at_cid` (String):** The content identifier (hash) of the current version. Used for optimistic locking and preventing replay attacks/race conditions. - +* **`at_cid` (String):** The content identifier (hash) of the current version. Used for optimistic locking and preventing replay attacks/race conditions. \ No newline at end of file From 114e2777f1e04f0b34cd23f040917b1dfc030d1e Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 5 Dec 2025 10:47:40 -0600 Subject: [PATCH 11/31] Lexicon updates --- documents/Atmosphere_Lexicon.md | 128 +++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/documents/Atmosphere_Lexicon.md b/documents/Atmosphere_Lexicon.md index 7d2b552..51bfc89 100644 --- a/documents/Atmosphere_Lexicon.md +++ b/documents/Atmosphere_Lexicon.md @@ -455,4 +455,130 @@ We listen for `com.atproto.sync.subscribeRepos` events containing operations for To support robust syncing, the internal `biosamples` table requires tracking fields: * **`at_uri` (String, Unique):** The canonical decentralized address of the record. Used for lookups during Update/Delete. -* **`at_cid` (String):** The content identifier (hash) of the current version. Used for optimistic locking and preventing replay attacks/race conditions. \ No newline at end of file +* **`at_cid` (String):** The content identifier (hash) of the current version. Used for optimistic locking and preventing replay attacks/race conditions. + +### 4. Example Mock Data + +Below are JSON examples of how valid records would appear in the `com.decodingus.atmosphere` namespace. 
+ +#### Biosample Record (`com.decodingus.atmosphere.biosample`) + +```json +{ + "$type": "com.decodingus.atmosphere.biosample", + "sampleAccession": "BGS-UUID-98765-XYZ", + "donorIdentifier": "Subject-001", + "description": "Deep WGS of Proband from Smith Family Trio", + "centerName": "DecodingUs Reference Lab", + "sex": "Male", + "createdAt": "2025-12-05T14:30:00Z", + "haplogroups": { + "yDna": { + "haplogroupName": "R-M269", + "score": 0.998, + "matchingSnps": 145, + "mismatchingSnps": 2, + "ancestralMatches": 3000, + "treeDepth": 25, + "lineagePath": ["R", "R1", "R1b", "R-M269"] + }, + "mtDna": { + "haplogroupName": "H1a", + "score": 0.995, + "matchingSnps": 42, + "mismatchingSnps": 0, + "ancestralMatches": 800, + "treeDepth": 18, + "lineagePath": ["L3", "N", "R", "HV", "H", "H1", "H1a"] + } + }, + "sequenceData": [ + { + "platformName": "ILLUMINA", + "instrumentModel": "NovaSeq 6000", + "testType": "WGS", + "libraryLayout": "PAIRED", + "totalReads": 850000000, + "readLength": 150, + "meanInsertSize": 450.0, + "files": [ + { + "fileName": "Sample001_R1.fastq.gz", + "fileSizeBytes": 15000000000, + "fileFormat": "FASTQ", + "checksum": "sha256-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "location": "s3://lab-data-bucket/raw/Sample001_R1.fastq.gz" + }, + { + "fileName": "Sample001_R2.fastq.gz", + "fileSizeBytes": 16000000000, + "fileFormat": "FASTQ", + "checksum": "sha256-d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e553", + "location": "s3://lab-data-bucket/raw/Sample001_R2.fastq.gz" + } + ], + "alignments": [ + { + "referenceBuild": "GRCh38", + "aligner": "BWA-MEM 0.7.17", + "files": [ + { + "fileName": "Sample001.hg38.cram", + "fileSizeBytes": 22000000000, + "fileFormat": "CRAM", + "checksum": "sha256-0b7c68d2266643392788995209377460244359634270247b3618245356363834", + "location": "s3://lab-data-bucket/aligned/Sample001.hg38.cram" + } + ], + "metrics": { + "genomeTerritory": 3100000000, + "meanCoverage": 32.5, + 
"medianCoverage": 31.0, + "sdCoverage": 8.5, + "pctExcDupe": 0.12, + "pctExcMapq": 0.02, + "pct10x": 0.98, + "pct20x": 0.95, + "pct30x": 0.85, + "hetSnpSensitivity": 0.992, + "contigs": [ + { + "contigName": "chr1", + "callableBases": 240000000, + "meanCoverage": 33.1, + "poorMappingQuality": 15000, + "lowCoverage": 5000, + "noCoverage": 100 + }, + { + "contigName": "chrY", + "callableBases": 25000000, + "meanCoverage": 16.5, + "poorMappingQuality": 50000, + "lowCoverage": 2000, + "noCoverage": 500 + } + ] + } + } + ] + } + ] +} +``` + +#### Project Record (`com.decodingus.atmosphere.project`) + +```json +{ + "$type": "com.decodingus.atmosphere.project", + "projectName": "Smith Surname Project", + "description": "A collaborative effort to trace the paternal lineage of the Smith family originating from Yorkshire.", + "administrator": "did:plc:alice123456", + "members": [ + "at://did:plc:alice123456/com.decodingus.atmosphere.biosample/rkey-sample-001", + "at://did:plc:bob987654/com.decodingus.atmosphere.biosample/rkey-sample-002", + "at://did:plc:charlie111/com.decodingus.atmosphere.biosample/rkey-sample-003" + ] +} +``` \ No newline at end of file From 36aae7168a25b66b402c381d6a19826d55397a02 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 06:09:22 -0600 Subject: [PATCH 12/31] Update API Extension Design to reflect CitizenBiosample segregation strategy --- documents/API_Extension_Design.md | 541 ++++++++++++++++++++++++++++++ 1 file changed, 541 insertions(+) create mode 100644 documents/API_Extension_Design.md diff --git a/documents/API_Extension_Design.md b/documents/API_Extension_Design.md new file mode 100644 index 0000000..2270f98 --- /dev/null +++ b/documents/API_Extension_Design.md @@ -0,0 +1,541 @@ +# API Extension Design Document: ExternalBiosample and Project Entities + +## 1. Introduction + +This document outlines the design for implementing new API endpoints for `ExternalBiosample` and a new `Project` entity within the Decoding Us application. 
These new endpoints are required to facilitate direct API integration with a "Firehose" team during an MVP phase, preceding a Kafka-based solution. The design will leverage existing API security mechanisms and incorporate soft delete and optimistic locking (`at_cid`) functionalities. + +## 2. Current State Analysis + +### 2.1 Existing Architecture + +The Decoding Us application is built with Scala 3 and the Play Framework, utilizing Slick for database interactions and Tapir for API definition. API security is handled via a token mechanism (e.g., `secureApi` actions). + +### 2.2 ExternalBiosample Entity + +* **Model:** The `Biosample` case class (`app/models/domain/genomics/Biosample.scala`) defines the core data structure. However, the "External" biosamples are fundamentally "Citizen" biosamples (`BiosampleType.Citizen`). +* **Database Schema:** A `citizen_biosample` table exists in the database schema (evolutions) but is currently not represented by a Scala case class or Slick table definition in the codebase. +* **Request DTO:** `ExternalBiosampleRequest` (`app/models/api/ExternalBiosampleRequest.scala`) serves as the payload. +* **Service:** `ExternalBiosampleService` (`app/services/ExternalBiosampleService.scala`) currently operates on the generic `Biosample` entity. +* **Controller:** `ExternalBiosampleController` (`app/controllers/ExternalBiosampleController.scala`) exposes a `create` endpoint using `secureApi.jsonAction`. +* **Current Delete Behavior:** The existing `deleteBiosample` performs a hard delete using `biosampleDataService.fullyDeleteBiosampleAndDependencies`. + +### 2.3 Project Entity + +* **Conceptual:** Currently, there is no direct, standalone `Project` entity with associated CRUD APIs. The term "Project" appears in the context of `GenomicStudy` (e.g., NCBI BioProject) and within UI/documentation elements. +* **Requirement:** The request implies a new, distinct `Project` entity based on `com.decodingus.atmosphere.project` lexicon definition. 
+ +### 2.4 Data Modeling Consideration: Citizen Biosamples + +The "External Biosamples" ingested via the Firehose are identified as `BiosampleType.Citizen`. There is an existing, unused `citizen_biosample` table in the database schema intended for this purpose. We must decide whether to utilize this separate table or integrate these records into the main `biosample` table. + +**Option A: Separate Table (`citizen_biosample`)** +* **Pros:** + * **Segregation:** Keeps "Citizen" data distinct from other biosample types (Standard, PGP, etc.), which may have different privacy or data retention requirements. + * **Schema Specificity:** Allows for columns specific to Citizen biosamples (e.g., `citizen_biosample_did`) without cluttering the main table. + * **Performance:** Potentially better performance for type-specific queries if volume is high. +* **Cons:** + * **Complexity:** Requires joining with `biosample` (if shared fields exist there) or duplicating shared columns (description, sex, etc.). + * **Maintenance:** Requires creating and maintaining new Scala models (`CitizenBiosample`), tables, and repositories. + * **Fragmentation:** Logic acting on "all biosamples" becomes more complex. + +**Option B: Unified Table (`biosample` with Type)** +* **Pros:** + * **Simplicity:** Single table and model for all biosamples. + * **Unified Querying:** Easier to query "all biosamples" regardless of type. + * **Existing Tooling:** Leverages existing `BiosampleRepository` and services. +* **Cons:** + * **Sparse Columns:** Columns specific to Citizen biosamples (like `citizen_biosample_did`) will be null for other types. + * **Table Bloat:** Table grows with all types combined. + +**Decision:** The original design intent was segregation. Given the specific requirements for Citizen biosamples (DIDs, potential different lifecycle), **we will proceed with Option A (Separate Table)** to align with the original schema design. 
This requires plumbing the `citizen_biosample` table into the application layer. + +## 3. Proposed API Endpoints + +The following endpoints are to be implemented, using the existing API security layer: + +### 3.1 ExternalBiosample API Endpoints + +* **Create ExternalBiosample:** + * **Method:** `POST` + * **Path:** `/api/external-biosamples` + * **Request Body:** JSON payload conforming to `ExternalBiosampleRequest`. + * **Response:** `201 Created` with the created resource's ID (e.g., `sampleGuid`). +* **Update ExternalBiosample:** + * **Method:** `PUT` + * **Path:** `/api/external-biosamples/{sampleGuid}` (using `sampleGuid` as the unique identifier for updates) + * **Request Body:** JSON payload conforming to `ExternalBiosampleRequest`, including the `atCid` for optimistic locking. + * **Response:** `200 OK` or `204 No Content`. +* **Delete ExternalBiosample (Soft Delete):** + * **Method:** `DELETE` + * **Path:** `/api/external-biosamples/{sampleGuid}` + * **Request Body:** (Optional) Minimal JSON body for confirmation or reason. + * **Response:** `204 No Content`. + +### 3.2 Project API Endpoints + +* **Create Project:** + * **Method:** `POST` + * **Path:** `/api/projects` + * **Request Body:** JSON payload conforming to the new `ProjectRequest` DTO. + * **Response:** `201 Created` with the created resource's ID (e.g., `projectGuid`). +* **Update Project:** + * **Method:** `PUT` + * **Path:** `/api/projects/{projectGuid}` + * **Request Body:** JSON payload conforming to `ProjectRequest`, including the `atCid` for optimistic locking. + * **Response:** `200 OK` or `204 No Content`. +* **Delete Project (Soft Delete):** + * **Method:** `DELETE` + * **Path:** `/api/projects/{projectGuid}` + * **Request Body:** (Optional) Minimal JSON body for confirmation or reason. + * **Response:** `204 No Content`. + +## 4. 
ExternalBiosample Design + +### 4.0 ExternalBiosample DTO Definitions + +To provide the Firehose team with a clear understanding of the data structures, here are the full definitions for the `ExternalBiosampleRequest` (with `atCid` for updates) and a proposed `ExternalBiosampleResponse`. + +```scala +// app/models/api/ExternalBiosampleRequest.scala - Full Definition + +package models.api + +import models.domain.genomics.{BiologicalSex, BiosampleType} +import play.api.libs.json.{Json, OFormat} +import java.time.LocalDateTime +import java.util.UUID + +/** + * Represents a request for an external biosample, containing the metadata and associated information + * related to the sample and its sequencing data. + * + * @param sampleAccession Native identifier provided by the client for the biosample. + * @param sourceSystem Origin system or data source associated with the biosample (e.g., "evolbio", "pgp"). + * @param description A textual description of the biosample. + * @param alias Optional alias for the biosample, provided by the client. + * @param centerName Name of the institution or center handling the biosample. + * @param sex Optional biological sex information for the biosample. + * @param latitude Optional geographical latitude information related to the biosample. + * @param longitude Optional geographical longitude information related to the biosample. + * @param citizenDid Optional decentralized identifier (DID) for linking to a citizen/PDS user. + * @param donorIdentifier Optional identifier for the donor. + * @param donorType Optional type of the donor (e.g., Citizen, PGP, Standard). + * @param publication Optional publication information related to the biosample, represented by the `PublicationInfo` structure. + * @param sequenceData Information regarding the sequencing data associated with the biosample, represented by the `SequenceDataInfo` structure. + * @param atCid Optional: For optimistic locking during updates. 
+ */ +case class ExternalBiosampleRequest( + sampleAccession: String, // Client provides their native identifier + sourceSystem: String, // e.g., "evolbio", "pgp", etc. + description: String, + alias: Option[String], + centerName: String, + sex: Option[BiologicalSex], + latitude: Option[Double], + longitude: Option[Double], + citizenDid: Option[String], + donorIdentifier: Option[String], + donorType: Option[BiosampleType], + publication: Option[PublicationInfo], + sequenceData: SequenceDataInfo, + atCid: Option[String] = None // For optimistic locking during updates + ) + +object ExternalBiosampleRequest { + implicit val externalBiosampleRequest: OFormat[ExternalBiosampleRequest] = Json.format +} + +/** + * Represents publication-related information, including details such as DOI, PubMed ID, + * and original haplogroup data. + * + * @constructor Creates an instance of `PublicationInfo` to encapsulate key publication + * identifiers and data related to haplogroups. + * @param doi An optional DOI (Digital Object Identifier) for the publication. + * @param pubmedId An optional PubMed ID associated with the publication. + * @param originalHaplogroups Optionally represents original haplogroup information, + * encapsulated in a `HaplogroupInfo` instance. + */ +case class PublicationInfo( + doi: Option[String], + pubmedId: Option[String], + originalHaplogroups: Option[HaplogroupInfo] + ) + +object PublicationInfo { + implicit val publicationInfo: OFormat[PublicationInfo] = Json.format +} + +/** + * Represents information about Y-DNA and mitochondrial DNA (mtDNA) haplogroups, + * along with optional notes for additional context. + * + * @param yHaplogroup An optional string representing the Y-DNA haplogroup. + * This is typically associated with paternal lineage. + * @param mtHaplogroup An optional string representing the mitochondrial DNA (mtDNA) haplogroup. + * This is typically associated with maternal lineage. 
+ * @param notes An optional string for any additional notes or descriptive information + * about the haplogroup or its context. + */ +case class HaplogroupInfo( + yHaplogroup: Option[String], + mtHaplogroup: Option[String], + notes: Option[String] + ) + +object HaplogroupInfo { + implicit val haplogroupInfo: OFormat[HaplogroupInfo] = Json.format +} + +/** + * Represents metadata and related information about a sequence dataset. + * + * This case class encapsulates information about sequencing data, including + * details such as the number of reads, read length, coverage, sequencing platform, + * test type, and associated files. + * + * @param reads An optional number of reads in the sequencing data. + * @param readLength An optional read length, indicating the length of individual reads. + * @param coverage An optional coverage value representing the depth of sequencing. + * @param platformName The name of the sequencing platform used to generate the data. + * @param testType The type of sequencing test performed. + * @param files A sequence of file metadata, represented by `FileInfo`, containing information + * about the files associated with the sequencing data. + */ +case class SequenceDataInfo( + reads: Option[Int], + readLength: Option[Int], + coverage: Option[Double], + platformName: String, + testType: String, + files: Seq[FileInfo] + ) + +object SequenceDataInfo { + implicit val sequenceDataInfo: OFormat[SequenceDataInfo] = Json.format +} + +/** + * Represents the information of a library, typically used in a laboratory or sequencing context. + * + * @param lab The name of the laboratory or site where sequencing or processing occurred. + * @param testType The type of test or sequencing performed. + * @param runDate The timestamp for when the sequencing or test run took place. + * @param instrument The identifier or name of the instrument used in the sequencing process. + * @param reads The total number of reads generated during the sequencing. 
+ * @param readLength The length of each read in base pairs. + * @param pairedEnd Indicates whether the sequencing was performed using paired-end reads. + * @param insertSize Optional parameter specifying the insert size for paired-end reads, if applicable. + */ +case class LibraryInfo( + lab: String, + testType: String, + runDate: LocalDateTime, + instrument: String, + reads: Long, + readLength: Int, + pairedEnd: Boolean, + insertSize: Option[Int] + ) + +object LibraryInfo { + implicit val libraryInfo: OFormat[LibraryInfo] = Json.format +} + +/** + * Represents location information including a file URL and an optional file index URL. + * + * This class is useful for storing and managing metadata related to file locations, + * such as a primary file's URL and its associated index file's URL, if available. The + * `fileIndexUrl` is optional to accommodate cases where an index file is not provided. + * + * @param fileUrl The URL pointing to the primary file location. + * @param fileIndexUrl An optional URL pointing to the index file associated with the primary file. + */ +case class LocationInfo( + fileUrl: String, + fileIndexUrl: Option[String] + ) + +object LocationInfo { + implicit val locationInfo: OFormat[LocationInfo] = Json.format +} + +/** + * Represents checksum information including the checksum value and the algorithm used. + * + * @param checksum The checksum value as a string. + * @param algorithm The algorithm used to generate the checksum. + */ +case class ChecksumInfo( + checksum: String, + algorithm: String + ) + +object ChecksumInfo { + implicit val checksumInfo: OFormat[ChecksumInfo] = Json.format +} + +/** + * Represents metadata for a file, including its name, size, format, aligner used, target reference, + * associated checksums, and its location. + * + * @param fileName The name of the file. + * @param fileSizeBytes The size of the file in bytes. + * @param fileFormat The format of the file, indicating the file type or extension. 
+ * @param aligner The aligner used for processing or generating the file. + * @param targetReference The reference target associated with the file. + * @param checksums A sequence of checksum information objects associated with the file. + * @param location Information about the file's location, including its URL and optional index URL. + */ +case class FileInfo( + fileName: String, + fileSizeBytes: Long, + fileFormat: String, + aligner: String, + targetReference: String, + checksums: Seq[ChecksumInfo], + location: LocationInfo + ) + +object FileInfo { + implicit val fileInfo: OFormat[FileInfo] = Json.format +} + +``` + +```scala +// Proposed ExternalBiosampleResponse.scala +// This DTO would be returned on successful creation or update of an ExternalBiosample. + +package models.api + +import play.api.libs.json.{Json, OFormat} +import java.time.LocalDateTime +import java.util.UUID +import models.domain.genomics.{BiologicalSex, BiosampleType} // Assuming BiosampleType and BiologicalSex are needed in response + +case class ExternalBiosampleResponse( + sampleGuid: UUID, + sampleAccession: String, + sourceSystem: String, + description: String, + alias: Option[String], + centerName: String, + sex: Option[BiologicalSex], + latitude: Option[Double], + longitude: Option[Double], + citizenDid: Option[String], + donorIdentifier: Option[String], + donorType: Option[BiosampleType], + publication: Option[PublicationInfo], // Re-use PublicationInfo from request + sequenceData: SequenceDataInfo, // Re-use SequenceDataInfo from request + atCid: Option[String], // Current CID for optimistic locking + createdAt: LocalDateTime, + updatedAt: LocalDateTime, + deleted: Boolean + ) + +object ExternalBiosampleResponse { + // It's crucial that the JSON formatters for nested types (PublicationInfo, SequenceDataInfo, etc.) + // are in scope when defining this formatter. They are typically defined in their own companion objects. 
+ implicit val externalBiosampleResponse: OFormat[ExternalBiosampleResponse] = Json.format +} +``` + +### 4.1 Model and Database Schema Changes + +To support the "Citizen Biosample" segregation strategy: + +* **New Model (`app/models/domain/genomics/CitizenBiosample.scala`):** + * Create a case class `CitizenBiosample` mapping to the `citizen_biosample` table. + * Fields: `id`, `citizenBiosampleDid`, `sourcePlatform`, `collectionDate`, `sex`, `geocoord`, `description`, `sampleGuid`, `deleted`, `atCid`, `createdAt`, `updatedAt`. +* **New Table (`app/models/dal/domain/genomics/CitizenBiosamplesTable.scala`):** + * Define the Slick table mapping for `citizen_biosample`. +* **Database Migration:** + * A Slick evolution script is required to add `deleted`, `atCid`, `createdAt`, and `updatedAt` columns to the existing `citizen_biosample` table. +* **`app/models/domain/genomics/Biosample.scala` (Optional):** + * Adding `deleted`, `atCid`, etc., to the main `Biosample` table is valid for general enhancements but the primary focus here is the `citizen_biosample` implementation. + +### 4.2 DTO Changes + +* **`app/models/api/ExternalBiosampleRequest.scala`:** + * As shown in the full definition above, `atCid: Option[String] = None` has been added. + +### 4.3 Service Layer Changes (`app/services/ExternalBiosampleService.scala`) + +* **Repository Integration:** + * Inject the new `CitizenBiosampleRepository` (and `CitizenBiosampleTable` access). + * Update logic to write to `citizen_biosample` table for these requests. +* **Soft Delete Implementation:** + * Modify `deleteBiosample(sampleGuid: UUID): Future[Boolean]`. + * Instead of the hard delete, it will query the `CitizenBiosample` by `sampleGuid`, set `deleted = true`, and update via repository. +* **Optimistic Locking Implementation:** + * Modify `createBiosampleWithData` to handle `atCid` for updates. + * Check `atCid` against `CitizenBiosample.atCid`. + * Update `CitizenBiosample` record on success. 
+* **Handling `sampleGuid`:** The service will need to resolve `sampleGuid` against the `citizen_biosample` table. +* **Mapping:** Convert `ExternalBiosampleRequest` fields to `CitizenBiosample` model. Note: Some fields in request (like `centerName`) might not map directly if `citizen_biosample` lacks them; strict validation or table schema updates might be needed. + +### 4.4 Controller Layer Changes (`app/controllers/ExternalBiosampleController.scala`) + +* **Route Updates in `conf/routes`:** + * `POST /api/external-biosamples` to `ExternalBiosampleController.create` + * `PUT /api/external-biosamples/:sampleGuid` to `ExternalBiosampleController.update(sampleGuid: UUID)` + * `DELETE /api/external-biosamples/:sampleGuid` to `ExternalBiosampleController.delete(sampleGuid: UUID)` +* **New `update` method:** A new `update` action will be added, taking `sampleGuid` from the path and `ExternalBiosampleRequest` from the body. It will call the appropriate service method. +* **New `delete` method:** A new `delete` action will be added, taking `sampleGuid` from the path and calling the service's soft delete method. + +### 4.5 Impact on Existing Functionalities + +* **Queries:** Since `CitizenBiosample` data resides in a separate table, existing `Biosample` queries will not be affected (they won't see these records). New queries targeting `citizen_biosample` must respect the `deleted` flag. +* **`deleteBiosample` in `ExternalBiosampleController`:** The existing `deleteBiosample` in the controller should be removed or adapted to the new soft delete logic and path. + +## 5. Project Design + +The `Project` entity will be entirely new. 
+ +### 5.1 Model Definition (`app/models/domain/Project.scala`) + +```scala +package models.domain + +import java.time.LocalDateTime +import java.util.UUID + +case class Project( + id: Option[Int] = None, + projectGuid: UUID, + name: String, + description: Option[String] = None, + ownerDid: String, // Decentralized Identifier of the project owner + createdAt: LocalDateTime, + updatedAt: LocalDateTime, + deleted: Boolean = false, + atCid: Option[String] = None // For optimistic locking + ) +``` + +### 5.2 Database Schema (`app/models/dal/ProjectTable.scala`) + +A new Slick table definition `ProjectTable` in `app/models/dal/domain/ProjectTable.scala` will be created, mirroring the `Project` case class fields. + +* `id` (PrimaryKey, AutoInc) +* `projectGuid` (UUID, Unique) +* `name` (String) +* `description` (Option[String]) +* `ownerDid` (String) +* `createdAt` (LocalDateTime) +* `updatedAt` (LocalDateTime) +* `deleted` (Boolean, Default `false`) +* `atCid` (Option[String]) +* **Database Migration:** A new Slick evolution script will be required to create the `project` table. + +### 5.3 Repository Layer (`app/repositories/ProjectRepository.scala`) + +A new `ProjectRepository` will be created to handle database CRUD operations for the `Project` entity using Slick. + +* `create(project: Project): Future[Project]` +* `findByProjectGuid(projectGuid: UUID): Future[Option[Project]]` (will filter `deleted = false`) +* `update(project: Project): Future[Int]` (returns number of updated rows) +* `softDelete(projectGuid: UUID): Future[Int]` + +### 5.4 Service Layer (`app/services/ProjectService.scala`) + +A new `ProjectService` will be created to encapsulate the business logic for Project operations. + +* `createProject(request: ProjectRequest, ownerDid: String): Future[UUID]` + * Generates `projectGuid`, `createdAt`, `updatedAt`, initial `atCid`. + * Calls `projectRepository.create()`. 
+* `updateProject(projectGuid: UUID, request: ProjectRequest): Future[UUID]` + * Fetches existing `Project` by `projectGuid` (ensuring `deleted = false`). + * Performs optimistic locking check with `request.atCid`. + * Updates fields, generates new `atCid`, sets `updatedAt`. + * Calls `projectRepository.update()`. +* `softDeleteProject(projectGuid: UUID): Future[Boolean]` + * Calls `projectRepository.softDelete()`. + +### 5.5 Controller Layer (`app/controllers/ProjectController.scala`) + +A new `ProjectController` will expose the API endpoints. + +* Inject `ProjectService` and the `ApiSecurityAction`. +* Implement `create`, `update`, and `delete` actions using `secureApi.jsonAction`. + * `create` will take `ProjectRequest` and return `201 Created`. + * `update` will take `projectGuid` from path, `ProjectRequest` from body, perform optimistic locking, and return `200 OK` or `204 No Content`. + * `delete` will take `projectGuid` from path and return `204 No Content`. + +### 5.6 DTO Definitions (`app/models/api/ProjectRequest.scala`, `app/models/api/ProjectResponse.scala`) + +```scala +// app/models/api/ProjectRequest.scala +package models.api + +import play.api.libs.json.{Json, OFormat} +import java.util.UUID + +case class ProjectRequest( + name: String, + description: Option[String] = None, + atCid: Option[String] = None // For optimistic locking during updates + ) + +object ProjectRequest { + implicit val format: OFormat[ProjectRequest] = Json.format +} + +// app/models/api/ProjectResponse.scala +package models.api + +import play.api.libs.json.{Json, OFormat} +import java.time.LocalDateTime +import java.util.UUID + +case class ProjectResponse( + projectGuid: UUID, + name: String, + description: Option[String], + ownerDid: String, + createdAt: LocalDateTime, + updatedAt: LocalDateTime, + atCid: Option[String] + ) + +object ProjectResponse { + implicit val format: OFormat[ProjectResponse] = Json.format +} +``` + +### 5.7 Routing (`conf/routes`) + +* `POST 
/api/projects` to `ProjectController.create` +* `PUT /api/projects/:projectGuid` to `ProjectController.update(projectGuid: UUID)` +* `DELETE /api/projects/:projectGuid` to `ProjectController.delete(projectGuid: UUID)` + +## 6. API Security + +Both `ExternalBiosampleController` and `ProjectController` will utilize the existing `secureApi` action provided by the framework, ensuring that all new endpoints are protected by the token mechanism. The `ownerDid` field in `Project` (and the `citizenDid` for `ExternalBiosample` operations if applicable) will be used for authorization checks within the service layer to ensure users can only modify their own resources. + +## 7. Optimistic Locking Strategy (`at_cid`) + +* **Mechanism:** An `atCid: Option[String]` field will be added to both `CitizenBiosample` and `Project` models. This `atCid` will act as a version identifier. +* **Generation:** A new `atCid` (e.g., a UUID or a hash of the content) will be generated and stored whenever a resource is created or successfully updated. +* **Validation:** For `PUT` (update) operations, the incoming `request.atCid` must match the `atCid` currently stored in the database for that resource. If they do not match, it indicates a concurrent modification, and the update will be rejected with a `409 Conflict` status. +* **Response:** The new `atCid` will be returned as part of the `ProjectResponse` or the `ExternalBiosample` update response, allowing the client to maintain the correct version for subsequent updates. + +## 8. Soft Delete Strategy + +* **Mechanism:** A `deleted: Boolean` field (default `false`) will be added to both `CitizenBiosample` and `Project` models. +* **Deletion:** Instead of physically removing records, a "delete" operation will set the `deleted` flag to `true` and update the `updatedAt` timestamp. 
+* **Retrieval:** All standard read operations (e.g., `findByProjectGuid`, `findAll`) in the repositories and services must implicitly filter out records where `deleted = true`. Specific administrative endpoints could potentially retrieve deleted records if required. +* **Hard Delete:** The `biosampleDataService.fullyDeleteBiosampleAndDependencies` currently performs a hard delete on standard biosamples. This will be reserved for system cleanup or administrative purposes, distinct from the user-facing "delete" operation on `CitizenBiosample` and `Project`. + +## 9. Open Questions / Assumptions + +* **`com.decodingus.atmosphere.project` Lexicon Definition:** The specific fields and their types for the `Project` entity are assumed based on common project management attributes. Further clarification on the exact "Lexicon's main definition" would be beneficial to refine the `Project` model. +* **`at_uri` vs. `sampleGuid`/`projectGuid`:** The prompt mentions `/{at_uri}` for paths. This document assumes that `sampleGuid` (for ExternalBiosample) and `projectGuid` (for Project) will serve as the unique identifiers in the URL paths, and `at_uri` is a conceptual identifier from the Nexus service that maps to our internal GUIDs. If `at_uri` is a distinct, externally managed identifier that needs to be stored and used directly, the models and routing would need adjustment. +* **`at_cid` Generation Logic:** The exact algorithm for generating `at_cid` (e.g., simple UUID, hash of content, incrementing version number) needs to be decided. For this design, a UUID or simple version string is assumed. +* **Authorization for Project:** For the `Project` entity, the design assumes an `ownerDid` field, and authorization will ensure only the `ownerDid` can modify/delete their own projects. +* **Error Handling:** Standard Play Framework error handling will be used for `409 Conflict` (optimistic locking) and `404 Not Found`. 
+* **Tapir Integration:** While the endpoints are described, the explicit Tapir definitions (`app/api/`) are not detailed but will be created as part of the implementation. +* **Existing `BiosampleController`:** The `BiosampleController` will remain in place to serve existing UI interactions or other API consumers, operating on the non-`deleted` biosamples. The `ExternalBiosampleController` will handle the Firehose team's specific integration. \ No newline at end of file From 75178a8aaf58b0c6b5ec6ee486388e3fe2e2721a Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 06:11:25 -0600 Subject: [PATCH 13/31] Add Firehose API Specification section to API Extension Design --- documents/API_Extension_Design.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/documents/API_Extension_Design.md b/documents/API_Extension_Design.md index 2270f98..043ad5f 100644 --- a/documents/API_Extension_Design.md +++ b/documents/API_Extension_Design.md @@ -391,6 +391,21 @@ To support the "Citizen Biosample" segregation strategy: * **Queries:** Since `CitizenBiosample` data resides in a separate table, existing `Biosample` queries will not be affected (they won't see these records). New queries targeting `citizen_biosample` must respect the `deleted` flag. * **`deleteBiosample` in `ExternalBiosampleController`:** The existing `deleteBiosample` in the controller should be removed or adapted to the new soft delete logic and path. +### 4.6 Firehose API Specification + +For direct consumption by the Firehose team, the OpenAPI (Swagger) specification for the `ExternalBiosample` endpoints will be made available. + +* **Swagger UI Endpoint:** The full interactive API documentation will be accessible at `/api-docs/swagger-ui`. 
+* **Endpoints:** The relevant endpoints are: + * `POST /api/external-biosamples` (Create ExternalBiosample) + * `PUT /api/external-biosamples/{sampleGuid}` (Update ExternalBiosample) + * `DELETE /api/external-biosamples/{sampleGuid}` (Soft Delete ExternalBiosample) +* **Authentication:** All endpoints are protected by the API security layer. API key authentication will be required (details to be provided separately). +* **Data Transfer Objects (DTOs):** + * **Request:** `ExternalBiosampleRequest` (defined in `4.0 ExternalBiosample DTO Definitions`). + * **Response:** `ExternalBiosampleResponse` (defined in `4.0 ExternalBiosample DTO Definitions`). +* **Optimistic Locking:** For `PUT` operations, ensure the `atCid` from the latest `GET` or `POST` response is included in the request body to prevent concurrent modification conflicts. + ## 5. Project Design The `Project` entity will be entirely new. From 56fe22f883295f82bd90f5ab4dfe559d6d392d2b Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 06:15:10 -0600 Subject: [PATCH 14/31] Remove detailed DTO definitions from API Extension Design document --- documents/API_Extension_Design.md | 250 +----------------------------- 1 file changed, 1 insertion(+), 249 deletions(-) diff --git a/documents/API_Extension_Design.md b/documents/API_Extension_Design.md index 043ad5f..6a7e818 100644 --- a/documents/API_Extension_Design.md +++ b/documents/API_Extension_Design.md @@ -93,255 +93,7 @@ The following endpoints are to be implemented, using the existing API security l ### 4.0 ExternalBiosample DTO Definitions -To provide the Firehose team with a clear understanding of the data structures, here are the full definitions for the `ExternalBiosampleRequest` (with `atCid` for updates) and a proposed `ExternalBiosampleResponse`. 
- -```scala -// app/models/api/ExternalBiosampleRequest.scala - Full Definition - -package models.api - -import models.domain.genomics.{BiologicalSex, BiosampleType} -import play.api.libs.json.{Json, OFormat} -import java.time.LocalDateTime -import java.util.UUID - -/** - * Represents a request for an external biosample, containing the metadata and associated information - * related to the sample and its sequencing data. - * - * @param sampleAccession Native identifier provided by the client for the biosample. - * @param sourceSystem Origin system or data source associated with the biosample (e.g., "evolbio", "pgp"). - * @param description A textual description of the biosample. - * @param alias Optional alias for the biosample, provided by the client. - * @param centerName Name of the institution or center handling the biosample. - * @param sex Optional biological sex information for the biosample. - * @param latitude Optional geographical latitude information related to the biosample. - * @param longitude Optional geographical longitude information related to the biosample. - * @param citizenDid Optional decentralized identifier (DID) for linking to a citizen/PDS user. - * @param donorIdentifier Optional identifier for the donor. - * @param donorType Optional type of the donor (e.g., Citizen, PGP, Standard). - * @param publication Optional publication information related to the biosample, represented by the `PublicationInfo` structure. - * @param sequenceData Information regarding the sequencing data associated with the biosample, represented by the `SequenceDataInfo` structure. - * @param atCid Optional: For optimistic locking during updates. - */ -case class ExternalBiosampleRequest( - sampleAccession: String, // Client provides their native identifier - sourceSystem: String, // e.g., "evolbio", "pgp", etc. 
- description: String, - alias: Option[String], - centerName: String, - sex: Option[BiologicalSex], - latitude: Option[Double], - longitude: Option[Double], - citizenDid: Option[String], - donorIdentifier: Option[String], - donorType: Option[BiosampleType], - publication: Option[PublicationInfo], - sequenceData: SequenceDataInfo, - atCid: Option[String] = None // For optimistic locking during updates - ) - -object ExternalBiosampleRequest { - implicit val externalBiosampleRequest: OFormat[ExternalBiosampleRequest] = Json.format -} - -/** - * Represents publication-related information, including details such as DOI, PubMed ID, - * and original haplogroup data. - * - * @constructor Creates an instance of `PublicationInfo` to encapsulate key publication - * identifiers and data related to haplogroups. - * @param doi An optional DOI (Digital Object Identifier) for the publication. - * @param pubmedId An optional PubMed ID associated with the publication. - * @param originalHaplogroups Optionally represents original haplogroup information, - * encapsulated in a `HaplogroupInfo` instance. - */ -case class PublicationInfo( - doi: Option[String], - pubmedId: Option[String], - originalHaplogroups: Option[HaplogroupInfo] - ) - -object PublicationInfo { - implicit val publicationInfo: OFormat[PublicationInfo] = Json.format -} - -/** - * Represents information about Y-DNA and mitochondrial DNA (mtDNA) haplogroups, - * along with optional notes for additional context. - * - * @param yHaplogroup An optional string representing the Y-DNA haplogroup. - * This is typically associated with paternal lineage. - * @param mtHaplogroup An optional string representing the mitochondrial DNA (mtDNA) haplogroup. - * This is typically associated with maternal lineage. - * @param notes An optional string for any additional notes or descriptive information - * about the haplogroup or its context. 
- */ -case class HaplogroupInfo( - yHaplogroup: Option[String], - mtHaplogroup: Option[String], - notes: Option[String] - ) - -object HaplogroupInfo { - implicit val haplogroupInfo: OFormat[HaplogroupInfo] = Json.format -} - -/** - * Represents metadata and related information about a sequence dataset. - * - * This case class encapsulates information about sequencing data, including - * details such as the number of reads, read length, coverage, sequencing platform, - * test type, and associated files. - * - * @param reads An optional number of reads in the sequencing data. - * @param readLength An optional read length, indicating the length of individual reads. - * @param coverage An optional coverage value representing the depth of sequencing. - * @param platformName The name of the sequencing platform used to generate the data. - * @param testType The type of sequencing test performed. - * @param files A sequence of file metadata, represented by `FileInfo`, containing information - * about the files associated with the sequencing data. - */ -case class SequenceDataInfo( - reads: Option[Int], - readLength: Option[Int], - coverage: Option[Double], - platformName: String, - testType: String, - files: Seq[FileInfo] - ) - -object SequenceDataInfo { - implicit val sequenceDataInfo: OFormat[SequenceDataInfo] = Json.format -} - -/** - * Represents the information of a library, typically used in a laboratory or sequencing context. - * - * @param lab The name of the laboratory or site where sequencing or processing occurred. - * @param testType The type of test or sequencing performed. - * @param runDate The timestamp for when the sequencing or test run took place. - * @param instrument The identifier or name of the instrument used in the sequencing process. - * @param reads The total number of reads generated during the sequencing. - * @param readLength The length of each read in base pairs. 
- * @param pairedEnd Indicates whether the sequencing was performed using paired-end reads. - * @param insertSize Optional parameter specifying the insert size for paired-end reads, if applicable. - */ -case class LibraryInfo( - lab: String, - testType: String, - runDate: LocalDateTime, - instrument: String, - reads: Long, - readLength: Int, - pairedEnd: Boolean, - insertSize: Option[Int] - ) - -object LibraryInfo { - implicit val libraryInfo: OFormat[LibraryInfo] = Json.format -} - -/** - * Represents location information including a file URL and an optional file index URL. - * - * This class is useful for storing and managing metadata related to file locations, - * such as a primary file's URL and its associated index file's URL, if available. The - * `fileIndexUrl` is optional to accommodate cases where an index file is not provided. - * - * @param fileUrl The URL pointing to the primary file location. - * @param fileIndexUrl An optional URL pointing to the index file associated with the primary file. - */ -case class LocationInfo( - fileUrl: String, - fileIndexUrl: Option[String] - ) - -object LocationInfo { - implicit val locationInfo: OFormat[LocationInfo] = Json.format -} - -/** - * Represents checksum information including the checksum value and the algorithm used. - * - * @param checksum The checksum value as a string. - * @param algorithm The algorithm used to generate the checksum. - */ -case class ChecksumInfo( - checksum: String, - algorithm: String - ) - -object ChecksumInfo { - implicit val checksumInfo: OFormat[ChecksumInfo] = Json.format -} - -/** - * Represents metadata for a file, including its name, size, format, aligner used, target reference, - * associated checksums, and its location. - * - * @param fileName The name of the file. - * @param fileSizeBytes The size of the file in bytes. - * @param fileFormat The format of the file, indicating the file type or extension. - * @param aligner The aligner used for processing or generating the file. 
- * @param targetReference The reference target associated with the file. - * @param checksums A sequence of checksum information objects associated with the file. - * @param location Information about the file's location, including its URL and optional index URL. - */ -case class FileInfo( - fileName: String, - fileSizeBytes: Long, - fileFormat: String, - aligner: String, - targetReference: String, - checksums: Seq[ChecksumInfo], - location: LocationInfo - ) - -object FileInfo { - implicit val fileInfo: OFormat[FileInfo] = Json.format -} - -``` - -```scala -// Proposed ExternalBiosampleResponse.scala -// This DTO would be returned on successful creation or update of an ExternalBiosample. - -package models.api - -import play.api.libs.json.{Json, OFormat} -import java.time.LocalDateTime -import java.util.UUID -import models.domain.genomics.{BiologicalSex, BiosampleType} // Assuming BiosampleType and BiologicalSex are needed in response - -case class ExternalBiosampleResponse( - sampleGuid: UUID, - sampleAccession: String, - sourceSystem: String, - description: String, - alias: Option[String], - centerName: String, - sex: Option[BiologicalSex], - latitude: Option[Double], - longitude: Option[Double], - citizenDid: Option[String], - donorIdentifier: Option[String], - donorType: Option[BiosampleType], - publication: Option[PublicationInfo], // Re-use PublicationInfo from request - sequenceData: SequenceDataInfo, // Re-use SequenceDataInfo from request - atCid: Option[String], // Current CID for optimistic locking - createdAt: LocalDateTime, - updatedAt: LocalDateTime, - deleted: Boolean - ) - -object ExternalBiosampleResponse { - // It's crucial that the JSON formatters for nested types (PublicationInfo, SequenceDataInfo, etc.) - // are in scope when defining this formatter. They are typically defined in their own companion objects. 
- implicit val externalBiosampleResponse: OFormat[ExternalBiosampleResponse] = Json.format -} -``` +The detailed definitions for `ExternalBiosampleRequest` and `ExternalBiosampleResponse`, including all nested data structures, can be found in the generated OpenAPI (Swagger) specification available at `/api-docs/swagger-ui`. ### 4.1 Model and Database Schema Changes From bebad49f2463c45f96846a2130343f6e53e67273 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 06:46:59 -0600 Subject: [PATCH 15/31] Implement Firehose API for Citizen Biosamples and Projects --- .junie/guidelines.md | 134 ++++++++++++++ .../CitizenBiosampleController.scala | 74 ++++++++ app/controllers/ProjectController.scala | 46 +++++ app/models/api/ExternalBiosampleRequest.scala | 3 +- app/models/api/ProjectRequest.scala | 14 ++ app/models/api/ProjectResponse.scala | 19 ++ app/models/dal/DatabaseSchema.scala | 7 + app/models/dal/domain/ProjectTable.scala | 30 ++++ .../genomics/CitizenBiosamplesTable.scala | 42 +++++ ...izenBiosampleOriginalHaplogroupTable.scala | 42 +++++ .../PublicationCitizenBiosamplesTable.scala | 13 ++ app/models/domain/Project.scala | 16 ++ .../domain/genomics/CitizenBiosample.scala | 26 +++ .../CitizenBiosampleOriginalHaplogroup.scala | 10 ++ .../PublicationCitizenBiosample.scala | 3 + app/modules/BaseModule.scala | 16 ++ ...iosampleOriginalHaplogroupRepository.scala | 32 ++++ .../CitizenBiosampleRepository.scala | 96 ++++++++++ app/repositories/ProjectRepository.scala | 68 ++++++++ ...ublicationCitizenBiosampleRepository.scala | 42 +++++ app/services/CitizenBiosampleService.scala | 165 ++++++++++++++++++ app/services/ProjectService.scala | 73 ++++++++ conf/evolutions/default/22.sql | 51 ++++++ conf/routes | 10 ++ 24 files changed, 1031 insertions(+), 1 deletion(-) create mode 100644 .junie/guidelines.md create mode 100644 app/controllers/CitizenBiosampleController.scala create mode 100644 app/controllers/ProjectController.scala create mode 100644 
app/models/api/ProjectRequest.scala create mode 100644 app/models/api/ProjectResponse.scala create mode 100644 app/models/dal/domain/ProjectTable.scala create mode 100644 app/models/dal/domain/genomics/CitizenBiosamplesTable.scala create mode 100644 app/models/dal/domain/publications/CitizenBiosampleOriginalHaplogroupTable.scala create mode 100644 app/models/dal/domain/publications/PublicationCitizenBiosamplesTable.scala create mode 100644 app/models/domain/Project.scala create mode 100644 app/models/domain/genomics/CitizenBiosample.scala create mode 100644 app/models/domain/publications/CitizenBiosampleOriginalHaplogroup.scala create mode 100644 app/models/domain/publications/PublicationCitizenBiosample.scala create mode 100644 app/repositories/CitizenBiosampleOriginalHaplogroupRepository.scala create mode 100644 app/repositories/CitizenBiosampleRepository.scala create mode 100644 app/repositories/ProjectRepository.scala create mode 100644 app/repositories/PublicationCitizenBiosampleRepository.scala create mode 100644 app/services/CitizenBiosampleService.scala create mode 100644 app/services/ProjectService.scala create mode 100644 conf/evolutions/default/22.sql diff --git a/.junie/guidelines.md b/.junie/guidelines.md new file mode 100644 index 0000000..010dfc9 --- /dev/null +++ b/.junie/guidelines.md @@ -0,0 +1,134 @@ +# Decoding Us — Development Guidelines (Project‑Specific) + +This document captures build, configuration, testing, and development tips that are specific to this repository (Scala 3 + Play 3.x). It assumes an advanced Scala/Play developer. + +## Build & Configuration + +- Toolchain + - Scala: 3.3.6 (see `build.sbt`) + - Play: sbt plugin `org.playframework:sbt-plugin:3.0.9` (see `project/plugins.sbt`) + - sbt: Use a recent sbt 1.10.x; the repo relies on conventional Play/sbt layout. +- Project settings + - Module: single root project, `enablePlugins(PlayScala)` in `build.sbt`. 
+ - JVM: tested with Temurin JDK 21 (Docker runtime uses `eclipse-temurin:21-jre-jammy`). + - Scalac flag: `-Xmax-inlines 128` is set — keep in mind for heavy inline usages/macros. +- Key library versions (selected) + - play-slick 6.2.0 with Postgres JDBC 42.7.8 + - slick‑pg 0.23.1 (+ jts, play-json integrations) + - Tapir 1.11.50 (core, play-server, json-play, swagger-ui-bundle) + - Apache Pekko 1.1.5 (pinned; see Quartz note below) + - pekko‑quartz‑scheduler 1.3.0-pekko-1.1.x + - scalatestplus‑play 7.0.2 (Test) +- Pekko/Quartz pin + - `APACHE_PEKKO_VERSION` is deliberately pinned to 1.1.5 because Quartz requires 1.1.x. Bumping beyond this can cause startup errors. Update Quartz first if you need to lift the pin. + +### Application configuration (conf/application.conf) + +- Secrets and toggles + - `play.http.secret.key` can be overridden by `APPLICATION_SECRET`. + - Sessions are disabled (`play.http.session.disabled = true`). Re‑enable if/when needed. + - Recaptcha: `recaptcha.enable` (env `ENABLE_RECAPTCHA`), keys from env `RECAPTCHA_SECRET_KEY`, `RECAPTCHA_SITE_KEY`. +- Modules + - Enabled: `modules.BaseModule`, `ServicesModule`, `RecaptchaModule`, `StartupModule`, `ApplicationModule`, `ApiSecurityModule`, and Caffeine cache module (`play.api.cache.caffeine.CaffeineCacheModule`). Startup work is performed by `modules.StartupService` (see `app/modules/StartupModule.scala`). +- Caching + - Caffeine is the cache provider; default cache and an explicit `sitemap` cache are configured. +- Database (play-slick) + - Profile: `slick.jdbc.PostgresProfile$` + - JDBC: `jdbc:postgresql://localhost:5432/decodingus_db` + - Default dev creds: `decodingus_user` / `decodingus_password` (override in prod via env/Secrets Manager). +- Evolutions + - `play.evolutions.autocommit = true` enables automatic application of evolutions. Disable for production and manage via CI/migrations. +- AWS & misc + - AWS region default: `us-east-1`; example secrets path included. 
+ - Contact recipient: override via `CONTACT_RECIPIENT_EMAIL`. + - `biosample.hash.salt` configurable via `BIOSAMPLE_HASH_SALT`. +- Logging + - Pekko loglevel DEBUG by default; consider overriding in production. + +### Local run + +- With sbt (recommended during development): + - `sbt run` — starts Play dev server on :9000. Ensure Postgres is available if you touch DB‑backed pages or services. +- With Docker (prebuilt stage) + - The `Dockerfile` expects a staged distribution at `target/universal/stage`. + - Build a universal distribution: `sbt stage` + - Build and run image: + - `docker build -t decodingus:local .` + - `docker run -p 9000:9000 --env APPLICATION_SECRET=... decodingus:local` + - You must also provide DB connectivity (e.g., link a Postgres container or env JDBC overrides) for features requiring DB. + +### Database quickstart (developer machine) + +- Create local Postgres role and database (adjust to your local policy): + - `createuser -P decodingus_user` (password `decodingus_password`), `createdb -O decodingus_user decodingus_db`. +- On first app start with evolutions enabled, Play will apply SQL files from `conf/evolutions/default` in order. +- For tests, prefer isolating DB‑heavy specs using test containers or an in‑memory profile; current suite primarily uses Play functional tests and does not require DB for the simple Home page. + +## Testing + +- Frameworks + - ScalaTest + scalatestplus‑play. Styles vary by spec; the existing suite uses `PlaySpec` and `GuiceOneAppPerTest`. +- Running tests + - Run full suite: `sbt test` + - Run a single suite: `sbt "testOnly controllers.HomeControllerSpec"` + - By pattern: `sbt "testOnly *HomeControllerSpec"` + - Run a single test by name (when supported by the style): `sbt "testOnly controllers.HomeControllerSpec -- -z \"router\""` +- Application DI in tests + - Prefer DI for controllers/services in Play tests to keep constructor wiring aligned with production. 
Example from `test/controllers/HomeControllerSpec.scala`: + ```scala + class HomeControllerSpec extends PlaySpec with GuiceOneAppPerTest with Injecting { + "HomeController GET" should { + "render the index page from the application" in { + val controller = inject[HomeController] + val home = controller.index().apply(FakeRequest(GET, "/")) + status(home) mustBe OK + contentType(home) mustBe Some("text/html") + contentAsString(home) must include ("Welcome to Play") + } + } + } + ``` + - Avoid manual `new Controller(...)` unless you supply all constructor dependencies. The controller constructors in this repo often include `Cached` and `SyncCacheApi` which are bound by Play. +- Demo test (validated process) + - A temporary pure unit test was created and executed to validate commands: + ```scala + // test/utils/DemoSpec.scala + package utils + import org.scalatest.funsuite.AnyFunSuite + class DemoSpec extends AnyFunSuite { + test("math sanity holds") { + assert(1 + 1 == 2) + } + } + ``` + - It was run successfully via the test runner; afterward it was removed to avoid leaving extra files in the repo. + +### Adding new tests + +- Controller tests: use `GuiceOneAppPerTest` or `GuiceOneAppPerSuite` depending on reuse and cost. Use `route(app, FakeRequest(...))` for end‑to‑end route testing. +- Service/utility tests: prefer pure unit tests with ScalaTest `AnyFunSuite` or `AnyFlatSpec` where Play isn’t needed. +- DB‑backed components: consider a separate test configuration/profile and a disposable Postgres schema. If introducing containerized tests, add `testcontainers` and wire a Play `Application` with test DB settings. + +## Development Tips (Repo‑specific) + +- Tapir + - OpenAPI/Swagger UI bundle is included; the site likely serves docs at `/api/docs` (see `HomeController.sitemap()` for the URL hint). +- Caching + - `HomeController.sitemap()` and `robots()` responses are cached for 24h using Caffeine via `Cached` action. 
If you change sitemap structure, remember cache keys. +- Startup behaviors + - `StartupService` performs tree initialization by calling `TreeInitializationService.initializeIfNeeded()` asynchronously at app start. Watch logs to understand conditional imports. +- Views & HTMX + - HTMX is available via WebJars. Views are Twirl templates under `app/views`. The landing page content is used by tests to assert presence of "Welcome to Play". +- Security/config + - Replace the default `play.http.secret.key`, recaptcha keys, and salts in any non‑dev environment. Sessions are disabled by default. +- Code style + - Follow existing formatting and idioms in the repo. Keep controllers lean, services injected. Non‑trivial logic belongs in services under `app/services/*`. + +## Common Tasks & Commands + +- Start dev server: `sbt run` +- Compile: `sbt compile` +- Stage distribution: `sbt stage` +- Run tests: `sbt test` +- Single test: `sbt "testOnly *YourSpec"` + diff --git a/app/controllers/CitizenBiosampleController.scala b/app/controllers/CitizenBiosampleController.scala new file mode 100644 index 0000000..740f477 --- /dev/null +++ b/app/controllers/CitizenBiosampleController.scala @@ -0,0 +1,74 @@ +package controllers + +import actions.ApiSecurityAction +import jakarta.inject.{Inject, Singleton} +import models.api.ExternalBiosampleRequest +import play.api.libs.json.{Json, OFormat} +import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} +import services.{BiosampleServiceException, CitizenBiosampleService, DuplicateAccessionException, InvalidCoordinatesException, PublicationLinkageException, SequenceDataValidationException} + +import java.util.UUID +import scala.concurrent.ExecutionContext + +@Singleton +class CitizenBiosampleController @Inject()( + val controllerComponents: ControllerComponents, + secureApi: ApiSecurityAction, + citizenBiosampleService: CitizenBiosampleService + )(implicit ec: ExecutionContext) extends BaseController { + + def create: 
Action[ExternalBiosampleRequest] = secureApi.jsonAction[ExternalBiosampleRequest].async { request => + citizenBiosampleService.createBiosample(request.body).map { guid => + Created(Json.obj( + "status" -> "success", + "guid" -> guid + )) + }.recover { + case e: DuplicateAccessionException => + Conflict(Json.obj("error" -> "Duplicate accession", "message" -> e.getMessage)) + case e: InvalidCoordinatesException => + BadRequest(Json.obj("error" -> "Invalid coordinates", "message" -> e.getMessage)) + case e: SequenceDataValidationException => + BadRequest(Json.obj("error" -> "Invalid sequence data", "message" -> e.getMessage)) + case e: PublicationLinkageException => + BadRequest(Json.obj("error" -> "Publication linkage failed", "message" -> e.getMessage)) + case e: BiosampleServiceException => + BadRequest(Json.obj("error" -> "Validation error", "message" -> e.getMessage)) + case e: IllegalArgumentException => + Conflict(Json.obj("error" -> "Conflict", "message" -> e.getMessage)) + case e: Exception => + InternalServerError(Json.obj("error" -> "Internal server error", "message" -> e.getMessage)) + } + } + + def update(sampleGuid: UUID): Action[ExternalBiosampleRequest] = secureApi.jsonAction[ExternalBiosampleRequest].async { request => + citizenBiosampleService.updateBiosample(sampleGuid, request.body).map { guid => + Ok(Json.obj( + "status" -> "success", + "guid" -> guid + )) + }.recover { + case e: IllegalStateException => + Conflict(Json.obj("error" -> "Optimistic locking failure", "message" -> e.getMessage)) + case e: NoSuchElementException => + NotFound(Json.obj("error" -> "Biosample not found", "message" -> e.getMessage)) + case e: InvalidCoordinatesException => + BadRequest(Json.obj("error" -> "Invalid coordinates", "message" -> e.getMessage)) + case e: Exception => + InternalServerError(Json.obj("error" -> "Internal server error", "message" -> e.getMessage)) + } + } + + def delete(sampleGuid: UUID): Action[AnyContent] = secureApi.async { request => + 
citizenBiosampleService.deleteBiosample(sampleGuid).map { + case true => NoContent + case false => NotFound(Json.obj("error" -> "Biosample not found", "message" -> s"Biosample with GUID '$sampleGuid' not found.")) + }.recover { + case e: Exception => + InternalServerError(Json.obj( + "error" -> "Internal server error", + "message" -> s"An unexpected error occurred while attempting to delete biosample: ${e.getMessage}" + )) + } + } +} diff --git a/app/controllers/ProjectController.scala b/app/controllers/ProjectController.scala new file mode 100644 index 0000000..d7abd85 --- /dev/null +++ b/app/controllers/ProjectController.scala @@ -0,0 +1,46 @@ +package controllers + +import actions.ApiSecurityAction +import jakarta.inject.{Inject, Singleton} +import models.api.ProjectRequest +import play.api.libs.json.Json +import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} +import services.ProjectService + +import java.util.UUID +import scala.concurrent.ExecutionContext + +@Singleton +class ProjectController @Inject()( + val controllerComponents: ControllerComponents, + secureApi: ApiSecurityAction, + projectService: ProjectService + )(implicit ec: ExecutionContext) extends BaseController { + + def create: Action[ProjectRequest] = secureApi.jsonAction[ProjectRequest].async { request => + projectService.createProject(request.body).map { response => + Created(Json.toJson(response)) + }.recover { + case e: Exception => InternalServerError(Json.obj("error" -> e.getMessage)) + } + } + + def update(projectGuid: UUID): Action[ProjectRequest] = secureApi.jsonAction[ProjectRequest].async { request => + projectService.updateProject(projectGuid, request.body).map { response => + Ok(Json.toJson(response)) + }.recover { + case e: IllegalStateException => Conflict(Json.obj("error" -> e.getMessage)) + case e: NoSuchElementException => NotFound(Json.obj("error" -> e.getMessage)) + case e: Exception => InternalServerError(Json.obj("error" -> e.getMessage)) + } + } + + 
def delete(projectGuid: UUID): Action[AnyContent] = secureApi.async { request => + projectService.deleteProject(projectGuid).map { + case true => NoContent + case false => NotFound(Json.obj("error" -> "Project not found")) + }.recover { + case e: Exception => InternalServerError(Json.obj("error" -> e.getMessage)) + } + } +} diff --git a/app/models/api/ExternalBiosampleRequest.scala b/app/models/api/ExternalBiosampleRequest.scala index 1e95cb3..5d6dcd2 100644 --- a/app/models/api/ExternalBiosampleRequest.scala +++ b/app/models/api/ExternalBiosampleRequest.scala @@ -35,7 +35,8 @@ case class ExternalBiosampleRequest( donorIdentifier: Option[String], donorType: Option[BiosampleType], publication: Option[PublicationInfo], - sequenceData: SequenceDataInfo + sequenceData: SequenceDataInfo, + atCid: Option[String] = None ) object ExternalBiosampleRequest { diff --git a/app/models/api/ProjectRequest.scala b/app/models/api/ProjectRequest.scala new file mode 100644 index 0000000..b43b00d --- /dev/null +++ b/app/models/api/ProjectRequest.scala @@ -0,0 +1,14 @@ +package models.api + +import play.api.libs.json.{Json, OFormat} +import java.util.UUID + +case class ProjectRequest( + name: String, + description: Option[String] = None, + atCid: Option[String] = None + ) + +object ProjectRequest { + implicit val format: OFormat[ProjectRequest] = Json.format +} diff --git a/app/models/api/ProjectResponse.scala b/app/models/api/ProjectResponse.scala new file mode 100644 index 0000000..97f21a7 --- /dev/null +++ b/app/models/api/ProjectResponse.scala @@ -0,0 +1,19 @@ +package models.api + +import play.api.libs.json.{Json, OFormat} +import java.time.LocalDateTime +import java.util.UUID + +case class ProjectResponse( + projectGuid: UUID, + name: String, + description: Option[String], + ownerDid: String, + createdAt: LocalDateTime, + updatedAt: LocalDateTime, + atCid: Option[String] + ) + +object ProjectResponse { + implicit val format: OFormat[ProjectResponse] = Json.format +} diff --git 
a/app/models/dal/DatabaseSchema.scala b/app/models/dal/DatabaseSchema.scala index c9e10b4..f1fc04b 100644 --- a/app/models/dal/DatabaseSchema.scala +++ b/app/models/dal/DatabaseSchema.scala @@ -67,6 +67,7 @@ object DatabaseSchema { val assemblyMetadata = TableQuery[AssemblyMetadataTable] val biosampleHaplogroups = TableQuery[BiosampleHaplogroupsTable] val biosamples = TableQuery[BiosamplesTable] + val citizenBiosamples = TableQuery[CitizenBiosamplesTable] val genbankContigs = TableQuery[GenbankContigsTable] val geneAnnotations = TableQuery[GeneAnnotationsTable] val populations = TableQuery[PopulationsTable] @@ -105,14 +106,20 @@ object DatabaseSchema { val genomicStudies = TableQuery[GenomicStudiesTable] val publications = TableQuery[PublicationsTable] val publicationBiosamples = TableQuery[PublicationBiosamplesTable] + val publicationCitizenBiosamples = TableQuery[PublicationCitizenBiosamplesTable] val publicationGenomicStudies = TableQuery[PublicationEnaStudiesTable] val biosampleOriginalHaplogroups = TableQuery[BiosampleOriginalHaplogroupTable] + val citizenBiosampleOriginalHaplogroups = TableQuery[CitizenBiosampleOriginalHaplogroupTable] } object ibd { val ibdDiscoveryIndices = TableQuery[IbdDiscoveryIndicesTable] val ibdPdsAttestationsTable = TableQuery[IbdPdsAttestationsTable] } + + object project { + val projects = TableQuery[ProjectTable] + } } object auth { diff --git a/app/models/dal/domain/ProjectTable.scala b/app/models/dal/domain/ProjectTable.scala new file mode 100644 index 0000000..4544755 --- /dev/null +++ b/app/models/dal/domain/ProjectTable.scala @@ -0,0 +1,30 @@ +package models.dal.domain + +import models.domain.Project +import slick.jdbc.PostgresProfile.api.* +import java.time.LocalDateTime +import java.util.UUID + +class ProjectTable(tag: Tag) extends Table[Project](tag, "project") { + def id = column[Int]("id", O.PrimaryKey, O.AutoInc) + def projectGuid = column[UUID]("project_guid", O.Unique) + def name = column[String]("name") + def 
description = column[Option[String]]("description") + def ownerDid = column[String]("owner_did") + def createdAt = column[LocalDateTime]("created_at") + def updatedAt = column[LocalDateTime]("updated_at") + def deleted = column[Boolean]("deleted", O.Default(false)) + def atCid = column[Option[String]]("at_cid") + + def * = ( + id.?, + projectGuid, + name, + description, + ownerDid, + createdAt, + updatedAt, + deleted, + atCid + ).mapTo[Project] +} diff --git a/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala b/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala new file mode 100644 index 0000000..e9a3325 --- /dev/null +++ b/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala @@ -0,0 +1,42 @@ +package models.dal.domain.genomics + +import models.dal.MyPostgresProfile.api.* +import models.domain.genomics.{BiologicalSex, CitizenBiosample} +import com.vividsolutions.jts.geom.Point +import java.time.{LocalDate, LocalDateTime} +import java.util.UUID + +class CitizenBiosamplesTable(tag: Tag) extends Table[CitizenBiosample](tag, "citizen_biosample") { + def id = column[Int]("id", O.PrimaryKey, O.AutoInc) + def citizenBiosampleDid = column[Option[String]]("citizen_biosample_did", O.Unique) + def accession = column[Option[String]]("accession") + def alias = column[Option[String]]("alias") + def sourcePlatform = column[Option[String]]("source_platform") + def collectionDate = column[Option[LocalDate]]("collection_date") + def sex = column[Option[BiologicalSex]]("sex") + def geocoord = column[Option[Point]]("geocoord") + def description = column[Option[String]]("description") + def sampleGuid = column[UUID]("sample_guid") + + def deleted = column[Boolean]("deleted", O.Default(false)) + def atCid = column[Option[String]]("at_cid") + def createdAt = column[LocalDateTime]("created_at") + def updatedAt = column[LocalDateTime]("updated_at") + + def * = ( + id.?, + citizenBiosampleDid, + accession, + alias, + sourcePlatform, + collectionDate, + sex, + 
geocoord, + description, + sampleGuid, + deleted, + atCid, + createdAt, + updatedAt + ).mapTo[CitizenBiosample] +} diff --git a/app/models/dal/domain/publications/CitizenBiosampleOriginalHaplogroupTable.scala b/app/models/dal/domain/publications/CitizenBiosampleOriginalHaplogroupTable.scala new file mode 100644 index 0000000..25e4cd3 --- /dev/null +++ b/app/models/dal/domain/publications/CitizenBiosampleOriginalHaplogroupTable.scala @@ -0,0 +1,42 @@ +package models.dal.domain.publications + +import models.dal.MyPostgresProfile.api.* +import models.dal.domain.genomics.CitizenBiosamplesTable +import models.domain.publications.CitizenBiosampleOriginalHaplogroup + +class CitizenBiosampleOriginalHaplogroupTable(tag: Tag) + extends Table[CitizenBiosampleOriginalHaplogroup](tag, "citizen_biosample_original_haplogroup") { + + def id = column[Int]("id", O.PrimaryKey, O.AutoInc) + def citizenBiosampleId = column[Int]("citizen_biosample_id") + def publicationId = column[Int]("publication_id") + def originalYHaplogroup = column[Option[String]]("original_y_haplogroup") + def originalMtHaplogroup = column[Option[String]]("original_mt_haplogroup") + def notes = column[Option[String]]("notes") + + // Foreign key relationships + def citizenBiosample = foreignKey( + "citizen_biosample_original_haplogroup_citizen_biosample_id_fkey", + citizenBiosampleId, + TableQuery[CitizenBiosamplesTable])(_.id, onDelete = ForeignKeyAction.Cascade) + + def publication = foreignKey( + "citizen_biosample_original_haplogroup_publication_id_fkey", + publicationId, + TableQuery[PublicationsTable])(_.id, onDelete = ForeignKeyAction.Cascade) + + def uniqueCitizenBiosamplePublication = index( + "citizen_biosample_original_haplogroup_cb_id_publication_id_key", + (citizenBiosampleId, publicationId), + unique = true + ) + + def * = ( + id.?, + citizenBiosampleId, + publicationId, + originalYHaplogroup, + originalMtHaplogroup, + notes + ).mapTo[CitizenBiosampleOriginalHaplogroup] +} diff --git 
a/app/models/dal/domain/publications/PublicationCitizenBiosamplesTable.scala b/app/models/dal/domain/publications/PublicationCitizenBiosamplesTable.scala new file mode 100644 index 0000000..ff5d1ff --- /dev/null +++ b/app/models/dal/domain/publications/PublicationCitizenBiosamplesTable.scala @@ -0,0 +1,13 @@ +package models.dal.domain.publications + +import models.domain.publications.PublicationCitizenBiosample +import slick.jdbc.PostgresProfile.api.* + +class PublicationCitizenBiosamplesTable(tag: Tag) extends Table[PublicationCitizenBiosample](tag, "publication_citizen_biosample") { + def publicationId = column[Int]("publication_id") + def citizenBiosampleId = column[Int]("citizen_biosample_id") + + def * = (publicationId, citizenBiosampleId).mapTo[PublicationCitizenBiosample] + + def pkey = primaryKey("publication_citizen_biosample_pkey", (publicationId, citizenBiosampleId)) +} diff --git a/app/models/domain/Project.scala b/app/models/domain/Project.scala new file mode 100644 index 0000000..e2a2d0d --- /dev/null +++ b/app/models/domain/Project.scala @@ -0,0 +1,16 @@ +package models.domain + +import java.time.LocalDateTime +import java.util.UUID + +case class Project( + id: Option[Int] = None, + projectGuid: UUID, + name: String, + description: Option[String] = None, + ownerDid: String, + createdAt: LocalDateTime, + updatedAt: LocalDateTime, + deleted: Boolean = false, + atCid: Option[String] = None + ) diff --git a/app/models/domain/genomics/CitizenBiosample.scala b/app/models/domain/genomics/CitizenBiosample.scala new file mode 100644 index 0000000..96e52b8 --- /dev/null +++ b/app/models/domain/genomics/CitizenBiosample.scala @@ -0,0 +1,26 @@ +package models.domain.genomics + +import com.vividsolutions.jts.geom.Point +import java.time.{LocalDate, LocalDateTime} +import java.util.UUID + +/** + * Represents a biosample of type "Citizen", typically ingested from external sources/Firehose. + * Maps to the `citizen_biosample` table. 
+ */ +case class CitizenBiosample( + id: Option[Int] = None, + citizenBiosampleDid: Option[String], + accession: Option[String], + alias: Option[String], + sourcePlatform: Option[String], + collectionDate: Option[LocalDate], + sex: Option[BiologicalSex], + geocoord: Option[Point], + description: Option[String], + sampleGuid: UUID, + deleted: Boolean = false, + atCid: Option[String] = None, + createdAt: LocalDateTime = LocalDateTime.now(), + updatedAt: LocalDateTime = LocalDateTime.now() + ) diff --git a/app/models/domain/publications/CitizenBiosampleOriginalHaplogroup.scala b/app/models/domain/publications/CitizenBiosampleOriginalHaplogroup.scala new file mode 100644 index 0000000..6d85fff --- /dev/null +++ b/app/models/domain/publications/CitizenBiosampleOriginalHaplogroup.scala @@ -0,0 +1,10 @@ +package models.domain.publications + +case class CitizenBiosampleOriginalHaplogroup( + id: Option[Int] = None, + citizenBiosampleId: Int, + publicationId: Int, + originalYHaplogroup: Option[String], + originalMtHaplogroup: Option[String], + notes: Option[String] +) diff --git a/app/models/domain/publications/PublicationCitizenBiosample.scala b/app/models/domain/publications/PublicationCitizenBiosample.scala new file mode 100644 index 0000000..263e20c --- /dev/null +++ b/app/models/domain/publications/PublicationCitizenBiosample.scala @@ -0,0 +1,3 @@ +package models.domain.publications + +case class PublicationCitizenBiosample(publicationId: Int, citizenBiosampleId: Int) diff --git a/app/modules/BaseModule.scala b/app/modules/BaseModule.scala index 64fbcd1..0e368a2 100644 --- a/app/modules/BaseModule.scala +++ b/app/modules/BaseModule.scala @@ -73,5 +73,21 @@ class BaseModule extends AbstractModule { bind(classOf[SequencingLabRepository]) .to(classOf[SequencingLabRepositoryImpl]) .asEagerSingleton() + + bind(classOf[CitizenBiosampleRepository]) + .to(classOf[CitizenBiosampleRepositoryImpl]) + .asEagerSingleton() + + bind(classOf[ProjectRepository]) + 
.to(classOf[ProjectRepositoryImpl]) + .asEagerSingleton() + + bind(classOf[PublicationCitizenBiosampleRepository]) + .to(classOf[PublicationCitizenBiosampleRepositoryImpl]) + .asEagerSingleton() + + bind(classOf[CitizenBiosampleOriginalHaplogroupRepository]) + .to(classOf[CitizenBiosampleOriginalHaplogroupRepositoryImpl]) + .asEagerSingleton() } } diff --git a/app/repositories/CitizenBiosampleOriginalHaplogroupRepository.scala b/app/repositories/CitizenBiosampleOriginalHaplogroupRepository.scala new file mode 100644 index 0000000..fe26ac0 --- /dev/null +++ b/app/repositories/CitizenBiosampleOriginalHaplogroupRepository.scala @@ -0,0 +1,32 @@ +package repositories + +import jakarta.inject.Inject +import models.dal.DatabaseSchema +import models.domain.publications.CitizenBiosampleOriginalHaplogroup +import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} +import slick.jdbc.JdbcProfile + +import scala.concurrent.{ExecutionContext, Future} + +trait CitizenBiosampleOriginalHaplogroupRepository { + def create(info: CitizenBiosampleOriginalHaplogroup): Future[CitizenBiosampleOriginalHaplogroup] + def deleteByCitizenBiosampleId(citizenBiosampleId: Int): Future[Int] +} + +class CitizenBiosampleOriginalHaplogroupRepositoryImpl @Inject()(protected val dbConfigProvider: DatabaseConfigProvider)(implicit ec: ExecutionContext) + extends CitizenBiosampleOriginalHaplogroupRepository with HasDatabaseConfigProvider[JdbcProfile] { + + import profile.api.* + + private val table = DatabaseSchema.domain.publications.citizenBiosampleOriginalHaplogroups + + override def create(info: CitizenBiosampleOriginalHaplogroup): Future[CitizenBiosampleOriginalHaplogroup] = { + val insertQuery = (table returning table.map(_.id) + into ((item, id) => item.copy(id = Some(id)))) += info + db.run(insertQuery) + } + + override def deleteByCitizenBiosampleId(citizenBiosampleId: Int): Future[Int] = { + db.run(table.filter(_.citizenBiosampleId === citizenBiosampleId).delete) + } +} diff 
--git a/app/repositories/CitizenBiosampleRepository.scala b/app/repositories/CitizenBiosampleRepository.scala new file mode 100644 index 0000000..d100c6f --- /dev/null +++ b/app/repositories/CitizenBiosampleRepository.scala @@ -0,0 +1,96 @@ +package repositories + +import jakarta.inject.{Inject, Singleton} +import models.dal.MyPostgresProfile.api.* +import models.dal.{DatabaseSchema, MyPostgresProfile} +import models.domain.genomics.CitizenBiosample +import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} + +import java.time.LocalDateTime +import java.util.UUID +import scala.concurrent.{ExecutionContext, Future} + +trait CitizenBiosampleRepository { + def create(biosample: CitizenBiosample): Future[CitizenBiosample] + def findByGuid(guid: UUID): Future[Option[CitizenBiosample]] + def findByDid(did: String): Future[Option[CitizenBiosample]] + def findByAccession(accession: String): Future[Option[CitizenBiosample]] + + /** + * Updates the biosample. + * @param biosample The biosample with new values. + * @param expectedAtCid The atCid expected to be currently in the database for this record. + * @return Future[Boolean] true if update succeeded, false otherwise (e.g. record not found or atCid mismatch). 
+ */ + def update(biosample: CitizenBiosample, expectedAtCid: Option[String]): Future[Boolean] + + def softDelete(guid: UUID): Future[Boolean] +} + +@Singleton +class CitizenBiosampleRepositoryImpl @Inject()( + protected val dbConfigProvider: DatabaseConfigProvider +)(implicit ec: ExecutionContext) extends CitizenBiosampleRepository with HasDatabaseConfigProvider[MyPostgresProfile] { + + private val citizenBiosamples = DatabaseSchema.domain.genomics.citizenBiosamples + + override def create(biosample: CitizenBiosample): Future[CitizenBiosample] = { + val insertQuery = (citizenBiosamples returning citizenBiosamples.map(_.id) + into ((bs, id) => bs.copy(id = Some(id)))) += biosample + db.run(insertQuery) + } + + override def findByGuid(guid: UUID): Future[Option[CitizenBiosample]] = { + db.run(citizenBiosamples.filter(b => b.sampleGuid === guid && !b.deleted).result.headOption) + } + + override def findByDid(did: String): Future[Option[CitizenBiosample]] = { + db.run(citizenBiosamples.filter(b => b.citizenBiosampleDid === did && !b.deleted).result.headOption) + } + + override def findByAccession(accession: String): Future[Option[CitizenBiosample]] = { + db.run(citizenBiosamples.filter(b => b.accession === accession && !b.deleted).result.headOption) + } + + override def update(biosample: CitizenBiosample, expectedAtCid: Option[String]): Future[Boolean] = { + val query = citizenBiosamples.filter { b => + b.sampleGuid === biosample.sampleGuid && + b.atCid === expectedAtCid + } + + val updateAction = query.map(b => ( + b.citizenBiosampleDid, + b.accession, + b.alias, + b.sourcePlatform, + b.collectionDate, + b.sex, + b.geocoord, + b.description, + b.atCid, + b.updatedAt, + b.deleted + )).update(( + biosample.citizenBiosampleDid, + biosample.accession, + biosample.alias, + biosample.sourcePlatform, + biosample.collectionDate, + biosample.sex, + biosample.geocoord, + biosample.description, + biosample.atCid, + LocalDateTime.now(), + biosample.deleted + )) + + 
db.run(updateAction.map(_ > 0)) + } + + override def softDelete(guid: UUID): Future[Boolean] = { + val q = citizenBiosamples.filter(_.sampleGuid === guid) + .map(b => (b.deleted, b.updatedAt)) + .update((true, LocalDateTime.now())) + db.run(q.map(_ > 0)) + } +} diff --git a/app/repositories/ProjectRepository.scala b/app/repositories/ProjectRepository.scala new file mode 100644 index 0000000..b65fc3a --- /dev/null +++ b/app/repositories/ProjectRepository.scala @@ -0,0 +1,68 @@ +package repositories + +import jakarta.inject.{Inject, Singleton} +import models.dal.MyPostgresProfile.api.* +import models.dal.{DatabaseSchema, MyPostgresProfile} +import models.domain.Project +import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} + +import java.time.LocalDateTime +import java.util.UUID +import scala.concurrent.{ExecutionContext, Future} + +trait ProjectRepository { + def create(project: Project): Future[Project] + def findByProjectGuid(projectGuid: UUID): Future[Option[Project]] + def update(project: Project, expectedAtCid: Option[String]): Future[Boolean] + def softDelete(projectGuid: UUID): Future[Boolean] +} + +@Singleton +class ProjectRepositoryImpl @Inject()( + protected val dbConfigProvider: DatabaseConfigProvider +)(implicit ec: ExecutionContext) extends ProjectRepository with HasDatabaseConfigProvider[MyPostgresProfile] { + + private val projects = DatabaseSchema.domain.project.projects + + override def create(project: Project): Future[Project] = { + val insertQuery = (projects returning projects.map(_.id) + into ((p, id) => p.copy(id = Some(id)))) += project + db.run(insertQuery) + } + + override def findByProjectGuid(projectGuid: UUID): Future[Option[Project]] = { + db.run(projects.filter(p => p.projectGuid === projectGuid && !p.deleted).result.headOption) + } + + override def update(project: Project, expectedAtCid: Option[String]): Future[Boolean] = { + val query = projects.filter { p => + p.projectGuid === project.projectGuid && + p.atCid 
=== expectedAtCid + } + + val updateAction = query.map(p => ( + p.name, + p.description, + p.ownerDid, + p.atCid, + p.updatedAt, + p.deleted + )).update(( + project.name, + project.description, + project.ownerDid, + project.atCid, + LocalDateTime.now(), + project.deleted + )) + + db.run(updateAction.map(_ > 0)) + } + + override def softDelete(projectGuid: UUID): Future[Boolean] = { + val q = projects.filter(_.projectGuid === projectGuid) + .map(p => (p.deleted, p.updatedAt)) + .update((true, LocalDateTime.now())) + db.run(q.map(_ > 0)) + } +} diff --git a/app/repositories/PublicationCitizenBiosampleRepository.scala b/app/repositories/PublicationCitizenBiosampleRepository.scala new file mode 100644 index 0000000..0c18bdb --- /dev/null +++ b/app/repositories/PublicationCitizenBiosampleRepository.scala @@ -0,0 +1,42 @@ +package repositories + +import jakarta.inject.Inject +import models.dal.DatabaseSchema +import models.domain.publications.PublicationCitizenBiosample +import play.api.Logging +import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} +import slick.jdbc.JdbcProfile + +import scala.concurrent.{ExecutionContext, Future} + +trait PublicationCitizenBiosampleRepository { + def create(link: PublicationCitizenBiosample): Future[PublicationCitizenBiosample] + def deleteByCitizenBiosampleId(citizenBiosampleId: Int): Future[Int] +} + +class PublicationCitizenBiosampleRepositoryImpl @Inject()(protected val dbConfigProvider: DatabaseConfigProvider)(implicit ec: ExecutionContext) + extends PublicationCitizenBiosampleRepository with HasDatabaseConfigProvider[JdbcProfile] with Logging { + + import profile.api.* + + private val publicationCitizenBiosamples = DatabaseSchema.domain.publications.publicationCitizenBiosamples + + override def create(link: PublicationCitizenBiosample): Future[PublicationCitizenBiosample] = { + val existingQuery = publicationCitizenBiosamples + .filter(pb => + pb.publicationId === link.publicationId && + 
pb.citizenBiosampleId === link.citizenBiosampleId + ) + + val upsertAction = existingQuery.result.headOption.flatMap { + case Some(_) => DBIO.successful(link) + case None => publicationCitizenBiosamples += link + }.transactionally + + db.run(upsertAction).map(_ => link) + } + + override def deleteByCitizenBiosampleId(citizenBiosampleId: Int): Future[Int] = { + db.run(publicationCitizenBiosamples.filter(_.citizenBiosampleId === citizenBiosampleId).delete) + } +} diff --git a/app/services/CitizenBiosampleService.scala b/app/services/CitizenBiosampleService.scala new file mode 100644 index 0000000..b30302a --- /dev/null +++ b/app/services/CitizenBiosampleService.scala @@ -0,0 +1,165 @@ +package services + +import jakarta.inject.{Inject, Singleton} +import models.api.{ExternalBiosampleRequest, PublicationInfo} +import models.domain.genomics.{BiosampleType, CitizenBiosample} +import models.domain.publications.{CitizenBiosampleOriginalHaplogroup, Publication, PublicationCitizenBiosample} +import repositories._ + +import java.time.LocalDateTime +import java.util.UUID +import scala.concurrent.{ExecutionContext, Future} + +@Singleton +class CitizenBiosampleService @Inject()( + citizenBiosampleRepository: CitizenBiosampleRepository, + biosampleDataService: BiosampleDataService, + publicationRepository: PublicationRepository, + publicationCitizenBiosampleRepository: PublicationCitizenBiosampleRepository, + citizenBiosampleOriginalHaplogroupRepository: CitizenBiosampleOriginalHaplogroupRepository + )(implicit ec: ExecutionContext) extends CoordinateValidation { + + def createBiosample(request: ExternalBiosampleRequest): Future[UUID] = { + // 1. Validate coordinates + validateCoordinates(request.latitude, request.longitude).flatMap { geocoord => + // 2. 
Check for existing biosample by accession + citizenBiosampleRepository.findByAccession(request.sampleAccession).flatMap { + case Some(_) => + Future.failed(new IllegalArgumentException(s"Biosample with accession ${request.sampleAccession} already exists.")) + + case None => + // Create new + val sampleGuid = UUID.randomUUID() + val newAtCid = Some(UUID.randomUUID().toString) + + val citizenBiosample = CitizenBiosample( + id = None, + citizenBiosampleDid = request.citizenDid, + accession = Some(request.sampleAccession), + alias = request.alias, + sourcePlatform = Some(request.sourceSystem), + collectionDate = None, + sex = request.sex, + geocoord = geocoord, + description = Some(request.description), + sampleGuid = sampleGuid, + deleted = false, + atCid = newAtCid, + createdAt = LocalDateTime.now(), + updatedAt = LocalDateTime.now() + ) + + for { + created <- citizenBiosampleRepository.create(citizenBiosample) + _ <- handleDataAssociation(created.sampleGuid, request, isUpdate = false) + } yield created.sampleGuid + } + } + } + + def updateBiosample(sampleGuid: UUID, request: ExternalBiosampleRequest): Future[UUID] = { + validateCoordinates(request.latitude, request.longitude).flatMap { geocoord => + citizenBiosampleRepository.findByGuid(sampleGuid).flatMap { + case Some(existing) => + // Optimistic Locking Check + if (request.atCid.isDefined && request.atCid != existing.atCid) { + Future.failed(new IllegalStateException(s"Optimistic locking failure: atCid mismatch. 
Expected ${existing.atCid}, got ${request.atCid}")) + } else { + val newAtCid = Some(UUID.randomUUID().toString) + val toUpdate = existing.copy( + description = Some(request.description), + alias = request.alias, + sourcePlatform = Some(request.sourceSystem), + sex = request.sex, + geocoord = geocoord, + citizenBiosampleDid = request.citizenDid, + accession = Some(request.sampleAccession), + atCid = newAtCid, + updatedAt = LocalDateTime.now() + ) + + citizenBiosampleRepository.update(toUpdate, request.atCid).flatMap { success => + if (success) { + handleDataAssociation(existing.sampleGuid, request, isUpdate = true).map(_ => existing.sampleGuid) + } else { + Future.failed(new RuntimeException("Update failed (optimistic lock or record missing)")) + } + } + } + case None => + Future.failed(new NoSuchElementException(s"Biosample not found for GUID: $sampleGuid")) + } + } + } + + private def handleDataAssociation(guid: UUID, request: ExternalBiosampleRequest, isUpdate: Boolean): Future[Unit] = { + val publicationFuture = request.publication + .map(pub => linkPublication(guid, pub) + .recoverWith { case e => + Future.failed(new RuntimeException(s"Publication linkage failed: ${e.getMessage}", e)) + }) + .getOrElse(Future.successful(())) + + val sequenceDataFuture = if (isUpdate) { + biosampleDataService.replaceSequenceData(guid, request.sequenceData) + } else { + biosampleDataService.addSequenceData(guid, request.sequenceData) + } + + for { + _ <- publicationFuture + _ <- sequenceDataFuture + } yield () + } + + private def linkPublication(sampleGuid: UUID, pubInfo: PublicationInfo): Future[Unit] = { + for { + maybeBiosample <- citizenBiosampleRepository.findByGuid(sampleGuid) + biosample <- maybeBiosample match { + case Some(b) => Future.successful(b) + case None => Future.failed(new IllegalArgumentException(s"CitizenBiosample not found for GUID: $sampleGuid")) + } + + maybePublication <- pubInfo.doi.map(doi => + publicationRepository.findByDoi(doi) + 
).getOrElse(Future.successful(None)) + + publication <- maybePublication match { + case Some(pub) => Future.successful(pub) + case None => publicationRepository.savePublication(Publication( + id = None, + openAlexId = None, + pubmedId = pubInfo.pubmedId, + doi = pubInfo.doi, + title = pubInfo.doi.map(d => s"Publication with DOI: $d").getOrElse("Unknown publication"), + authors = None, abstractSummary = None, journal = None, publicationDate = None, url = None, + citationNormalizedPercentile = None, citedByCount = None, openAccessStatus = None, openAccessUrl = None, + primaryTopic = None, publicationType = None, publisher = None + )) + } + + // Link publication + _ <- publicationCitizenBiosampleRepository.create(PublicationCitizenBiosample( + publicationId = publication.id.get, + citizenBiosampleId = biosample.id.get + )) + + // Link Haplogroups + _ <- pubInfo.originalHaplogroups.map { haplogroupInfo => + citizenBiosampleOriginalHaplogroupRepository.create(CitizenBiosampleOriginalHaplogroup( + id = None, + citizenBiosampleId = biosample.id.get, + publicationId = publication.id.get, + originalYHaplogroup = haplogroupInfo.yHaplogroup, + originalMtHaplogroup = haplogroupInfo.mtHaplogroup, + notes = haplogroupInfo.notes + )) + }.getOrElse(Future.successful(())) + + } yield () + } + + def deleteBiosample(sampleGuid: UUID): Future[Boolean] = { + citizenBiosampleRepository.softDelete(sampleGuid) + } +} diff --git a/app/services/ProjectService.scala b/app/services/ProjectService.scala new file mode 100644 index 0000000..81cf647 --- /dev/null +++ b/app/services/ProjectService.scala @@ -0,0 +1,73 @@ +package services + +import jakarta.inject.{Inject, Singleton} +import models.api.{ProjectRequest, ProjectResponse} +import models.domain.Project +import repositories.ProjectRepository + +import java.time.LocalDateTime +import java.util.UUID +import scala.concurrent.{ExecutionContext, Future} + +@Singleton +class ProjectService @Inject()( + projectRepository: ProjectRepository + 
)(implicit ec: ExecutionContext) { + + def createProject(request: ProjectRequest): Future[ProjectResponse] = { + val project = Project( + id = None, + projectGuid = UUID.randomUUID(), + name = request.name, + description = request.description, + ownerDid = "did:example:owner", // Placeholder until auth provides owner DID + createdAt = LocalDateTime.now(), + updatedAt = LocalDateTime.now(), + deleted = false, + atCid = Some(UUID.randomUUID().toString) + ) + + projectRepository.create(project).map(toResponse) + } + + def updateProject(projectGuid: UUID, request: ProjectRequest): Future[ProjectResponse] = { + projectRepository.findByProjectGuid(projectGuid).flatMap { + case Some(existing) => + if (request.atCid.isDefined && request.atCid != existing.atCid) { + Future.failed(new IllegalStateException(s"Optimistic locking failure: atCid mismatch.")) + } else { + val updatedProject = existing.copy( + name = request.name, + description = request.description, + updatedAt = LocalDateTime.now(), + atCid = Some(UUID.randomUUID().toString) + ) + projectRepository.update(updatedProject, request.atCid).flatMap { success => + if (success) { + Future.successful(toResponse(updatedProject)) + } else { + Future.failed(new RuntimeException("Update failed")) + } + } + } + case None => + Future.failed(new NoSuchElementException(s"Project not found for GUID: $projectGuid")) + } + } + + def deleteProject(projectGuid: UUID): Future[Boolean] = { + projectRepository.softDelete(projectGuid) + } + + private def toResponse(p: Project): ProjectResponse = { + ProjectResponse( + projectGuid = p.projectGuid, + name = p.name, + description = p.description, + ownerDid = p.ownerDid, + createdAt = p.createdAt, + updatedAt = p.updatedAt, + atCid = p.atCid + ) + } +} diff --git a/conf/evolutions/default/22.sql b/conf/evolutions/default/22.sql new file mode 100644 index 0000000..9706700 --- /dev/null +++ b/conf/evolutions/default/22.sql @@ -0,0 +1,51 @@ +# --- !Ups +ALTER TABLE citizen_biosample ADD 
COLUMN deleted BOOLEAN DEFAULT false NOT NULL; +ALTER TABLE citizen_biosample ADD COLUMN at_cid VARCHAR(255); +ALTER TABLE citizen_biosample ADD COLUMN created_at TIMESTAMP DEFAULT now() NOT NULL; +ALTER TABLE citizen_biosample ADD COLUMN updated_at TIMESTAMP DEFAULT now() NOT NULL; +ALTER TABLE citizen_biosample ADD COLUMN accession VARCHAR(255); +ALTER TABLE citizen_biosample ADD COLUMN alias VARCHAR(255); +CREATE UNIQUE INDEX citizen_biosample_accession_uindex ON citizen_biosample (accession); + +CREATE TABLE publication_citizen_biosample +( + publication_id INT REFERENCES publication (id) ON DELETE CASCADE, + citizen_biosample_id INT REFERENCES citizen_biosample (id) ON DELETE CASCADE, + PRIMARY KEY (publication_id, citizen_biosample_id) +); + +CREATE TABLE citizen_biosample_original_haplogroup +( + id SERIAL PRIMARY KEY, + citizen_biosample_id INT REFERENCES citizen_biosample (id) ON DELETE CASCADE, + publication_id INT REFERENCES publication (id) ON DELETE CASCADE, + original_y_haplogroup VARCHAR(255), + original_mt_haplogroup VARCHAR(255), + notes TEXT, + UNIQUE (citizen_biosample_id, publication_id) +); + +CREATE TABLE project +( + id SERIAL PRIMARY KEY, + project_guid UUID NOT NULL UNIQUE, + name VARCHAR(255) NOT NULL, + description TEXT, + owner_did VARCHAR(255) NOT NULL, + created_at TIMESTAMP NOT NULL DEFAULT now(), + updated_at TIMESTAMP NOT NULL DEFAULT now(), + deleted BOOLEAN DEFAULT false NOT NULL, + at_cid VARCHAR(255) +); + +# --- !Downs +DROP TABLE project; +DROP TABLE citizen_biosample_original_haplogroup; +DROP TABLE publication_citizen_biosample; +DROP INDEX citizen_biosample_accession_uindex; +ALTER TABLE citizen_biosample DROP COLUMN alias; +ALTER TABLE citizen_biosample DROP COLUMN accession; +ALTER TABLE citizen_biosample DROP COLUMN updated_at; +ALTER TABLE citizen_biosample DROP COLUMN created_at; +ALTER TABLE citizen_biosample DROP COLUMN at_cid; +ALTER TABLE citizen_biosample DROP COLUMN deleted; diff --git a/conf/routes b/conf/routes 
index 6353713..3d36d09 100644 --- a/conf/routes +++ b/conf/routes @@ -88,6 +88,16 @@ POST /api/private/sequencing-labs PATCH /api/private/sequencing-labs/:id controllers.SequencingLabAdminController.update(id: Int) DELETE /api/private/sequencing-labs/:id controllers.SequencingLabAdminController.delete(id: Int) +# Citizen Biosample API endpoints (Firehose) +POST /api/external-biosamples controllers.CitizenBiosampleController.create +PUT /api/external-biosamples/:sampleGuid controllers.CitizenBiosampleController.update(sampleGuid: java.util.UUID) +DELETE /api/external-biosamples/:sampleGuid controllers.CitizenBiosampleController.delete(sampleGuid: java.util.UUID) + +# Project API endpoints +POST /api/projects controllers.ProjectController.create +PUT /api/projects/:projectGuid controllers.ProjectController.update(projectGuid: java.util.UUID) +DELETE /api/projects/:projectGuid controllers.ProjectController.delete(projectGuid: java.util.UUID) + # --- API Routes (Handled by Tapir, including Swagger UI) --- POST /api/registerPDS controllers.PDSRegistrationController.registerPDS() From 27c14574420053dd11fea87ecb700bbd9c487a42 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 06:51:19 -0600 Subject: [PATCH 16/31] Add Tapir endpoint definitions for CitizenBiosamples and Projects --- app/api/CitizenBiosampleEndpoints.scala | 51 +++++++++++++++++++ app/api/ProjectEndpoints.scala | 51 +++++++++++++++++++ app/controllers/ApiRouter.scala | 3 +- .../api/BiosampleOperationResponse.scala | 10 ++++ 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 app/api/CitizenBiosampleEndpoints.scala create mode 100644 app/api/ProjectEndpoints.scala create mode 100644 app/models/api/BiosampleOperationResponse.scala diff --git a/app/api/CitizenBiosampleEndpoints.scala b/app/api/CitizenBiosampleEndpoints.scala new file mode 100644 index 0000000..f535ffa --- /dev/null +++ b/app/api/CitizenBiosampleEndpoints.scala @@ -0,0 +1,51 @@ +package api + +import 
models.api.{BiosampleOperationResponse, ExternalBiosampleRequest} +import sttp.tapir.* +import sttp.tapir.generic.auto.* +import sttp.tapir.json.play.* +import java.util.UUID + +object CitizenBiosampleEndpoints { + + private val createBiosample: PublicEndpoint[ExternalBiosampleRequest, String, BiosampleOperationResponse, Any] = { + endpoint + .post + .in("api" / "external-biosamples") + .in(jsonBody[ExternalBiosampleRequest]) + .out(jsonBody[BiosampleOperationResponse]) + .errorOut(stringBody) + .description("Creates a new Citizen Biosample with associated metadata and publication links.") + .summary("Create Citizen Biosample") + .tag("Citizen Biosamples") + } + + private val updateBiosample: PublicEndpoint[(UUID, ExternalBiosampleRequest), String, BiosampleOperationResponse, Any] = { + endpoint + .put + .in("api" / "external-biosamples" / path[UUID]("sampleGuid")) + .in(jsonBody[ExternalBiosampleRequest]) + .out(jsonBody[BiosampleOperationResponse]) + .errorOut(stringBody) + .description("Updates an existing Citizen Biosample using Optimistic Locking (via atCid).") + .summary("Update Citizen Biosample") + .tag("Citizen Biosamples") + } + + private val deleteBiosample: PublicEndpoint[UUID, String, Unit, Any] = { + endpoint + .delete + .in("api" / "external-biosamples" / path[UUID]("sampleGuid")) + .out(statusCode(sttp.model.StatusCode.NoContent)) + .errorOut(stringBody) + .description("Soft deletes a Citizen Biosample.") + .summary("Delete Citizen Biosample") + .tag("Citizen Biosamples") + } + + val all: List[PublicEndpoint[_, _, _, _]] = List( + createBiosample, + updateBiosample, + deleteBiosample + ) +} diff --git a/app/api/ProjectEndpoints.scala b/app/api/ProjectEndpoints.scala new file mode 100644 index 0000000..f9c687e --- /dev/null +++ b/app/api/ProjectEndpoints.scala @@ -0,0 +1,51 @@ +package api + +import models.api.{ProjectRequest, ProjectResponse} +import sttp.tapir.* +import sttp.tapir.generic.auto.* +import sttp.tapir.json.play.* +import java.util.UUID 
+ +object ProjectEndpoints { + + private val createProject: PublicEndpoint[ProjectRequest, String, ProjectResponse, Any] = { + endpoint + .post + .in("api" / "projects") + .in(jsonBody[ProjectRequest]) + .out(jsonBody[ProjectResponse]) + .errorOut(stringBody) + .description("Creates a new Project.") + .summary("Create Project") + .tag("Projects") + } + + private val updateProject: PublicEndpoint[(UUID, ProjectRequest), String, ProjectResponse, Any] = { + endpoint + .put + .in("api" / "projects" / path[UUID]("projectGuid")) + .in(jsonBody[ProjectRequest]) + .out(jsonBody[ProjectResponse]) + .errorOut(stringBody) + .description("Updates an existing Project using Optimistic Locking (via atCid).") + .summary("Update Project") + .tag("Projects") + } + + private val deleteProject: PublicEndpoint[UUID, String, Unit, Any] = { + endpoint + .delete + .in("api" / "projects" / path[UUID]("projectGuid")) + .out(statusCode(sttp.model.StatusCode.NoContent)) + .errorOut(stringBody) + .description("Soft deletes a Project.") + .summary("Delete Project") + .tag("Projects") + } + + val all: List[PublicEndpoint[_, _, _, _]] = List( + createProject, + updateProject, + deleteProject + ) +} diff --git a/app/controllers/ApiRouter.scala b/app/controllers/ApiRouter.scala index 89e8774..ee7e16e 100644 --- a/app/controllers/ApiRouter.scala +++ b/app/controllers/ApiRouter.scala @@ -28,7 +28,8 @@ class ApiRouter @Inject()(cc: ControllerComponents, configuration: play.api.Conf SwaggerInterpreter().fromEndpoints[Future]( endpoints = _root_.api.ReferenceEndpoints.all ++ _root_.api.HaplogroupEndpoints.all ++ _root_.api.SampleEndpoints.all ++ _root_.api.CoverageEndpoints.all - ++ _root_.api.SequencerEndpoints.all, + ++ _root_.api.SequencerEndpoints.all ++ _root_.api.CitizenBiosampleEndpoints.all + ++ _root_.api.ProjectEndpoints.all, info = apiInfo ) diff --git a/app/models/api/BiosampleOperationResponse.scala b/app/models/api/BiosampleOperationResponse.scala new file mode 100644 index 
0000000..6d0b5f9 --- /dev/null +++ b/app/models/api/BiosampleOperationResponse.scala @@ -0,0 +1,10 @@ +package models.api + +import play.api.libs.json.{Json, OFormat} +import java.util.UUID + +case class BiosampleOperationResponse(status: String, guid: UUID) + +object BiosampleOperationResponse { + implicit val format: OFormat[BiosampleOperationResponse] = Json.format[BiosampleOperationResponse] +} From 8b123e27ae2102071872d98095dad106a9f09dba Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 07:00:24 -0600 Subject: [PATCH 17/31] Update Atmosphere_Lexicon to make citizenDid a required field --- documents/Atmosphere_Lexicon.md | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/documents/Atmosphere_Lexicon.md b/documents/Atmosphere_Lexicon.md index 51bfc89..4c0f8f6 100644 --- a/documents/Atmosphere_Lexicon.md +++ b/documents/Atmosphere_Lexicon.md @@ -77,19 +77,23 @@ This record represents a single biological sample processed by a BGS node. It ma "key": "tid", "record": { "type": "object", - "required": ["sampleAccession", "donorIdentifier", "centerName", "sequenceData"], - "properties": { - "sampleAccession": { - "type": "string", - "description": "Unique identifier for the sample (e.g., UUID from BGS)." - }, - "donorIdentifier": { - "type": "string", - "description": "Identifier for the specimen donor within the user's context." - }, - "description": { - "type": "string", - "description": "Human-readable description of the sample." + "required": ["sampleAccession", "donorIdentifier", "centerName", "sequenceData", "citizenDid"], + "properties": { + "sampleAccession": { + "type": "string", + "description": "Native identifier provided by the client for the biosample." + }, + "donorIdentifier": { + "type": "string", + "description": "Identifier for the specimen donor within the user's context." 
+ }, + "citizenDid": { + "type": "string", + "description": "The Decentralized Identifier (DID) of the citizen/researcher who owns this biosample record." + }, + "description": { + "type": "string", + "description": "Human-readable description of the sample." }, "centerName": { "type": "string", From 9fc45c92f011c1d92b71ef7785e78e63ac9a0953 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 07:06:34 -0600 Subject: [PATCH 18/31] Add atUri field to record schema in Atmosphere_Lexicon.md --- documents/Atmosphere_Lexicon.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/documents/Atmosphere_Lexicon.md b/documents/Atmosphere_Lexicon.md index 4c0f8f6..1f692f3 100644 --- a/documents/Atmosphere_Lexicon.md +++ b/documents/Atmosphere_Lexicon.md @@ -91,6 +91,10 @@ This record represents a single biological sample processed by a BGS node. It ma "type": "string", "description": "The Decentralized Identifier (DID) of the citizen/researcher who owns this biosample record." }, + "atUri": { + "type": "string", + "description": "The AT URI (at://did/collection/rkey) of this biosample record, assigned by the PDS. This uniquely identifies the record within the AT Protocol network." + }, "description": { "type": "string", "description": "Human-readable description of the sample." 
From a9e97d4976386b65be8d7e0e163e55a46e858ccf Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 07:12:23 -0600 Subject: [PATCH 19/31] Refactor CitizenBiosample to use atUri instead of citizenBiosampleDid --- app/models/api/ExternalBiosampleRequest.scala | 1 + .../dal/domain/genomics/CitizenBiosamplesTable.scala | 4 ++-- app/models/domain/genomics/CitizenBiosample.scala | 2 +- app/repositories/CitizenBiosampleRepository.scala | 10 +++++----- app/services/CitizenBiosampleService.scala | 4 ++-- conf/evolutions/default/22.sql | 2 ++ 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/app/models/api/ExternalBiosampleRequest.scala b/app/models/api/ExternalBiosampleRequest.scala index 5d6dcd2..3792e91 100644 --- a/app/models/api/ExternalBiosampleRequest.scala +++ b/app/models/api/ExternalBiosampleRequest.scala @@ -32,6 +32,7 @@ case class ExternalBiosampleRequest( latitude: Option[Double], longitude: Option[Double], citizenDid: Option[String], + atUri: Option[String], donorIdentifier: Option[String], donorType: Option[BiosampleType], publication: Option[PublicationInfo], diff --git a/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala b/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala index e9a3325..57873c4 100644 --- a/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala +++ b/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala @@ -8,7 +8,7 @@ import java.util.UUID class CitizenBiosamplesTable(tag: Tag) extends Table[CitizenBiosample](tag, "citizen_biosample") { def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def citizenBiosampleDid = column[Option[String]]("citizen_biosample_did", O.Unique) + def atUri = column[Option[String]]("at_uri", O.Unique) def accession = column[Option[String]]("accession") def alias = column[Option[String]]("alias") def sourcePlatform = column[Option[String]]("source_platform") @@ -25,7 +25,7 @@ class CitizenBiosamplesTable(tag: Tag) extends Table[CitizenBiosample](tag, "cit def * = ( 
id.?, - citizenBiosampleDid, + atUri, accession, alias, sourcePlatform, diff --git a/app/models/domain/genomics/CitizenBiosample.scala b/app/models/domain/genomics/CitizenBiosample.scala index 96e52b8..5ee6124 100644 --- a/app/models/domain/genomics/CitizenBiosample.scala +++ b/app/models/domain/genomics/CitizenBiosample.scala @@ -10,7 +10,7 @@ import java.util.UUID */ case class CitizenBiosample( id: Option[Int] = None, - citizenBiosampleDid: Option[String], + atUri: Option[String], accession: Option[String], alias: Option[String], sourcePlatform: Option[String], diff --git a/app/repositories/CitizenBiosampleRepository.scala b/app/repositories/CitizenBiosampleRepository.scala index d100c6f..76d134c 100644 --- a/app/repositories/CitizenBiosampleRepository.scala +++ b/app/repositories/CitizenBiosampleRepository.scala @@ -13,7 +13,7 @@ import scala.concurrent.{ExecutionContext, Future} trait CitizenBiosampleRepository { def create(biosample: CitizenBiosample): Future[CitizenBiosample] def findByGuid(guid: UUID): Future[Option[CitizenBiosample]] - def findByDid(did: String): Future[Option[CitizenBiosample]] + def findByAtUri(atUri: String): Future[Option[CitizenBiosample]] def findByAccession(accession: String): Future[Option[CitizenBiosample]] /** @@ -44,8 +44,8 @@ class CitizenBiosampleRepositoryImpl @Inject()( db.run(citizenBiosamples.filter(b => b.sampleGuid === guid && !b.deleted).result.headOption) } - override def findByDid(did: String): Future[Option[CitizenBiosample]] = { - db.run(citizenBiosamples.filter(b => b.citizenBiosampleDid === did && !b.deleted).result.headOption) + override def findByAtUri(atUri: String): Future[Option[CitizenBiosample]] = { + db.run(citizenBiosamples.filter(b => b.atUri === atUri && !b.deleted).result.headOption) } override def findByAccession(accession: String): Future[Option[CitizenBiosample]] = { @@ -59,7 +59,7 @@ class CitizenBiosampleRepositoryImpl @Inject()( } val updateAction = query.map(b => ( - b.citizenBiosampleDid, + 
b.atUri, b.accession, b.alias, b.sourcePlatform, @@ -71,7 +71,7 @@ class CitizenBiosampleRepositoryImpl @Inject()( b.updatedAt, b.deleted )).update(( - biosample.citizenBiosampleDid, + biosample.atUri, biosample.accession, biosample.alias, biosample.sourcePlatform, diff --git a/app/services/CitizenBiosampleService.scala b/app/services/CitizenBiosampleService.scala index b30302a..ba8a456 100644 --- a/app/services/CitizenBiosampleService.scala +++ b/app/services/CitizenBiosampleService.scala @@ -34,7 +34,7 @@ class CitizenBiosampleService @Inject()( val citizenBiosample = CitizenBiosample( id = None, - citizenBiosampleDid = request.citizenDid, + atUri = request.atUri, accession = Some(request.sampleAccession), alias = request.alias, sourcePlatform = Some(request.sourceSystem), @@ -72,7 +72,7 @@ class CitizenBiosampleService @Inject()( sourcePlatform = Some(request.sourceSystem), sex = request.sex, geocoord = geocoord, - citizenBiosampleDid = request.citizenDid, + atUri = request.atUri, accession = Some(request.sampleAccession), atCid = newAtCid, updatedAt = LocalDateTime.now() diff --git a/conf/evolutions/default/22.sql b/conf/evolutions/default/22.sql index 9706700..9876442 100644 --- a/conf/evolutions/default/22.sql +++ b/conf/evolutions/default/22.sql @@ -1,4 +1,5 @@ # --- !Ups +ALTER TABLE citizen_biosample RENAME COLUMN citizen_biosample_did TO at_uri; ALTER TABLE citizen_biosample ADD COLUMN deleted BOOLEAN DEFAULT false NOT NULL; ALTER TABLE citizen_biosample ADD COLUMN at_cid VARCHAR(255); ALTER TABLE citizen_biosample ADD COLUMN created_at TIMESTAMP DEFAULT now() NOT NULL; @@ -49,3 +50,4 @@ ALTER TABLE citizen_biosample DROP COLUMN updated_at; ALTER TABLE citizen_biosample DROP COLUMN created_at; ALTER TABLE citizen_biosample DROP COLUMN at_cid; ALTER TABLE citizen_biosample DROP COLUMN deleted; +ALTER TABLE citizen_biosample RENAME COLUMN at_uri TO citizen_biosample_did; From 43a5ed5e940e8f2fc0278980402693d10af860c1 Mon Sep 17 00:00:00 2001 From: jkane 
Date: Sat, 6 Dec 2025 07:20:07 -0600 Subject: [PATCH 20/31] Refactor API to use atUri for Citizen Biosamples and Projects --- app/api/CitizenBiosampleEndpoints.scala | 8 ++++---- app/api/ProjectEndpoints.scala | 8 ++++---- app/controllers/CitizenBiosampleController.scala | 12 ++++++------ app/controllers/ProjectController.scala | 8 ++++---- app/models/api/ProjectRequest.scala | 1 + app/models/dal/domain/ProjectTable.scala | 2 ++ app/models/domain/Project.scala | 1 + app/repositories/CitizenBiosampleRepository.scala | 8 ++++++++ app/repositories/ProjectRepository.scala | 15 +++++++++++++++ app/services/CitizenBiosampleService.scala | 12 ++++++------ app/services/ProjectService.scala | 12 +++++++----- conf/evolutions/default/22.sql | 3 +++ conf/routes | 8 ++++---- 13 files changed, 65 insertions(+), 33 deletions(-) diff --git a/app/api/CitizenBiosampleEndpoints.scala b/app/api/CitizenBiosampleEndpoints.scala index f535ffa..e5ae79b 100644 --- a/app/api/CitizenBiosampleEndpoints.scala +++ b/app/api/CitizenBiosampleEndpoints.scala @@ -20,10 +20,10 @@ object CitizenBiosampleEndpoints { .tag("Citizen Biosamples") } - private val updateBiosample: PublicEndpoint[(UUID, ExternalBiosampleRequest), String, BiosampleOperationResponse, Any] = { + private val updateBiosample: PublicEndpoint[(String, ExternalBiosampleRequest), String, BiosampleOperationResponse, Any] = { endpoint .put - .in("api" / "external-biosamples" / path[UUID]("sampleGuid")) + .in("api" / "external-biosamples" / path[String]("atUri")) .in(jsonBody[ExternalBiosampleRequest]) .out(jsonBody[BiosampleOperationResponse]) .errorOut(stringBody) @@ -32,10 +32,10 @@ object CitizenBiosampleEndpoints { .tag("Citizen Biosamples") } - private val deleteBiosample: PublicEndpoint[UUID, String, Unit, Any] = { + private val deleteBiosample: PublicEndpoint[String, String, Unit, Any] = { endpoint .delete - .in("api" / "external-biosamples" / path[UUID]("sampleGuid")) + .in("api" / "external-biosamples" / path[String]("atUri")) 
.out(statusCode(sttp.model.StatusCode.NoContent)) .errorOut(stringBody) .description("Soft deletes a Citizen Biosample.") diff --git a/app/api/ProjectEndpoints.scala b/app/api/ProjectEndpoints.scala index f9c687e..794421b 100644 --- a/app/api/ProjectEndpoints.scala +++ b/app/api/ProjectEndpoints.scala @@ -20,10 +20,10 @@ object ProjectEndpoints { .tag("Projects") } - private val updateProject: PublicEndpoint[(UUID, ProjectRequest), String, ProjectResponse, Any] = { + private val updateProject: PublicEndpoint[(String, ProjectRequest), String, ProjectResponse, Any] = { endpoint .put - .in("api" / "projects" / path[UUID]("projectGuid")) + .in("api" / "projects" / path[String]("atUri")) .in(jsonBody[ProjectRequest]) .out(jsonBody[ProjectResponse]) .errorOut(stringBody) @@ -32,10 +32,10 @@ object ProjectEndpoints { .tag("Projects") } - private val deleteProject: PublicEndpoint[UUID, String, Unit, Any] = { + private val deleteProject: PublicEndpoint[String, String, Unit, Any] = { endpoint .delete - .in("api" / "projects" / path[UUID]("projectGuid")) + .in("api" / "projects" / path[String]("atUri")) .out(statusCode(sttp.model.StatusCode.NoContent)) .errorOut(stringBody) .description("Soft deletes a Project.") diff --git a/app/controllers/CitizenBiosampleController.scala b/app/controllers/CitizenBiosampleController.scala index 740f477..0d40f29 100644 --- a/app/controllers/CitizenBiosampleController.scala +++ b/app/controllers/CitizenBiosampleController.scala @@ -41,8 +41,8 @@ class CitizenBiosampleController @Inject()( } } - def update(sampleGuid: UUID): Action[ExternalBiosampleRequest] = secureApi.jsonAction[ExternalBiosampleRequest].async { request => - citizenBiosampleService.updateBiosample(sampleGuid, request.body).map { guid => + def update(atUri: String): Action[ExternalBiosampleRequest] = secureApi.jsonAction[ExternalBiosampleRequest].async { request => + citizenBiosampleService.updateBiosample(atUri, request.body).map { guid => Ok(Json.obj( "status" -> "success", 
"guid" -> guid @@ -59,10 +59,10 @@ class CitizenBiosampleController @Inject()( } } - def delete(sampleGuid: UUID): Action[AnyContent] = secureApi.async { request => - citizenBiosampleService.deleteBiosample(sampleGuid).map { + def delete(atUri: String): Action[AnyContent] = secureApi.async { request => + citizenBiosampleService.deleteBiosample(atUri).map { case true => NoContent - case false => NotFound(Json.obj("error" -> "Biosample not found", "message" -> s"Biosample with GUID '$sampleGuid' not found.")) + case false => NotFound(Json.obj("error" -> "Biosample not found", "message" -> s"Biosample with atUri '$atUri' not found.")) }.recover { case e: Exception => InternalServerError(Json.obj( @@ -71,4 +71,4 @@ class CitizenBiosampleController @Inject()( )) } } -} +} \ No newline at end of file diff --git a/app/controllers/ProjectController.scala b/app/controllers/ProjectController.scala index d7abd85..9454a4e 100644 --- a/app/controllers/ProjectController.scala +++ b/app/controllers/ProjectController.scala @@ -25,8 +25,8 @@ class ProjectController @Inject()( } } - def update(projectGuid: UUID): Action[ProjectRequest] = secureApi.jsonAction[ProjectRequest].async { request => - projectService.updateProject(projectGuid, request.body).map { response => + def update(atUri: String): Action[ProjectRequest] = secureApi.jsonAction[ProjectRequest].async { request => + projectService.updateProject(atUri, request.body).map { response => Ok(Json.toJson(response)) }.recover { case e: IllegalStateException => Conflict(Json.obj("error" -> e.getMessage)) @@ -35,8 +35,8 @@ class ProjectController @Inject()( } } - def delete(projectGuid: UUID): Action[AnyContent] = secureApi.async { request => - projectService.deleteProject(projectGuid).map { + def delete(atUri: String): Action[AnyContent] = secureApi.async { request => + projectService.deleteProject(atUri).map { case true => NoContent case false => NotFound(Json.obj("error" -> "Project not found")) }.recover { diff --git 
a/app/models/api/ProjectRequest.scala b/app/models/api/ProjectRequest.scala index b43b00d..93ce61c 100644 --- a/app/models/api/ProjectRequest.scala +++ b/app/models/api/ProjectRequest.scala @@ -6,6 +6,7 @@ import java.util.UUID case class ProjectRequest( name: String, description: Option[String] = None, + atUri: Option[String] = None, atCid: Option[String] = None ) diff --git a/app/models/dal/domain/ProjectTable.scala b/app/models/dal/domain/ProjectTable.scala index 4544755..6e3533f 100644 --- a/app/models/dal/domain/ProjectTable.scala +++ b/app/models/dal/domain/ProjectTable.scala @@ -14,6 +14,7 @@ class ProjectTable(tag: Tag) extends Table[Project](tag, "project") { def createdAt = column[LocalDateTime]("created_at") def updatedAt = column[LocalDateTime]("updated_at") def deleted = column[Boolean]("deleted", O.Default(false)) + def atUri = column[Option[String]]("at_uri") def atCid = column[Option[String]]("at_cid") def * = ( @@ -25,6 +26,7 @@ class ProjectTable(tag: Tag) extends Table[Project](tag, "project") { createdAt, updatedAt, deleted, + atUri, atCid ).mapTo[Project] } diff --git a/app/models/domain/Project.scala b/app/models/domain/Project.scala index e2a2d0d..c3f2f34 100644 --- a/app/models/domain/Project.scala +++ b/app/models/domain/Project.scala @@ -12,5 +12,6 @@ case class Project( createdAt: LocalDateTime, updatedAt: LocalDateTime, deleted: Boolean = false, + atUri: Option[String] = None, atCid: Option[String] = None ) diff --git a/app/repositories/CitizenBiosampleRepository.scala b/app/repositories/CitizenBiosampleRepository.scala index 76d134c..4913e2f 100644 --- a/app/repositories/CitizenBiosampleRepository.scala +++ b/app/repositories/CitizenBiosampleRepository.scala @@ -25,6 +25,7 @@ trait CitizenBiosampleRepository { def update(biosample: CitizenBiosample, expectedAtCid: Option[String]): Future[Boolean] def softDelete(guid: UUID): Future[Boolean] + def softDeleteByAtUri(atUri: String): Future[Boolean] } @Singleton @@ -93,4 +94,11 @@ class 
CitizenBiosampleRepositoryImpl @Inject()( .update((true, LocalDateTime.now())) db.run(q.map(_ > 0)) } + + override def softDeleteByAtUri(atUri: String): Future[Boolean] = { + val q = citizenBiosamples.filter(_.atUri === atUri) + .map(b => (b.deleted, b.updatedAt)) + .update((true, LocalDateTime.now())) + db.run(q.map(_ > 0)) + } } diff --git a/app/repositories/ProjectRepository.scala b/app/repositories/ProjectRepository.scala index b65fc3a..5c01380 100644 --- a/app/repositories/ProjectRepository.scala +++ b/app/repositories/ProjectRepository.scala @@ -13,8 +13,10 @@ import scala.concurrent.{ExecutionContext, Future} trait ProjectRepository { def create(project: Project): Future[Project] def findByProjectGuid(projectGuid: UUID): Future[Option[Project]] + def findByAtUri(atUri: String): Future[Option[Project]] def update(project: Project, expectedAtCid: Option[String]): Future[Boolean] def softDelete(projectGuid: UUID): Future[Boolean] + def softDeleteByAtUri(atUri: String): Future[Boolean] } @Singleton @@ -34,6 +36,10 @@ class ProjectRepositoryImpl @Inject()( db.run(projects.filter(p => p.projectGuid === projectGuid && !p.deleted).result.headOption) } + override def findByAtUri(atUri: String): Future[Option[Project]] = { + db.run(projects.filter(p => p.atUri === atUri && !p.deleted).result.headOption) + } + override def update(project: Project, expectedAtCid: Option[String]): Future[Boolean] = { val query = projects.filter { p => p.projectGuid === project.projectGuid && @@ -44,6 +50,7 @@ class ProjectRepositoryImpl @Inject()( p.name, p.description, p.ownerDid, + p.atUri, p.atCid, p.updatedAt, p.deleted @@ -51,6 +58,7 @@ class ProjectRepositoryImpl @Inject()( project.name, project.description, project.ownerDid, + project.atUri, project.atCid, LocalDateTime.now(), project.deleted @@ -65,4 +73,11 @@ class ProjectRepositoryImpl @Inject()( .update((true, LocalDateTime.now())) db.run(q.map(_ > 0)) } + + override def softDeleteByAtUri(atUri: String): Future[Boolean] = { + 
val q = projects.filter(_.atUri === atUri) + .map(p => (p.deleted, p.updatedAt)) + .update((true, LocalDateTime.now())) + db.run(q.map(_ > 0)) + } } diff --git a/app/services/CitizenBiosampleService.scala b/app/services/CitizenBiosampleService.scala index ba8a456..26e7060 100644 --- a/app/services/CitizenBiosampleService.scala +++ b/app/services/CitizenBiosampleService.scala @@ -57,9 +57,9 @@ class CitizenBiosampleService @Inject()( } } - def updateBiosample(sampleGuid: UUID, request: ExternalBiosampleRequest): Future[UUID] = { + def updateBiosample(atUri: String, request: ExternalBiosampleRequest): Future[UUID] = { validateCoordinates(request.latitude, request.longitude).flatMap { geocoord => - citizenBiosampleRepository.findByGuid(sampleGuid).flatMap { + citizenBiosampleRepository.findByAtUri(atUri).flatMap { case Some(existing) => // Optimistic Locking Check if (request.atCid.isDefined && request.atCid != existing.atCid) { @@ -87,7 +87,7 @@ class CitizenBiosampleService @Inject()( } } case None => - Future.failed(new NoSuchElementException(s"Biosample not found for GUID: $sampleGuid")) + Future.failed(new NoSuchElementException(s"Biosample not found for atUri: $atUri")) } } } @@ -159,7 +159,7 @@ class CitizenBiosampleService @Inject()( } yield () } - def deleteBiosample(sampleGuid: UUID): Future[Boolean] = { - citizenBiosampleRepository.softDelete(sampleGuid) + def deleteBiosample(atUri: String): Future[Boolean] = { + citizenBiosampleRepository.softDeleteByAtUri(atUri) } -} +} \ No newline at end of file diff --git a/app/services/ProjectService.scala b/app/services/ProjectService.scala index 81cf647..57312b3 100644 --- a/app/services/ProjectService.scala +++ b/app/services/ProjectService.scala @@ -24,14 +24,15 @@ class ProjectService @Inject()( createdAt = LocalDateTime.now(), updatedAt = LocalDateTime.now(), deleted = false, + atUri = request.atUri, atCid = Some(UUID.randomUUID().toString) ) projectRepository.create(project).map(toResponse) } - def 
updateProject(projectGuid: UUID, request: ProjectRequest): Future[ProjectResponse] = { - projectRepository.findByProjectGuid(projectGuid).flatMap { + def updateProject(atUri: String, request: ProjectRequest): Future[ProjectResponse] = { + projectRepository.findByAtUri(atUri).flatMap { case Some(existing) => if (request.atCid.isDefined && request.atCid != existing.atCid) { Future.failed(new IllegalStateException(s"Optimistic locking failure: atCid mismatch.")) @@ -39,6 +40,7 @@ class ProjectService @Inject()( val updatedProject = existing.copy( name = request.name, description = request.description, + atUri = request.atUri, updatedAt = LocalDateTime.now(), atCid = Some(UUID.randomUUID().toString) ) @@ -51,12 +53,12 @@ class ProjectService @Inject()( } } case None => - Future.failed(new NoSuchElementException(s"Project not found for GUID: $projectGuid")) + Future.failed(new NoSuchElementException(s"Project not found for atUri: $atUri")) } } - def deleteProject(projectGuid: UUID): Future[Boolean] = { - projectRepository.softDelete(projectGuid) + def deleteProject(atUri: String): Future[Boolean] = { + projectRepository.softDeleteByAtUri(atUri) } private def toResponse(p: Project): ProjectResponse = { diff --git a/conf/evolutions/default/22.sql b/conf/evolutions/default/22.sql index 9876442..76a2526 100644 --- a/conf/evolutions/default/22.sql +++ b/conf/evolutions/default/22.sql @@ -36,9 +36,12 @@ CREATE TABLE project created_at TIMESTAMP NOT NULL DEFAULT now(), updated_at TIMESTAMP NOT NULL DEFAULT now(), deleted BOOLEAN DEFAULT false NOT NULL, + at_uri VARCHAR(255), at_cid VARCHAR(255) ); +CREATE UNIQUE INDEX project_at_uri_uindex ON project (at_uri); + # --- !Downs DROP TABLE project; DROP TABLE citizen_biosample_original_haplogroup; diff --git a/conf/routes b/conf/routes index 3d36d09..a4da8c0 100644 --- a/conf/routes +++ b/conf/routes @@ -90,13 +90,13 @@ DELETE /api/private/sequencing-labs/:id # Citizen Biosample API endpoints (Firehose) POST 
/api/external-biosamples controllers.CitizenBiosampleController.create -PUT /api/external-biosamples/:sampleGuid controllers.CitizenBiosampleController.update(sampleGuid: java.util.UUID) -DELETE /api/external-biosamples/:sampleGuid controllers.CitizenBiosampleController.delete(sampleGuid: java.util.UUID) +PUT /api/external-biosamples/*atUri controllers.CitizenBiosampleController.update(atUri: String) +DELETE /api/external-biosamples/*atUri controllers.CitizenBiosampleController.delete(atUri: String) # Project API endpoints POST /api/projects controllers.ProjectController.create -PUT /api/projects/:projectGuid controllers.ProjectController.update(projectGuid: java.util.UUID) -DELETE /api/projects/:projectGuid controllers.ProjectController.delete(projectGuid: java.util.UUID) +PUT /api/projects/*atUri controllers.ProjectController.update(atUri: String) +DELETE /api/projects/*atUri controllers.ProjectController.delete(atUri: String) # --- API Routes (Handled by Tapir, including Swagger UI) --- POST /api/registerPDS controllers.PDSRegistrationController.registerPDS() From 34e5b46d348d727e15250b7d6d4030f442ac1ea8 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 07:21:33 -0600 Subject: [PATCH 21/31] Require atUri in Atmosphere Lexicon for biosample and project records --- documents/Atmosphere_Lexicon.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/documents/Atmosphere_Lexicon.md b/documents/Atmosphere_Lexicon.md index 1f692f3..9c0d708 100644 --- a/documents/Atmosphere_Lexicon.md +++ b/documents/Atmosphere_Lexicon.md @@ -77,9 +77,9 @@ This record represents a single biological sample processed by a BGS node. 
It ma "key": "tid", "record": { "type": "object", - "required": ["sampleAccession", "donorIdentifier", "centerName", "sequenceData", "citizenDid"], - "properties": { - "sampleAccession": { + "required": ["sampleAccession", "donorIdentifier", "centerName", "sequenceData", "citizenDid", "atUri"], + "properties": { + "sampleAccession": { "type": "string", "description": "Native identifier provided by the client for the biosample." }, @@ -391,8 +391,12 @@ This record defines a research project that aggregates multiple biosamples withi "key": "tid", "record": { "type": "object", - "required": ["projectName", "administrator", "members"], + "required": ["projectName", "administrator", "members", "atUri"], "properties": { + "atUri": { + "type": "string", + "description": "The AT URI (at://did/collection/rkey) of this project record, assigned by the PDS." + }, "projectName": { "type": "string", "description": "Name of the project (e.g., 'Smith Surname Project')." From 302e2ba624d1c70415d483c9a40a8425e81d6be1 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 07:28:16 -0600 Subject: [PATCH 22/31] Update haplogroup data structures to use robust JSONB model across all biosample types --- app/models/api/ExternalBiosampleRequest.scala | 6 +++--- app/models/dal/MyPostgresProfile.scala | 4 ++++ .../BiosampleOriginalHaplogroupTable.scala | 5 +++-- ...itizenBiosampleOriginalHaplogroupTable.scala | 5 +++-- .../domain/genomics/HaplogroupResult.scala | 17 +++++++++++++++++ .../BiosampleOriginalHaplogroup.scala | 6 ++++-- .../CitizenBiosampleOriginalHaplogroup.scala | 6 ++++-- conf/evolutions/default/22.sql | 4 ++-- conf/evolutions/default/23.sql | 7 +++++++ 9 files changed, 47 insertions(+), 13 deletions(-) create mode 100644 app/models/domain/genomics/HaplogroupResult.scala create mode 100644 conf/evolutions/default/23.sql diff --git a/app/models/api/ExternalBiosampleRequest.scala b/app/models/api/ExternalBiosampleRequest.scala index 3792e91..a82b651 100644 --- 
a/app/models/api/ExternalBiosampleRequest.scala +++ b/app/models/api/ExternalBiosampleRequest.scala @@ -1,6 +1,6 @@ package models.api -import models.domain.genomics.{BiologicalSex, BiosampleType} +import models.domain.genomics.{BiologicalSex, BiosampleType, HaplogroupResult} import play.api.libs.json.{Json, OFormat} import java.time.LocalDateTime @@ -77,8 +77,8 @@ object PublicationInfo { * about the haplogroup or its context. */ case class HaplogroupInfo( - yHaplogroup: Option[String], - mtHaplogroup: Option[String], + yHaplogroup: Option[HaplogroupResult], + mtHaplogroup: Option[HaplogroupResult], notes: Option[String] ) diff --git a/app/models/dal/MyPostgresProfile.scala b/app/models/dal/MyPostgresProfile.scala index 6aef564..fd8b024 100644 --- a/app/models/dal/MyPostgresProfile.scala +++ b/app/models/dal/MyPostgresProfile.scala @@ -94,6 +94,10 @@ trait MyPostgresProfile extends ExPostgresProfile with SearchAssistants { import models.HaplogroupType + import models.domain.genomics.HaplogroupResult + + implicit val haplogroupResultJsonTypeMapper: JdbcType[HaplogroupResult] with BaseTypedType[HaplogroupResult] = + MappedJdbcType.base[HaplogroupResult, JsValue](Json.toJson(_), _.as[HaplogroupResult]) implicit val haplogroupTypeMapper: JdbcType[HaplogroupType] = MappedColumnType.base[HaplogroupType, String]( diff --git a/app/models/dal/domain/publications/BiosampleOriginalHaplogroupTable.scala b/app/models/dal/domain/publications/BiosampleOriginalHaplogroupTable.scala index f0b785d..4e5e582 100644 --- a/app/models/dal/domain/publications/BiosampleOriginalHaplogroupTable.scala +++ b/app/models/dal/domain/publications/BiosampleOriginalHaplogroupTable.scala @@ -2,6 +2,7 @@ package models.dal.domain.publications import models.dal.MyPostgresProfile.api.* import models.dal.domain.genomics.BiosamplesTable +import models.domain.genomics.HaplogroupResult import models.domain.publications.BiosampleOriginalHaplogroup /** @@ -33,9 +34,9 @@ class 
BiosampleOriginalHaplogroupTable(tag: Tag) def publicationId = column[Int]("publication_id") - def originalYHaplogroup = column[Option[String]]("original_y_haplogroup") + def originalYHaplogroup = column[Option[HaplogroupResult]]("y_haplogroup_result") - def originalMtHaplogroup = column[Option[String]]("original_mt_haplogroup") + def originalMtHaplogroup = column[Option[HaplogroupResult]]("mt_haplogroup_result") def notes = column[Option[String]]("notes") diff --git a/app/models/dal/domain/publications/CitizenBiosampleOriginalHaplogroupTable.scala b/app/models/dal/domain/publications/CitizenBiosampleOriginalHaplogroupTable.scala index 25e4cd3..112e063 100644 --- a/app/models/dal/domain/publications/CitizenBiosampleOriginalHaplogroupTable.scala +++ b/app/models/dal/domain/publications/CitizenBiosampleOriginalHaplogroupTable.scala @@ -2,6 +2,7 @@ package models.dal.domain.publications import models.dal.MyPostgresProfile.api.* import models.dal.domain.genomics.CitizenBiosamplesTable +import models.domain.genomics.HaplogroupResult import models.domain.publications.CitizenBiosampleOriginalHaplogroup class CitizenBiosampleOriginalHaplogroupTable(tag: Tag) @@ -10,8 +11,8 @@ class CitizenBiosampleOriginalHaplogroupTable(tag: Tag) def id = column[Int]("id", O.PrimaryKey, O.AutoInc) def citizenBiosampleId = column[Int]("citizen_biosample_id") def publicationId = column[Int]("publication_id") - def originalYHaplogroup = column[Option[String]]("original_y_haplogroup") - def originalMtHaplogroup = column[Option[String]]("original_mt_haplogroup") + def originalYHaplogroup = column[Option[HaplogroupResult]]("y_haplogroup_result") + def originalMtHaplogroup = column[Option[HaplogroupResult]]("mt_haplogroup_result") def notes = column[Option[String]]("notes") // Foreign key relationships diff --git a/app/models/domain/genomics/HaplogroupResult.scala b/app/models/domain/genomics/HaplogroupResult.scala new file mode 100644 index 0000000..13c9f2e --- /dev/null +++ 
b/app/models/domain/genomics/HaplogroupResult.scala @@ -0,0 +1,17 @@ +package models.domain.genomics + +import play.api.libs.json.{Json, OFormat} + +case class HaplogroupResult( + haplogroupName: String, + score: Double, + matchingSnps: Int, + mismatchingSnps: Int, + ancestralMatches: Int, + treeDepth: Int, + lineagePath: Seq[String] +) + +object HaplogroupResult { + implicit val format: OFormat[HaplogroupResult] = Json.format[HaplogroupResult] +} diff --git a/app/models/domain/publications/BiosampleOriginalHaplogroup.scala b/app/models/domain/publications/BiosampleOriginalHaplogroup.scala index 2644cdb..bdcc8d3 100644 --- a/app/models/domain/publications/BiosampleOriginalHaplogroup.scala +++ b/app/models/domain/publications/BiosampleOriginalHaplogroup.scala @@ -1,5 +1,7 @@ package models.domain.publications +import models.domain.genomics.HaplogroupResult + /** * Represents an original haplogroup assignment for a biosample from a specific publication. * @@ -14,8 +16,8 @@ case class BiosampleOriginalHaplogroup( id: Option[Int] = None, biosampleId: Int, publicationId: Int, - originalYHaplogroup: Option[String], - originalMtHaplogroup: Option[String], + originalYHaplogroup: Option[HaplogroupResult], + originalMtHaplogroup: Option[HaplogroupResult], notes: Option[String] ) diff --git a/app/models/domain/publications/CitizenBiosampleOriginalHaplogroup.scala b/app/models/domain/publications/CitizenBiosampleOriginalHaplogroup.scala index 6d85fff..5b43963 100644 --- a/app/models/domain/publications/CitizenBiosampleOriginalHaplogroup.scala +++ b/app/models/domain/publications/CitizenBiosampleOriginalHaplogroup.scala @@ -1,10 +1,12 @@ package models.domain.publications +import models.domain.genomics.HaplogroupResult + case class CitizenBiosampleOriginalHaplogroup( id: Option[Int] = None, citizenBiosampleId: Int, publicationId: Int, - originalYHaplogroup: Option[String], - originalMtHaplogroup: Option[String], + originalYHaplogroup: Option[HaplogroupResult], + 
originalMtHaplogroup: Option[HaplogroupResult], notes: Option[String] ) diff --git a/conf/evolutions/default/22.sql b/conf/evolutions/default/22.sql index 76a2526..4b256ed 100644 --- a/conf/evolutions/default/22.sql +++ b/conf/evolutions/default/22.sql @@ -20,8 +20,8 @@ CREATE TABLE citizen_biosample_original_haplogroup id SERIAL PRIMARY KEY, citizen_biosample_id INT REFERENCES citizen_biosample (id) ON DELETE CASCADE, publication_id INT REFERENCES publication (id) ON DELETE CASCADE, - original_y_haplogroup VARCHAR(255), - original_mt_haplogroup VARCHAR(255), + y_haplogroup_result JSONB, + mt_haplogroup_result JSONB, notes TEXT, UNIQUE (citizen_biosample_id, publication_id) ); diff --git a/conf/evolutions/default/23.sql b/conf/evolutions/default/23.sql new file mode 100644 index 0000000..531c43d --- /dev/null +++ b/conf/evolutions/default/23.sql @@ -0,0 +1,7 @@ +# --- !Ups +ALTER TABLE biosample_original_haplogroup ADD COLUMN y_haplogroup_result JSONB; +ALTER TABLE biosample_original_haplogroup ADD COLUMN mt_haplogroup_result JSONB; + +# --- !Downs +ALTER TABLE biosample_original_haplogroup DROP COLUMN mt_haplogroup_result; +ALTER TABLE biosample_original_haplogroup DROP COLUMN y_haplogroup_result; From 4d62b603aa3a96a93eb0cb5338cf408ea3b52e4b Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 07:32:56 -0600 Subject: [PATCH 23/31] Fix haplogroup type mismatch errors --- app/models/api/BiosampleOriginalHaplogroupUpdate.scala | 9 +++++---- app/models/api/BiosampleUpdate.scala | 6 +++--- app/services/BiosampleUpdateService.scala | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/app/models/api/BiosampleOriginalHaplogroupUpdate.scala b/app/models/api/BiosampleOriginalHaplogroupUpdate.scala index fa5cfbc..87f7c96 100644 --- a/app/models/api/BiosampleOriginalHaplogroupUpdate.scala +++ b/app/models/api/BiosampleOriginalHaplogroupUpdate.scala @@ -1,10 +1,11 @@ package models.api +import models.domain.genomics.HaplogroupResult import 
play.api.libs.json.{Format, Json} case class BiosampleOriginalHaplogroupUpdate( - originalYHaplogroup: Option[String], - originalMtHaplogroup: Option[String], + originalYHaplogroup: Option[HaplogroupResult], + originalMtHaplogroup: Option[HaplogroupResult], notes: Option[String] ) @@ -16,8 +17,8 @@ case class BiosampleOriginalHaplogroupView( id: Option[Int], biosampleId: Int, publicationId: Int, - originalYHaplogroup: Option[String], - originalMtHaplogroup: Option[String], + originalYHaplogroup: Option[HaplogroupResult], + originalMtHaplogroup: Option[HaplogroupResult], notes: Option[String] ) diff --git a/app/models/api/BiosampleUpdate.scala b/app/models/api/BiosampleUpdate.scala index 6351dde..6edabc2 100644 --- a/app/models/api/BiosampleUpdate.scala +++ b/app/models/api/BiosampleUpdate.scala @@ -1,6 +1,6 @@ package models.api -import models.domain.genomics.BiologicalSex +import models.domain.genomics.{BiologicalSex, HaplogroupResult} import play.api.libs.json.{Json, Reads} /** @@ -22,8 +22,8 @@ case class BiosampleUpdate( locked: Option[Boolean] = None, dateRangeStart: Option[Int] = None, dateRangeEnd: Option[Int] = None, - yHaplogroup: Option[String] = None, - mtHaplogroup: Option[String] = None + yHaplogroup: Option[HaplogroupResult] = None, + mtHaplogroup: Option[HaplogroupResult] = None ) { def hasUpdates: Boolean = { sex.isDefined || geoCoord.isDefined || alias.isDefined || locked.isDefined || diff --git a/app/services/BiosampleUpdateService.scala b/app/services/BiosampleUpdateService.scala index 5371cf2..6280537 100644 --- a/app/services/BiosampleUpdateService.scala +++ b/app/services/BiosampleUpdateService.scala @@ -2,7 +2,7 @@ package services import jakarta.inject.{Inject, Singleton} import models.api.{BiosampleUpdate, BiosampleView} -import models.domain.genomics.{Biosample, BiosampleType, SpecimenDonor} +import models.domain.genomics.{Biosample, BiosampleType, SpecimenDonor, HaplogroupResult} import 
models.domain.publications.BiosampleOriginalHaplogroup import repositories.{BiosampleOriginalHaplogroupRepository, BiosampleRepository, PublicationBiosampleRepository, SpecimenDonorRepository} import utils.GeometryUtils @@ -170,4 +170,4 @@ class BiosampleUpdateService @Inject()( Future.successful(()) } } -} \ No newline at end of file +} From 1e917502c1682d1847d4c15f9f7509947e780f79 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 08:09:04 -0600 Subject: [PATCH 24/31] Support top-level haplogroup assignments for Citizen Biosamples --- app/models/api/ExternalBiosampleRequest.scala | 1 + app/models/api/HaplogroupAssignments.scala | 13 +++++++++++++ .../domain/genomics/CitizenBiosamplesTable.scala | 6 +++++- app/models/domain/genomics/CitizenBiosample.scala | 2 ++ app/repositories/CitizenBiosampleRepository.scala | 4 ++++ app/services/CitizenBiosampleService.scala | 4 ++++ conf/evolutions/default/22.sql | 4 ++++ 7 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 app/models/api/HaplogroupAssignments.scala diff --git a/app/models/api/ExternalBiosampleRequest.scala b/app/models/api/ExternalBiosampleRequest.scala index a82b651..0d9ea52 100644 --- a/app/models/api/ExternalBiosampleRequest.scala +++ b/app/models/api/ExternalBiosampleRequest.scala @@ -36,6 +36,7 @@ case class ExternalBiosampleRequest( donorIdentifier: Option[String], donorType: Option[BiosampleType], publication: Option[PublicationInfo], + haplogroups: Option[HaplogroupAssignments], sequenceData: SequenceDataInfo, atCid: Option[String] = None ) diff --git a/app/models/api/HaplogroupAssignments.scala b/app/models/api/HaplogroupAssignments.scala new file mode 100644 index 0000000..db1103c --- /dev/null +++ b/app/models/api/HaplogroupAssignments.scala @@ -0,0 +1,13 @@ +package models.api + +import models.domain.genomics.HaplogroupResult +import play.api.libs.json.{Json, OFormat} + +case class HaplogroupAssignments( + yDna: Option[HaplogroupResult], + mtDna: Option[HaplogroupResult] +) 
+ +object HaplogroupAssignments { + implicit val format: OFormat[HaplogroupAssignments] = Json.format[HaplogroupAssignments] +} diff --git a/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala b/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala index 57873c4..65e6e6e 100644 --- a/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala +++ b/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala @@ -1,7 +1,7 @@ package models.dal.domain.genomics import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{BiologicalSex, CitizenBiosample} +import models.domain.genomics.{BiologicalSex, CitizenBiosample, HaplogroupResult} import com.vividsolutions.jts.geom.Point import java.time.{LocalDate, LocalDateTime} import java.util.UUID @@ -16,6 +16,8 @@ class CitizenBiosamplesTable(tag: Tag) extends Table[CitizenBiosample](tag, "cit def sex = column[Option[BiologicalSex]]("sex") def geocoord = column[Option[Point]]("geocoord") def description = column[Option[String]]("description") + def yHaplogroup = column[Option[HaplogroupResult]]("y_haplogroup") + def mtHaplogroup = column[Option[HaplogroupResult]]("mt_haplogroup") def sampleGuid = column[UUID]("sample_guid") def deleted = column[Boolean]("deleted", O.Default(false)) @@ -33,6 +35,8 @@ class CitizenBiosamplesTable(tag: Tag) extends Table[CitizenBiosample](tag, "cit sex, geocoord, description, + yHaplogroup, + mtHaplogroup, sampleGuid, deleted, atCid, diff --git a/app/models/domain/genomics/CitizenBiosample.scala b/app/models/domain/genomics/CitizenBiosample.scala index 5ee6124..2e62861 100644 --- a/app/models/domain/genomics/CitizenBiosample.scala +++ b/app/models/domain/genomics/CitizenBiosample.scala @@ -18,6 +18,8 @@ case class CitizenBiosample( sex: Option[BiologicalSex], geocoord: Option[Point], description: Option[String], + yHaplogroup: Option[HaplogroupResult] = None, + mtHaplogroup: Option[HaplogroupResult] = None, sampleGuid: UUID, deleted: Boolean = false, atCid: 
Option[String] = None, diff --git a/app/repositories/CitizenBiosampleRepository.scala b/app/repositories/CitizenBiosampleRepository.scala index 4913e2f..d054e66 100644 --- a/app/repositories/CitizenBiosampleRepository.scala +++ b/app/repositories/CitizenBiosampleRepository.scala @@ -68,6 +68,8 @@ class CitizenBiosampleRepositoryImpl @Inject()( b.sex, b.geocoord, b.description, + b.yHaplogroup, + b.mtHaplogroup, b.atCid, b.updatedAt, b.deleted @@ -80,6 +82,8 @@ class CitizenBiosampleRepositoryImpl @Inject()( biosample.sex, biosample.geocoord, biosample.description, + biosample.yHaplogroup, + biosample.mtHaplogroup, biosample.atCid, LocalDateTime.now(), biosample.deleted diff --git a/app/services/CitizenBiosampleService.scala b/app/services/CitizenBiosampleService.scala index 26e7060..4c8e5c4 100644 --- a/app/services/CitizenBiosampleService.scala +++ b/app/services/CitizenBiosampleService.scala @@ -42,6 +42,8 @@ class CitizenBiosampleService @Inject()( sex = request.sex, geocoord = geocoord, description = Some(request.description), + yHaplogroup = request.haplogroups.flatMap(_.yDna), + mtHaplogroup = request.haplogroups.flatMap(_.mtDna), sampleGuid = sampleGuid, deleted = false, atCid = newAtCid, @@ -74,6 +76,8 @@ class CitizenBiosampleService @Inject()( geocoord = geocoord, atUri = request.atUri, accession = Some(request.sampleAccession), + yHaplogroup = request.haplogroups.flatMap(_.yDna).orElse(existing.yHaplogroup), + mtHaplogroup = request.haplogroups.flatMap(_.mtDna).orElse(existing.mtHaplogroup), atCid = newAtCid, updatedAt = LocalDateTime.now() ) diff --git a/conf/evolutions/default/22.sql b/conf/evolutions/default/22.sql index 4b256ed..52ca84a 100644 --- a/conf/evolutions/default/22.sql +++ b/conf/evolutions/default/22.sql @@ -6,6 +6,8 @@ ALTER TABLE citizen_biosample ADD COLUMN created_at TIMESTAMP DEFAULT now() NOT ALTER TABLE citizen_biosample ADD COLUMN updated_at TIMESTAMP DEFAULT now() NOT NULL; ALTER TABLE citizen_biosample ADD COLUMN accession 
VARCHAR(255); ALTER TABLE citizen_biosample ADD COLUMN alias VARCHAR(255); +ALTER TABLE citizen_biosample ADD COLUMN y_haplogroup JSONB; +ALTER TABLE citizen_biosample ADD COLUMN mt_haplogroup JSONB; CREATE UNIQUE INDEX citizen_biosample_accession_uindex ON citizen_biosample (accession); CREATE TABLE publication_citizen_biosample @@ -47,6 +49,8 @@ DROP TABLE project; DROP TABLE citizen_biosample_original_haplogroup; DROP TABLE publication_citizen_biosample; DROP INDEX citizen_biosample_accession_uindex; +ALTER TABLE citizen_biosample DROP COLUMN mt_haplogroup; +ALTER TABLE citizen_biosample DROP COLUMN y_haplogroup; ALTER TABLE citizen_biosample DROP COLUMN alias; ALTER TABLE citizen_biosample DROP COLUMN accession; ALTER TABLE citizen_biosample DROP COLUMN updated_at; From e3c053516cf277c61ddacc9933d7e457a2df8384 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 08:16:47 -0600 Subject: [PATCH 25/31] Align SpecimenDonor and PDSRegistrationService with Lexicon atUri and full flow diagram --- app/models/dal/domain/genomics/SpecimenDonorsTable.scala | 4 ++-- app/models/domain/genomics/SpecimenDonor.scala | 2 +- app/services/PDSRegistrationService.scala | 5 ++++- conf/evolutions/default/23.sql | 2 ++ 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/app/models/dal/domain/genomics/SpecimenDonorsTable.scala b/app/models/dal/domain/genomics/SpecimenDonorsTable.scala index f53b10d..ad0dd57 100644 --- a/app/models/dal/domain/genomics/SpecimenDonorsTable.scala +++ b/app/models/dal/domain/genomics/SpecimenDonorsTable.scala @@ -32,7 +32,7 @@ class SpecimenDonorsTable(tag: Tag) extends Table[SpecimenDonor](tag, "specimen_ def sex = column[Option[BiologicalSex]]("sex") def geocoord = column[Option[Point]]("geocoord") def pgpParticipantId = column[Option[String]]("pgp_participant_id") - def citizenBiosampleDid = column[Option[String]]("citizen_biosample_did") + def atUri = column[Option[String]]("at_uri") def dateRangeStart = 
column[Option[Int]]("date_range_start") def dateRangeEnd = column[Option[Int]]("date_range_end") @@ -44,7 +44,7 @@ class SpecimenDonorsTable(tag: Tag) extends Table[SpecimenDonor](tag, "specimen_ sex, geocoord, pgpParticipantId, - citizenBiosampleDid, + atUri, dateRangeStart, dateRangeEnd ).mapTo[SpecimenDonor] diff --git a/app/models/domain/genomics/SpecimenDonor.scala b/app/models/domain/genomics/SpecimenDonor.scala index 849facf..1524b62 100644 --- a/app/models/domain/genomics/SpecimenDonor.scala +++ b/app/models/domain/genomics/SpecimenDonor.scala @@ -25,7 +25,7 @@ case class SpecimenDonor( sex: Option[BiologicalSex], geocoord: Option[Point], pgpParticipantId: Option[String] = None, - citizenBiosampleDid: Option[String] = None, + atUri: Option[String] = None, dateRangeStart: Option[Int] = None, dateRangeEnd: Option[Int] = None ) diff --git a/app/services/PDSRegistrationService.scala b/app/services/PDSRegistrationService.scala index 10bcc09..2c5f1d4 100644 --- a/app/services/PDSRegistrationService.scala +++ b/app/services/PDSRegistrationService.scala @@ -45,7 +45,10 @@ class PDSRegistrationService @Inject()( createdAt = ZonedDateTime.now(), updatedAt = ZonedDateTime.now() ) - pdsRegistrationRepository.create(newRegistration).map(Right(_)) + pdsRegistrationRepository.create(newRegistration).map { res => + logger.info(s"Internal Notification: PDS Registered successfully for DID $did. Rust Sync Cluster will detect this via DB poll.") + Right(res) + } case None => Future.successful(Left(s"Failed to verify PDS $pdsUrl for DID $did. 
Could not get latest commit.")) } diff --git a/conf/evolutions/default/23.sql b/conf/evolutions/default/23.sql index 531c43d..7940b50 100644 --- a/conf/evolutions/default/23.sql +++ b/conf/evolutions/default/23.sql @@ -1,7 +1,9 @@ # --- !Ups ALTER TABLE biosample_original_haplogroup ADD COLUMN y_haplogroup_result JSONB; ALTER TABLE biosample_original_haplogroup ADD COLUMN mt_haplogroup_result JSONB; +ALTER TABLE specimen_donor RENAME COLUMN citizen_biosample_did TO at_uri; # --- !Downs +ALTER TABLE specimen_donor RENAME COLUMN at_uri TO citizen_biosample_did; ALTER TABLE biosample_original_haplogroup DROP COLUMN mt_haplogroup_result; ALTER TABLE biosample_original_haplogroup DROP COLUMN y_haplogroup_result; From 53067be023c7779cf63e2e44fbd332f974836bcc Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 08:28:39 -0600 Subject: [PATCH 26/31] Fix SpecimenDonor geocoord parameter in GenomicStudyMappers --- app/repositories/SpecimanDonorRepository.scala | 10 +++++----- app/services/ExternalBiosampleService.scala | 6 +++--- app/services/GenomicStudyService.scala | 16 ++++++++-------- app/services/PgpBiosampleService.scala | 2 +- app/services/genomics/SpecimanDonorService.scala | 4 ++-- app/services/mappers/GenomicStudyMappers.scala | 4 ++-- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/app/repositories/SpecimanDonorRepository.scala b/app/repositories/SpecimanDonorRepository.scala index 19feffc..14ad21f 100644 --- a/app/repositories/SpecimanDonorRepository.scala +++ b/app/repositories/SpecimanDonorRepository.scala @@ -43,7 +43,7 @@ class SpecimenDonorRepositoryImpl @Inject()( override def findByDidAndIdentifier(did: String, identifier: String): Future[Option[SpecimenDonor]] = { db.run(donorsTable - .filter(d => d.citizenBiosampleDid === did && d.donorIdentifier === identifier) + .filter(d => d.atUri === did && d.donorIdentifier === identifier) .result.headOption ) } @@ -70,7 +70,7 @@ class SpecimenDonorRepositoryImpl @Inject()( d.sex, d.geocoord, 
d.pgpParticipantId, - d.citizenBiosampleDid, + d.atUri, d.dateRangeStart, d.dateRangeEnd )) @@ -81,7 +81,7 @@ class SpecimenDonorRepositoryImpl @Inject()( donor.sex, donor.geocoord, donor.pgpParticipantId, - donor.citizenBiosampleDid, + donor.atUri, donor.dateRangeStart, donor.dateRangeEnd )) @@ -104,7 +104,7 @@ class SpecimenDonorRepositoryImpl @Inject()( d.sex, d.geocoord, d.pgpParticipantId, - d.citizenBiosampleDid, + d.atUri, d.dateRangeStart, d.dateRangeEnd )) @@ -114,7 +114,7 @@ class SpecimenDonorRepositoryImpl @Inject()( donor.sex, donor.geocoord, donor.pgpParticipantId, - donor.citizenBiosampleDid, + donor.atUri, donor.dateRangeStart, donor.dateRangeEnd )) diff --git a/app/services/ExternalBiosampleService.scala b/app/services/ExternalBiosampleService.scala index e2ab34f..dfef3bb 100644 --- a/app/services/ExternalBiosampleService.scala +++ b/app/services/ExternalBiosampleService.scala @@ -41,7 +41,7 @@ class ExternalBiosampleService @Inject()( sex = request.sex, geocoord = geocoord, pgpParticipantId = None, - citizenBiosampleDid = None, + atUri = None, dateRangeStart = None, dateRangeEnd = None ) @@ -114,7 +114,7 @@ class ExternalBiosampleService @Inject()( sex = request.sex, geocoord = None, // Coordinates handled separately if needed, or could be passed here pgpParticipantId = None, - citizenBiosampleDid = Some(did), + atUri = Some(did), dateRangeStart = None, dateRangeEnd = None ) @@ -170,7 +170,7 @@ class ExternalBiosampleService @Inject()( */ def deleteBiosample(accession: String, citizenDid: String): Future[Boolean] = { biosampleRepository.findByAccession(accession).flatMap { - case Some((biosample, Some(donor))) if donor.citizenBiosampleDid.contains(citizenDid) => + case Some((biosample, Some(donor))) if donor.atUri.contains(citizenDid) => biosampleDataService.fullyDeleteBiosampleAndDependencies(biosample.id.get, biosample.sampleGuid).map(_ => true) case _ => Future.successful(false) diff --git a/app/services/GenomicStudyService.scala 
b/app/services/GenomicStudyService.scala index db562a3..dc800ec 100644 --- a/app/services/GenomicStudyService.scala +++ b/app/services/GenomicStudyService.scala @@ -162,28 +162,28 @@ class GenomicStudyService @Inject()( } private def findMatchingDonor(donor: SpecimenDonor): Future[Option[SpecimenDonor]] = { - // Initialize empty sequence for query conditions - var conditions = Seq.empty[(SpecimenDonor, SpecimenDonor) => Boolean] + import scala.collection.mutable.ArrayBuffer + var conditions = ArrayBuffer.empty[(SpecimenDonor, SpecimenDonor) => Boolean] // Add conditions based on available donor data if (donor.donorIdentifier.nonEmpty) { - conditions :+= (_.donorIdentifier == _.donorIdentifier) + conditions += ((existing, incoming) => existing.donorIdentifier == incoming.donorIdentifier) } if (donor.sex.isDefined) { - conditions :+= (_.sex == _.sex) + conditions += ((existing, incoming) => existing.sex == incoming.sex) } if (donor.geocoord.isDefined) { - conditions :+= (_.geocoord == _.geocoord) + conditions += ((existing, incoming) => existing.geocoord == incoming.geocoord) } if (donor.pgpParticipantId.isDefined) { - conditions :+= (_.pgpParticipantId == _.pgpParticipantId) + conditions += ((existing, incoming) => existing.pgpParticipantId == incoming.pgpParticipantId) } - if (donor.citizenBiosampleDid.isDefined) { - conditions :+= (_.citizenBiosampleDid == _.citizenBiosampleDid) + if (donor.atUri.isDefined) { + conditions += ((existing, incoming) => existing.atUri == incoming.atUri) } // Get all donors with same origin biobank and type diff --git a/app/services/PgpBiosampleService.scala b/app/services/PgpBiosampleService.scala index c9c51ff..e6b0699 100644 --- a/app/services/PgpBiosampleService.scala +++ b/app/services/PgpBiosampleService.scala @@ -42,7 +42,7 @@ class PgpBiosampleService @Inject()( sex = sex, geocoord = geocoord, pgpParticipantId = Some(participantId), - citizenBiosampleDid = None, + atUri = None, dateRangeStart = None, dateRangeEnd = None ) diff 
--git a/app/services/genomics/SpecimanDonorService.scala b/app/services/genomics/SpecimanDonorService.scala index b003a0e..f570cb6 100644 --- a/app/services/genomics/SpecimanDonorService.scala +++ b/app/services/genomics/SpecimanDonorService.scala @@ -120,7 +120,7 @@ class SpecimenDonorServiceImpl @Inject()(donorRepo: SpecimenDonorRepository) sex = source.sex.orElse(acc.sex), geocoord = source.geocoord.orElse(acc.geocoord), pgpParticipantId = source.pgpParticipantId.orElse(acc.pgpParticipantId), - citizenBiosampleDid = source.citizenBiosampleDid.orElse(acc.citizenBiosampleDid), + atUri = source.atUri.orElse(acc.atUri), dateRangeStart = source.dateRangeStart.orElse(acc.dateRangeStart), dateRangeEnd = source.dateRangeEnd.orElse(acc.dateRangeEnd) ) @@ -175,7 +175,7 @@ class SpecimenDonorServiceImpl @Inject()(donorRepo: SpecimenDonorRepository) checkField("sex", target.sex, source.sex, result.sex), checkField("geocoord", target.geocoord, source.geocoord, result.geocoord), checkField("pgpParticipantId", target.pgpParticipantId, source.pgpParticipantId, result.pgpParticipantId), - checkField("citizenBiosampleDid", target.citizenBiosampleDid, source.citizenBiosampleDid, result.citizenBiosampleDid), + checkField("atUri", target.atUri, source.atUri, result.atUri), checkField("dateRangeStart", target.dateRangeStart, source.dateRangeStart, result.dateRangeStart), checkField("dateRangeEnd", target.dateRangeEnd, source.dateRangeEnd, result.dateRangeEnd) ).flatten diff --git a/app/services/mappers/GenomicStudyMappers.scala b/app/services/mappers/GenomicStudyMappers.scala index 9af075d..3a765e3 100644 --- a/app/services/mappers/GenomicStudyMappers.scala +++ b/app/services/mappers/GenomicStudyMappers.scala @@ -70,7 +70,7 @@ object GenomicStudyMappers { sex = ena.sex.map(BiologicalSex.valueOf), geocoord = geoCoord, pgpParticipantId = None, - citizenBiosampleDid = None, + atUri = None, dateRangeStart = None, dateRangeEnd = None )) @@ -113,7 +113,7 @@ object GenomicStudyMappers { sex 
= sex.map(BiologicalSex.valueOf), geocoord = coordinates, pgpParticipantId = None, - citizenBiosampleDid = None, + atUri = None, dateRangeStart = None, dateRangeEnd = None )) From 965b1d513a08409c700435bc0face16c9ec0cf57 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 08:36:07 -0600 Subject: [PATCH 27/31] Add lease management fields and index to pds_registrations table --- app/models/PDSRegistration.scala | 5 ++++- app/models/dal/MetadataSchema.scala | 6 +++++- conf/evolutions/default/24.sql | 13 +++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 conf/evolutions/default/24.sql diff --git a/app/models/PDSRegistration.scala b/app/models/PDSRegistration.scala index 95fcc2d..fd310ed 100644 --- a/app/models/PDSRegistration.scala +++ b/app/models/PDSRegistration.scala @@ -11,7 +11,10 @@ case class PDSRegistration( lastCommitSeq: Option[Long], cursor: Long = 0L, createdAt: ZonedDateTime, - updatedAt: ZonedDateTime + updatedAt: ZonedDateTime, + leasedByInstanceId: Option[String] = None, + leaseExpiresAt: Option[ZonedDateTime] = None, + processingStatus: String = "idle" ) object PDSRegistration { diff --git a/app/models/dal/MetadataSchema.scala b/app/models/dal/MetadataSchema.scala index 477aad8..cbd7709 100644 --- a/app/models/dal/MetadataSchema.scala +++ b/app/models/dal/MetadataSchema.scala @@ -17,9 +17,13 @@ object MetadataSchema { def cursor = column[Long]("cursor") def createdAt = column[ZonedDateTime]("created_at") def updatedAt = column[ZonedDateTime]("updated_at") + def leasedByInstanceId = column[Option[String]]("leased_by_instance_id") + def leaseExpiresAt = column[Option[ZonedDateTime]]("lease_expires_at") + def processingStatus = column[String]("processing_status") def * : ProvenShape[PDSRegistration] = ( - did, pdsUrl, handle, lastCommitCid, lastCommitSeq, cursor, createdAt, updatedAt + did, pdsUrl, handle, lastCommitCid, lastCommitSeq, cursor, createdAt, updatedAt, + leasedByInstanceId, leaseExpiresAt, 
processingStatus ) <> ((PDSRegistration.apply _).tupled, PDSRegistration.unapply) } diff --git a/conf/evolutions/default/24.sql b/conf/evolutions/default/24.sql new file mode 100644 index 0000000..1259aa3 --- /dev/null +++ b/conf/evolutions/default/24.sql @@ -0,0 +1,13 @@ +# --- !Ups +ALTER TABLE pds_registrations ADD COLUMN leased_by_instance_id TEXT NULL; +ALTER TABLE pds_registrations ADD COLUMN lease_expires_at TIMESTAMPTZ NULL; +ALTER TABLE pds_registrations ADD COLUMN processing_status TEXT NOT NULL DEFAULT 'idle'; + +CREATE INDEX pds_registrations_lease_expires_at_idx ON pds_registrations (lease_expires_at); + +# --- !Downs +DROP INDEX pds_registrations_lease_expires_at_idx; + +ALTER TABLE pds_registrations DROP COLUMN processing_status; +ALTER TABLE pds_registrations DROP COLUMN lease_expires_at; +ALTER TABLE pds_registrations DROP COLUMN leased_by_instance_id; From 5af41d7550748752d967685d12bb5d45d103f348 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 09:13:28 -0600 Subject: [PATCH 28/31] Introduce Specimen Donor linkage for Citizen Biosamples - Add `specimenDonorId` field and foreign key in `citizen_biosample` table. - Enable resolving or creating Specimen Donors during biosample creation and updates. - Update `CitizenBiosampleService` to handle Specimen Donor associations. --- GEMINI.md | 88 ---------- .../genomics/CitizenBiosamplesTable.scala | 6 +- .../domain/genomics/CitizenBiosample.scala | 3 +- app/services/CitizenBiosampleService.scala | 164 ++++++++++++------ conf/evolutions/default/24.sql | 14 +- 5 files changed, 124 insertions(+), 151 deletions(-) delete mode 100644 GEMINI.md diff --git a/GEMINI.md b/GEMINI.md deleted file mode 100644 index 7f6dff2..0000000 --- a/GEMINI.md +++ /dev/null @@ -1,88 +0,0 @@ -# Project Context: Decoding Us - -## Project Overview - -**Decoding Us** is a collaborative web platform designed for genetic genealogy and population research. 
It leverages citizen science to build high-resolution haplogroup trees and facilitate privacy-preserving IBD (Identity by Descent) segment matching. The application connects individual genomic data (processed on secure Edge nodes) with global research efforts. - -The project is built using **Scala 3** and the **Play Framework**, employing a modern, scalable architecture. It features a hybrid API approach using **Tapir** for OpenAPI documentation and standard Play controllers for implementation. The frontend utilizes **HTMX** for dynamic interactions without heavy client-side state. - -## Technology Stack - -* **Language:** Scala 3.3.6 -* **Web Framework:** Play Framework (with `play-slick`) -* **Database:** PostgreSQL (using Slick 6.2.0 for access) -* **API Documentation:** Tapir (OpenAPI/Swagger UI) -* **Concurrency/Jobs:** Apache Pekko (Actors & Streams), `pekko-quartz-scheduler` -* **Frontend:** HTMX, Bootstrap 5 -* **Dependency Injection:** Guice -* **Cloud Integration:** AWS SDK (Secrets Manager, SES) -* **Containerization:** Docker - -## Building and Running - -The project uses **sbt** (Scala Build Tool) for all build and lifecycle management tasks. - -### Prerequisites -* Java Development Kit (JDK) compatible with Scala 3. -* sbt installed. -* PostgreSQL database running and configured. - -### Key Commands - -* **Run the application:** - ```bash - sbt run - ``` - The application typically starts on `http://localhost:9000`. - -* **Run tests:** - ```bash - sbt test - ``` - -* **Compile code:** - ```bash - sbt compile - ``` - -* **Generate IDE configuration:** - (If using IntelliJ IDEA, it generally handles this automatically via BSP import). - -## Architecture & Project Structure - -The project follows a standard Layered Architecture within the Play Framework structure: - -* **`app/api/`**: **API Definitions (Tapir).** Defines the shape of endpoints (inputs/outputs) for OpenAPI generation. 
*Does not contain business logic.* -* **`app/controllers/`**: **Web Layer.** Handles HTTP requests. Implements the logic for API endpoints and serves HTML pages. -* **`app/services/`**: **Business Logic Layer.** Contains the core application logic. Controllers delegate complex operations here. -* **`app/repositories/`**: **Data Access Layer.** Handles all database interactions using Slick. -* **`app/models/`**: **Domain Layer.** Contains Case Classes for API DTOs and Slick Table definitions. -* **`app/modules/`**: **Configuration.** Guice modules for DI and application lifecycle (e.g., `StartupModule`). -* **`app/actors/`**: **Background Processing.** Pekko actors for asynchronous tasks. -* **`conf/`**: **Configuration.** `application.conf` (main config) and `routes` (URL mappings). - -## Development Conventions - -* **Hybrid API Pattern:** - 1. Define the endpoint signature in `app/api/` (using Tapir). - 2. Add the route in `conf/routes`. - 3. Implement the logic in a Controller within `app/controllers/`. - 4. Ensure the Controller delegates to a Service, which uses a Repository. - -* **Database Access:** - * Use **Slick** for type-safe database queries. - * Define table schemas in `app/models/dal/DatabaseSchema.scala` (or similar DAL files). - * Repositories should encapsulate all DB queries. - -* **Frontend Development:** - * Use **Twirl** templates (`.scala.html` files in `app/views/`) for server-side rendering. - * Use **HTMX** attributes in HTML for dynamic behavior (e.g., `hx-get`, `hx-post`, `hx-target`). Avoid writing custom JavaScript unless necessary. - -* **Testing:** - * Write tests using **ScalaTest** and `scalatestplus-play`. - * Place tests in the `test/` directory, mirroring the `app/` package structure. - * Ensure new features have corresponding Controller, Service, and Repository tests. - -* **Dependency Injection:** - * Use **@Inject()** annotation for constructor injection in classes. 
- * Bind interfaces to implementations in Module files (e.g., `app/modules/ApplicationModule.scala`) if necessary. diff --git a/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala b/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala index 65e6e6e..5179d17 100644 --- a/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala +++ b/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala @@ -24,6 +24,9 @@ class CitizenBiosamplesTable(tag: Tag) extends Table[CitizenBiosample](tag, "cit def atCid = column[Option[String]]("at_cid") def createdAt = column[LocalDateTime]("created_at") def updatedAt = column[LocalDateTime]("updated_at") + def specimenDonorId = column[Option[Int]]("specimen_donor_id") + + def specimenDonorFk = foreignKey("citizen_biosample_specimen_donor_fk", specimenDonorId, TableQuery[SpecimenDonorsTable])(_.id.?) def * = ( id.?, @@ -41,6 +44,7 @@ class CitizenBiosamplesTable(tag: Tag) extends Table[CitizenBiosample](tag, "cit deleted, atCid, createdAt, - updatedAt + updatedAt, + specimenDonorId ).mapTo[CitizenBiosample] } diff --git a/app/models/domain/genomics/CitizenBiosample.scala b/app/models/domain/genomics/CitizenBiosample.scala index 2e62861..18b749a 100644 --- a/app/models/domain/genomics/CitizenBiosample.scala +++ b/app/models/domain/genomics/CitizenBiosample.scala @@ -24,5 +24,6 @@ case class CitizenBiosample( deleted: Boolean = false, atCid: Option[String] = None, createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() + updatedAt: LocalDateTime = LocalDateTime.now(), + specimenDonorId: Option[Int] = None ) diff --git a/app/services/CitizenBiosampleService.scala b/app/services/CitizenBiosampleService.scala index 4c8e5c4..e3230fa 100644 --- a/app/services/CitizenBiosampleService.scala +++ b/app/services/CitizenBiosampleService.scala @@ -2,7 +2,7 @@ package services import jakarta.inject.{Inject, Singleton} import models.api.{ExternalBiosampleRequest, PublicationInfo} -import 
models.domain.genomics.{BiosampleType, CitizenBiosample} +import models.domain.genomics.{BiosampleType, CitizenBiosample, SpecimenDonor} import models.domain.publications.{CitizenBiosampleOriginalHaplogroup, Publication, PublicationCitizenBiosample} import repositories._ @@ -16,45 +16,97 @@ class CitizenBiosampleService @Inject()( biosampleDataService: BiosampleDataService, publicationRepository: PublicationRepository, publicationCitizenBiosampleRepository: PublicationCitizenBiosampleRepository, - citizenBiosampleOriginalHaplogroupRepository: CitizenBiosampleOriginalHaplogroupRepository + citizenBiosampleOriginalHaplogroupRepository: CitizenBiosampleOriginalHaplogroupRepository, + specimenDonorRepository: SpecimenDonorRepository )(implicit ec: ExecutionContext) extends CoordinateValidation { + /** + * Extracts the DID from an AT URI. + * AT URI format: at://did:plc:abc123/collection/rkey + */ + private def extractDidFromAtUri(atUri: String): Option[String] = { + if (atUri.startsWith("at://")) { + val withoutPrefix = atUri.stripPrefix("at://") + val didEnd = withoutPrefix.indexOf('/') + if (didEnd > 0) Some(withoutPrefix.substring(0, didEnd)) + else Some(withoutPrefix) + } else None + } + + /** + * Resolves or creates a SpecimenDonor for a Citizen biosample. + * Uses citizenDid (extracted from atUri) + donorIdentifier to find existing donor, + * or creates a new one if not found. 
+ */ + private def resolveOrCreateDonor( + request: ExternalBiosampleRequest, + geocoord: Option[com.vividsolutions.jts.geom.Point] + ): Future[Option[Int]] = { + val citizenDid = request.citizenDid.orElse(request.atUri.flatMap(extractDidFromAtUri)) + + (citizenDid, request.donorIdentifier) match { + case (Some(did), Some(identifier)) => + specimenDonorRepository.findByDidAndIdentifier(did, identifier).flatMap { + case Some(existingDonor) => + Future.successful(existingDonor.id) + case None => + val newDonor = SpecimenDonor( + donorIdentifier = identifier, + originBiobank = request.centerName, + donorType = request.donorType.getOrElse(BiosampleType.Citizen), + sex = request.sex, + geocoord = geocoord, + pgpParticipantId = None, + atUri = Some(did), + dateRangeStart = None, + dateRangeEnd = None + ) + specimenDonorRepository.create(newDonor).map(_.id) + } + case _ => Future.successful(None) + } + } + def createBiosample(request: ExternalBiosampleRequest): Future[UUID] = { // 1. Validate coordinates validateCoordinates(request.latitude, request.longitude).flatMap { geocoord => // 2. 
Check for existing biosample by accession citizenBiosampleRepository.findByAccession(request.sampleAccession).flatMap { - case Some(_) => + case Some(_) => Future.failed(new IllegalArgumentException(s"Biosample with accession ${request.sampleAccession} already exists.")) - + case None => - // Create new - val sampleGuid = UUID.randomUUID() - val newAtCid = Some(UUID.randomUUID().toString) - - val citizenBiosample = CitizenBiosample( - id = None, - atUri = request.atUri, - accession = Some(request.sampleAccession), - alias = request.alias, - sourcePlatform = Some(request.sourceSystem), - collectionDate = None, - sex = request.sex, - geocoord = geocoord, - description = Some(request.description), - yHaplogroup = request.haplogroups.flatMap(_.yDna), - mtHaplogroup = request.haplogroups.flatMap(_.mtDna), - sampleGuid = sampleGuid, - deleted = false, - atCid = newAtCid, - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - - for { - created <- citizenBiosampleRepository.create(citizenBiosample) - _ <- handleDataAssociation(created.sampleGuid, request, isUpdate = false) - } yield created.sampleGuid + // 3. Resolve or create SpecimenDonor + resolveOrCreateDonor(request, geocoord).flatMap { donorId => + // 4. 
Create new CitizenBiosample + val sampleGuid = UUID.randomUUID() + val newAtCid = Some(UUID.randomUUID().toString) + + val citizenBiosample = CitizenBiosample( + id = None, + atUri = request.atUri, + accession = Some(request.sampleAccession), + alias = request.alias, + sourcePlatform = Some(request.sourceSystem), + collectionDate = None, + sex = request.sex, + geocoord = geocoord, + description = Some(request.description), + yHaplogroup = request.haplogroups.flatMap(_.yDna), + mtHaplogroup = request.haplogroups.flatMap(_.mtDna), + sampleGuid = sampleGuid, + deleted = false, + atCid = newAtCid, + createdAt = LocalDateTime.now(), + updatedAt = LocalDateTime.now(), + specimenDonorId = donorId + ) + + for { + created <- citizenBiosampleRepository.create(citizenBiosample) + _ <- handleDataAssociation(created.sampleGuid, request, isUpdate = false) + } yield created.sampleGuid + } } } } @@ -67,26 +119,36 @@ class CitizenBiosampleService @Inject()( if (request.atCid.isDefined && request.atCid != existing.atCid) { Future.failed(new IllegalStateException(s"Optimistic locking failure: atCid mismatch. 
Expected ${existing.atCid}, got ${request.atCid}")) } else { - val newAtCid = Some(UUID.randomUUID().toString) - val toUpdate = existing.copy( - description = Some(request.description), - alias = request.alias, - sourcePlatform = Some(request.sourceSystem), - sex = request.sex, - geocoord = geocoord, - atUri = request.atUri, - accession = Some(request.sampleAccession), - yHaplogroup = request.haplogroups.flatMap(_.yDna).orElse(existing.yHaplogroup), - mtHaplogroup = request.haplogroups.flatMap(_.mtDna).orElse(existing.mtHaplogroup), - atCid = newAtCid, - updatedAt = LocalDateTime.now() - ) - - citizenBiosampleRepository.update(toUpdate, request.atCid).flatMap { success => - if (success) { - handleDataAssociation(existing.sampleGuid, request, isUpdate = true).map(_ => existing.sampleGuid) - } else { - Future.failed(new RuntimeException("Update failed (optimistic lock or record missing)")) + // Resolve donor (use existing if not changing, or resolve/create if provided) + val donorFuture = if (request.donorIdentifier.isDefined) { + resolveOrCreateDonor(request, geocoord) + } else { + Future.successful(existing.specimenDonorId) + } + + donorFuture.flatMap { donorId => + val newAtCid = Some(UUID.randomUUID().toString) + val toUpdate = existing.copy( + description = Some(request.description), + alias = request.alias, + sourcePlatform = Some(request.sourceSystem), + sex = request.sex, + geocoord = geocoord, + atUri = request.atUri, + accession = Some(request.sampleAccession), + yHaplogroup = request.haplogroups.flatMap(_.yDna).orElse(existing.yHaplogroup), + mtHaplogroup = request.haplogroups.flatMap(_.mtDna).orElse(existing.mtHaplogroup), + atCid = newAtCid, + updatedAt = LocalDateTime.now(), + specimenDonorId = donorId + ) + + citizenBiosampleRepository.update(toUpdate, request.atCid).flatMap { success => + if (success) { + handleDataAssociation(existing.sampleGuid, request, isUpdate = true).map(_ => existing.sampleGuid) + } else { + Future.failed(new 
RuntimeException("Update failed (optimistic lock or record missing)")) + } } } } diff --git a/conf/evolutions/default/24.sql b/conf/evolutions/default/24.sql index 1259aa3..c1239be 100644 --- a/conf/evolutions/default/24.sql +++ b/conf/evolutions/default/24.sql @@ -1,13 +1,7 @@ # --- !Ups -ALTER TABLE pds_registrations ADD COLUMN leased_by_instance_id TEXT NULL; -ALTER TABLE pds_registrations ADD COLUMN lease_expires_at TIMESTAMPTZ NULL; -ALTER TABLE pds_registrations ADD COLUMN processing_status TEXT NOT NULL DEFAULT 'idle'; - -CREATE INDEX pds_registrations_lease_expires_at_idx ON pds_registrations (lease_expires_at); +ALTER TABLE citizen_biosample ADD COLUMN specimen_donor_id INT REFERENCES specimen_donor(id); +CREATE INDEX citizen_biosample_specimen_donor_id_idx ON citizen_biosample(specimen_donor_id); # --- !Downs -DROP INDEX pds_registrations_lease_expires_at_idx; - -ALTER TABLE pds_registrations DROP COLUMN processing_status; -ALTER TABLE pds_registrations DROP COLUMN lease_expires_at; -ALTER TABLE pds_registrations DROP COLUMN leased_by_instance_id; +DROP INDEX citizen_biosample_specimen_donor_id_idx; +ALTER TABLE citizen_biosample DROP COLUMN specimen_donor_id; From 73b00d65d82c6d87fe14c6fd17e895a9acf681a6 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 09:14:50 -0600 Subject: [PATCH 29/31] Introduce Specimen Donor linkage for Citizen Biosamples - Add `specimenDonorId` field and foreign key in `citizen_biosample` table. - Enable resolving or creating Specimen Donors during biosample creation and updates. - Update `CitizenBiosampleService` to handle Specimen Donor associations. 
--- conf/evolutions/metadata/2.sql | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 conf/evolutions/metadata/2.sql diff --git a/conf/evolutions/metadata/2.sql b/conf/evolutions/metadata/2.sql new file mode 100644 index 0000000..1259aa3 --- /dev/null +++ b/conf/evolutions/metadata/2.sql @@ -0,0 +1,13 @@ +# --- !Ups +ALTER TABLE pds_registrations ADD COLUMN leased_by_instance_id TEXT NULL; +ALTER TABLE pds_registrations ADD COLUMN lease_expires_at TIMESTAMPTZ NULL; +ALTER TABLE pds_registrations ADD COLUMN processing_status TEXT NOT NULL DEFAULT 'idle'; + +CREATE INDEX pds_registrations_lease_expires_at_idx ON pds_registrations (lease_expires_at); + +# --- !Downs +DROP INDEX pds_registrations_lease_expires_at_idx; + +ALTER TABLE pds_registrations DROP COLUMN processing_status; +ALTER TABLE pds_registrations DROP COLUMN lease_expires_at; +ALTER TABLE pds_registrations DROP COLUMN leased_by_instance_id; From 4984fb41853e7440c913987ffcf46387fedb12af Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 09:24:32 -0600 Subject: [PATCH 30/31] Refactor CitizenBiosampleService to delegate Firehose event handling to new event-driven CitizenBiosampleEventHandler. Introduce unified FirehoseEvent abstraction for Create, Update, and Delete operations. 
--- app/services/CitizenBiosampleService.scala | 282 ++++----------- .../CitizenBiosampleEventHandler.scala | 300 ++++++++++++++++ app/services/firehose/FirehoseEvent.scala | 144 ++++++++ documents/BGS_Integration_Plan.md | 328 +++++++++++++----- 4 files changed, 755 insertions(+), 299 deletions(-) create mode 100644 app/services/firehose/CitizenBiosampleEventHandler.scala create mode 100644 app/services/firehose/FirehoseEvent.scala diff --git a/app/services/CitizenBiosampleService.scala b/app/services/CitizenBiosampleService.scala index e3230fa..4bf2ba3 100644 --- a/app/services/CitizenBiosampleService.scala +++ b/app/services/CitizenBiosampleService.scala @@ -1,231 +1,97 @@ package services import jakarta.inject.{Inject, Singleton} -import models.api.{ExternalBiosampleRequest, PublicationInfo} -import models.domain.genomics.{BiosampleType, CitizenBiosample, SpecimenDonor} -import models.domain.publications.{CitizenBiosampleOriginalHaplogroup, Publication, PublicationCitizenBiosample} -import repositories._ +import models.api.ExternalBiosampleRequest +import services.firehose.{CitizenBiosampleEvent, CitizenBiosampleEventHandler, FirehoseResult} -import java.time.LocalDateTime import java.util.UUID import scala.concurrent.{ExecutionContext, Future} +/** + * Service facade for CitizenBiosample operations. + * + * This service wraps REST API requests into FirehoseEvents and delegates + * to the CitizenBiosampleEventHandler. This pattern allows: + * + * - Phase 1: REST API calls go through this facade + * - Phase 2: Kafka consumer calls the handler directly + * - Phase 3: Firehose consumer calls the handler directly + * + * The facade translates FirehoseResults back to exceptions for + * backward compatibility with the existing controller error handling. 
+ */ @Singleton class CitizenBiosampleService @Inject()( - citizenBiosampleRepository: CitizenBiosampleRepository, - biosampleDataService: BiosampleDataService, - publicationRepository: PublicationRepository, - publicationCitizenBiosampleRepository: PublicationCitizenBiosampleRepository, - citizenBiosampleOriginalHaplogroupRepository: CitizenBiosampleOriginalHaplogroupRepository, - specimenDonorRepository: SpecimenDonorRepository - )(implicit ec: ExecutionContext) extends CoordinateValidation { + eventHandler: CitizenBiosampleEventHandler +)(implicit ec: ExecutionContext) { /** - * Extracts the DID from an AT URI. - * AT URI format: at://did:plc:abc123/collection/rkey + * Create a new CitizenBiosample from an API request. + * Wraps the request as a Create event and processes it. */ - private def extractDidFromAtUri(atUri: String): Option[String] = { - if (atUri.startsWith("at://")) { - val withoutPrefix = atUri.stripPrefix("at://") - val didEnd = withoutPrefix.indexOf('/') - if (didEnd > 0) Some(withoutPrefix.substring(0, didEnd)) - else Some(withoutPrefix) - } else None - } + def createBiosample(request: ExternalBiosampleRequest): Future[UUID] = { + val event = CitizenBiosampleEvent.forCreate(request) - /** - * Resolves or creates a SpecimenDonor for a Citizen biosample. - * Uses citizenDid (extracted from atUri) + donorIdentifier to find existing donor, - * or creates a new one if not found. 
- */ - private def resolveOrCreateDonor( - request: ExternalBiosampleRequest, - geocoord: Option[com.vividsolutions.jts.geom.Point] - ): Future[Option[Int]] = { - val citizenDid = request.citizenDid.orElse(request.atUri.flatMap(extractDidFromAtUri)) - - (citizenDid, request.donorIdentifier) match { - case (Some(did), Some(identifier)) => - specimenDonorRepository.findByDidAndIdentifier(did, identifier).flatMap { - case Some(existingDonor) => - Future.successful(existingDonor.id) - case None => - val newDonor = SpecimenDonor( - donorIdentifier = identifier, - originBiobank = request.centerName, - donorType = request.donorType.getOrElse(BiosampleType.Citizen), - sex = request.sex, - geocoord = geocoord, - pgpParticipantId = None, - atUri = Some(did), - dateRangeStart = None, - dateRangeEnd = None - ) - specimenDonorRepository.create(newDonor).map(_.id) - } - case _ => Future.successful(None) - } - } + eventHandler.handle(event).flatMap { + case FirehoseResult.Success(_, _, Some(guid), _) => + Future.successful(guid) - def createBiosample(request: ExternalBiosampleRequest): Future[UUID] = { - // 1. Validate coordinates - validateCoordinates(request.latitude, request.longitude).flatMap { geocoord => - // 2. Check for existing biosample by accession - citizenBiosampleRepository.findByAccession(request.sampleAccession).flatMap { - case Some(_) => - Future.failed(new IllegalArgumentException(s"Biosample with accession ${request.sampleAccession} already exists.")) - - case None => - // 3. Resolve or create SpecimenDonor - resolveOrCreateDonor(request, geocoord).flatMap { donorId => - // 4. 
Create new CitizenBiosample - val sampleGuid = UUID.randomUUID() - val newAtCid = Some(UUID.randomUUID().toString) - - val citizenBiosample = CitizenBiosample( - id = None, - atUri = request.atUri, - accession = Some(request.sampleAccession), - alias = request.alias, - sourcePlatform = Some(request.sourceSystem), - collectionDate = None, - sex = request.sex, - geocoord = geocoord, - description = Some(request.description), - yHaplogroup = request.haplogroups.flatMap(_.yDna), - mtHaplogroup = request.haplogroups.flatMap(_.mtDna), - sampleGuid = sampleGuid, - deleted = false, - atCid = newAtCid, - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now(), - specimenDonorId = donorId - ) - - for { - created <- citizenBiosampleRepository.create(citizenBiosample) - _ <- handleDataAssociation(created.sampleGuid, request, isUpdate = false) - } yield created.sampleGuid - } - } + case FirehoseResult.Conflict(_, message) => + Future.failed(new IllegalArgumentException(message)) + + case FirehoseResult.Success(_, _, None, _) => + Future.failed(new RuntimeException("Handler did not return GUID")) + + case FirehoseResult.ValidationError(_, message) => + Future.failed(new IllegalArgumentException(message)) + + case FirehoseResult.Error(_, message, cause) => + Future.failed(cause.getOrElse(new RuntimeException(message))) + + case FirehoseResult.NotFound(_) => + Future.failed(new NoSuchElementException("Unexpected NotFound on create")) } } + /** + * Update an existing CitizenBiosample. + * Wraps the request as an Update event and processes it. 
+ */ def updateBiosample(atUri: String, request: ExternalBiosampleRequest): Future[UUID] = { - validateCoordinates(request.latitude, request.longitude).flatMap { geocoord => - citizenBiosampleRepository.findByAtUri(atUri).flatMap { - case Some(existing) => - // Optimistic Locking Check - if (request.atCid.isDefined && request.atCid != existing.atCid) { - Future.failed(new IllegalStateException(s"Optimistic locking failure: atCid mismatch. Expected ${existing.atCid}, got ${request.atCid}")) - } else { - // Resolve donor (use existing if not changing, or resolve/create if provided) - val donorFuture = if (request.donorIdentifier.isDefined) { - resolveOrCreateDonor(request, geocoord) - } else { - Future.successful(existing.specimenDonorId) - } - - donorFuture.flatMap { donorId => - val newAtCid = Some(UUID.randomUUID().toString) - val toUpdate = existing.copy( - description = Some(request.description), - alias = request.alias, - sourcePlatform = Some(request.sourceSystem), - sex = request.sex, - geocoord = geocoord, - atUri = request.atUri, - accession = Some(request.sampleAccession), - yHaplogroup = request.haplogroups.flatMap(_.yDna).orElse(existing.yHaplogroup), - mtHaplogroup = request.haplogroups.flatMap(_.mtDna).orElse(existing.mtHaplogroup), - atCid = newAtCid, - updatedAt = LocalDateTime.now(), - specimenDonorId = donorId - ) - - citizenBiosampleRepository.update(toUpdate, request.atCid).flatMap { success => - if (success) { - handleDataAssociation(existing.sampleGuid, request, isUpdate = true).map(_ => existing.sampleGuid) - } else { - Future.failed(new RuntimeException("Update failed (optimistic lock or record missing)")) - } - } - } - } - case None => - Future.failed(new NoSuchElementException(s"Biosample not found for atUri: $atUri")) - } - } - } - - private def handleDataAssociation(guid: UUID, request: ExternalBiosampleRequest, isUpdate: Boolean): Future[Unit] = { - val publicationFuture = request.publication - .map(pub => linkPublication(guid, pub) - 
.recoverWith { case e => - Future.failed(new RuntimeException(s"Publication linkage failed: ${e.getMessage}", e)) - }) - .getOrElse(Future.successful(())) - - val sequenceDataFuture = if (isUpdate) { - biosampleDataService.replaceSequenceData(guid, request.sequenceData) - } else { - biosampleDataService.addSequenceData(guid, request.sequenceData) - } - - for { - _ <- publicationFuture - _ <- sequenceDataFuture - } yield () - } + val event = CitizenBiosampleEvent.forUpdate(atUri, request) + + eventHandler.handle(event).flatMap { + case FirehoseResult.Success(_, _, Some(guid), _) => + Future.successful(guid) + + case FirehoseResult.Success(_, _, None, _) => + Future.failed(new RuntimeException("Handler did not return GUID")) + + case FirehoseResult.NotFound(_) => + Future.failed(new NoSuchElementException(s"Biosample not found for atUri: $atUri")) + + case FirehoseResult.Conflict(_, message) => + Future.failed(new IllegalStateException(message)) - private def linkPublication(sampleGuid: UUID, pubInfo: PublicationInfo): Future[Unit] = { - for { - maybeBiosample <- citizenBiosampleRepository.findByGuid(sampleGuid) - biosample <- maybeBiosample match { - case Some(b) => Future.successful(b) - case None => Future.failed(new IllegalArgumentException(s"CitizenBiosample not found for GUID: $sampleGuid")) - } - - maybePublication <- pubInfo.doi.map(doi => - publicationRepository.findByDoi(doi) - ).getOrElse(Future.successful(None)) - - publication <- maybePublication match { - case Some(pub) => Future.successful(pub) - case None => publicationRepository.savePublication(Publication( - id = None, - openAlexId = None, - pubmedId = pubInfo.pubmedId, - doi = pubInfo.doi, - title = pubInfo.doi.map(d => s"Publication with DOI: $d").getOrElse("Unknown publication"), - authors = None, abstractSummary = None, journal = None, publicationDate = None, url = None, - citationNormalizedPercentile = None, citedByCount = None, openAccessStatus = None, openAccessUrl = None, - primaryTopic = 
None, publicationType = None, publisher = None - )) - } - - // Link publication - _ <- publicationCitizenBiosampleRepository.create(PublicationCitizenBiosample( - publicationId = publication.id.get, - citizenBiosampleId = biosample.id.get - )) - - // Link Haplogroups - _ <- pubInfo.originalHaplogroups.map { haplogroupInfo => - citizenBiosampleOriginalHaplogroupRepository.create(CitizenBiosampleOriginalHaplogroup( - id = None, - citizenBiosampleId = biosample.id.get, - publicationId = publication.id.get, - originalYHaplogroup = haplogroupInfo.yHaplogroup, - originalMtHaplogroup = haplogroupInfo.mtHaplogroup, - notes = haplogroupInfo.notes - )) - }.getOrElse(Future.successful(())) - - } yield () + case FirehoseResult.ValidationError(_, message) => + Future.failed(new IllegalArgumentException(message)) + + case FirehoseResult.Error(_, message, cause) => + Future.failed(cause.getOrElse(new RuntimeException(message))) + } } + /** + * Soft delete a CitizenBiosample. + * Wraps as a Delete event and processes it. 
+ */ def deleteBiosample(atUri: String): Future[Boolean] = { - citizenBiosampleRepository.softDeleteByAtUri(atUri) + val event = CitizenBiosampleEvent.forDelete(atUri) + + eventHandler.handle(event).map { + case FirehoseResult.Success(_, _, _, _) => true + case FirehoseResult.NotFound(_) => false + case _ => false + } } -} \ No newline at end of file +} diff --git a/app/services/firehose/CitizenBiosampleEventHandler.scala b/app/services/firehose/CitizenBiosampleEventHandler.scala new file mode 100644 index 0000000..cb230d3 --- /dev/null +++ b/app/services/firehose/CitizenBiosampleEventHandler.scala @@ -0,0 +1,300 @@ +package services.firehose + +import com.vividsolutions.jts.geom.Point +import jakarta.inject.{Inject, Singleton} +import models.api.{ExternalBiosampleRequest, PublicationInfo} +import models.domain.genomics.{BiosampleType, CitizenBiosample, SpecimenDonor} +import models.domain.publications.{CitizenBiosampleOriginalHaplogroup, Publication, PublicationCitizenBiosample} +import play.api.Logging +import repositories._ +import services.{BiosampleDataService, CoordinateValidation} + +import java.time.LocalDateTime +import java.util.UUID +import scala.concurrent.{ExecutionContext, Future} + +/** + * Handles CitizenBiosampleEvent processing in an event-driven manner. + * + * This handler can be invoked from: + * - REST API controller (Phase 1) + * - Kafka consumer (Phase 2) + * - AT Protocol Firehose consumer (Phase 3) + * + * The handler is stateless and processes each event independently, + * returning a FirehoseResult that can be translated to HTTP responses, + * Kafka acknowledgments, or Firehose cursor updates as appropriate. 
+ */ +@Singleton +class CitizenBiosampleEventHandler @Inject()( + citizenBiosampleRepository: CitizenBiosampleRepository, + biosampleDataService: BiosampleDataService, + publicationRepository: PublicationRepository, + publicationCitizenBiosampleRepository: PublicationCitizenBiosampleRepository, + citizenBiosampleOriginalHaplogroupRepository: CitizenBiosampleOriginalHaplogroupRepository, + specimenDonorRepository: SpecimenDonorRepository +)(implicit ec: ExecutionContext) extends CoordinateValidation with Logging { + + /** + * Process a CitizenBiosampleEvent and return a result. + * This is the main entry point for event processing. + */ + def handle(event: CitizenBiosampleEvent): Future[FirehoseResult] = { + logger.debug(s"Processing ${event.action} event for atUri: ${event.atUri}") + + event.action match { + case FirehoseAction.Create => handleCreate(event) + case FirehoseAction.Update => handleUpdate(event) + case FirehoseAction.Delete => handleDelete(event) + } + } + + private def handleCreate(event: CitizenBiosampleEvent): Future[FirehoseResult] = { + event.payload match { + case None => + Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required for create")) + + case Some(request) => + (for { + geocoord <- validateCoordinates(request.latitude, request.longitude) + existing <- citizenBiosampleRepository.findByAccession(request.sampleAccession) + result <- existing match { + case Some(_) => + Future.successful(FirehoseResult.Conflict(event.atUri, + s"Biosample with accession ${request.sampleAccession} already exists")) + + case None => + createBiosample(event.atUri, request, geocoord) + } + } yield result).recover { + case e: IllegalArgumentException => + FirehoseResult.ValidationError(event.atUri, e.getMessage) + case e: Exception => + logger.error(s"Error processing create event for ${event.atUri}", e) + FirehoseResult.Error(event.atUri, e.getMessage, Some(e)) + } + } + } + + private def handleUpdate(event: CitizenBiosampleEvent): 
Future[FirehoseResult] = { + event.payload match { + case None => + Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required for update")) + + case Some(request) => + (for { + geocoord <- validateCoordinates(request.latitude, request.longitude) + existing <- citizenBiosampleRepository.findByAtUri(event.atUri) + result <- existing match { + case None => + Future.successful(FirehoseResult.NotFound(event.atUri)) + + case Some(biosample) if event.atCid.isDefined && event.atCid != biosample.atCid => + Future.successful(FirehoseResult.Conflict(event.atUri, + s"Optimistic locking failure: expected ${biosample.atCid}, got ${event.atCid}")) + + case Some(biosample) => + updateBiosample(biosample, request, geocoord) + } + } yield result).recover { + case e: IllegalArgumentException => + FirehoseResult.ValidationError(event.atUri, e.getMessage) + case e: Exception => + logger.error(s"Error processing update event for ${event.atUri}", e) + FirehoseResult.Error(event.atUri, e.getMessage, Some(e)) + } + } + } + + private def handleDelete(event: CitizenBiosampleEvent): Future[FirehoseResult] = { + citizenBiosampleRepository.softDeleteByAtUri(event.atUri).map { + case true => FirehoseResult.Success(event.atUri, "", None, "Deleted") + case false => FirehoseResult.NotFound(event.atUri) + }.recover { + case e: Exception => + logger.error(s"Error processing delete event for ${event.atUri}", e) + FirehoseResult.Error(event.atUri, e.getMessage, Some(e)) + } + } + + private def createBiosample( + atUri: String, + request: ExternalBiosampleRequest, + geocoord: Option[Point] + ): Future[FirehoseResult] = { + for { + donorId <- resolveOrCreateDonor(request, geocoord) + sampleGuid = UUID.randomUUID() + newAtCid = UUID.randomUUID().toString + + citizenBiosample = CitizenBiosample( + id = None, + atUri = Some(atUri), + accession = Some(request.sampleAccession), + alias = request.alias, + sourcePlatform = Some(request.sourceSystem), + collectionDate = None, + sex = 
request.sex, + geocoord = geocoord, + description = Some(request.description), + yHaplogroup = request.haplogroups.flatMap(_.yDna), + mtHaplogroup = request.haplogroups.flatMap(_.mtDna), + sampleGuid = sampleGuid, + deleted = false, + atCid = Some(newAtCid), + createdAt = LocalDateTime.now(), + updatedAt = LocalDateTime.now(), + specimenDonorId = donorId + ) + + created <- citizenBiosampleRepository.create(citizenBiosample) + _ <- handleDataAssociation(created.sampleGuid, request, isUpdate = false) + } yield FirehoseResult.Success(atUri, newAtCid, Some(created.sampleGuid), "Created") + } + + private def updateBiosample( + existing: CitizenBiosample, + request: ExternalBiosampleRequest, + geocoord: Option[Point] + ): Future[FirehoseResult] = { + for { + donorId <- if (request.donorIdentifier.isDefined) { + resolveOrCreateDonor(request, geocoord) + } else { + Future.successful(existing.specimenDonorId) + } + + newAtCid = UUID.randomUUID().toString + toUpdate = existing.copy( + description = Some(request.description), + alias = request.alias, + sourcePlatform = Some(request.sourceSystem), + sex = request.sex, + geocoord = geocoord, + atUri = request.atUri, + accession = Some(request.sampleAccession), + yHaplogroup = request.haplogroups.flatMap(_.yDna).orElse(existing.yHaplogroup), + mtHaplogroup = request.haplogroups.flatMap(_.mtDna).orElse(existing.mtHaplogroup), + atCid = Some(newAtCid), + updatedAt = LocalDateTime.now(), + specimenDonorId = donorId + ) + + success <- citizenBiosampleRepository.update(toUpdate, request.atCid) + _ <- if (success) { + handleDataAssociation(existing.sampleGuid, request, isUpdate = true) + } else { + Future.failed(new RuntimeException("Update failed")) + } + } yield FirehoseResult.Success(existing.atUri.getOrElse(""), newAtCid, Some(existing.sampleGuid), "Updated") + } + + // --- Helper methods (moved from CitizenBiosampleService) --- + + private def extractDidFromAtUri(atUri: String): Option[String] = { + if (atUri.startsWith("at://")) 
{ + val withoutPrefix = atUri.stripPrefix("at://") + val didEnd = withoutPrefix.indexOf('/') + if (didEnd > 0) Some(withoutPrefix.substring(0, didEnd)) + else Some(withoutPrefix) + } else None + } + + private def resolveOrCreateDonor( + request: ExternalBiosampleRequest, + geocoord: Option[Point] + ): Future[Option[Int]] = { + val citizenDid = request.citizenDid.orElse(request.atUri.flatMap(extractDidFromAtUri)) + + (citizenDid, request.donorIdentifier) match { + case (Some(did), Some(identifier)) => + specimenDonorRepository.findByDidAndIdentifier(did, identifier).flatMap { + case Some(existingDonor) => + Future.successful(existingDonor.id) + case None => + val newDonor = SpecimenDonor( + donorIdentifier = identifier, + originBiobank = request.centerName, + donorType = request.donorType.getOrElse(BiosampleType.Citizen), + sex = request.sex, + geocoord = geocoord, + pgpParticipantId = None, + atUri = Some(did), + dateRangeStart = None, + dateRangeEnd = None + ) + specimenDonorRepository.create(newDonor).map(_.id) + } + case _ => Future.successful(None) + } + } + + private def handleDataAssociation( + guid: UUID, + request: ExternalBiosampleRequest, + isUpdate: Boolean + ): Future[Unit] = { + val publicationFuture = request.publication + .map(pub => linkPublication(guid, pub) + .recoverWith { case e => + Future.failed(new RuntimeException(s"Publication linkage failed: ${e.getMessage}", e)) + }) + .getOrElse(Future.successful(())) + + val sequenceDataFuture = if (isUpdate) { + biosampleDataService.replaceSequenceData(guid, request.sequenceData) + } else { + biosampleDataService.addSequenceData(guid, request.sequenceData) + } + + for { + _ <- publicationFuture + _ <- sequenceDataFuture + } yield () + } + + private def linkPublication(sampleGuid: UUID, pubInfo: PublicationInfo): Future[Unit] = { + for { + maybeBiosample <- citizenBiosampleRepository.findByGuid(sampleGuid) + biosample <- maybeBiosample match { + case Some(b) => Future.successful(b) + case None => 
Future.failed(new IllegalArgumentException(s"CitizenBiosample not found for GUID: $sampleGuid")) + } + + maybePublication <- pubInfo.doi.map(doi => + publicationRepository.findByDoi(doi) + ).getOrElse(Future.successful(None)) + + publication <- maybePublication match { + case Some(pub) => Future.successful(pub) + case None => publicationRepository.savePublication(Publication( + id = None, + openAlexId = None, + pubmedId = pubInfo.pubmedId, + doi = pubInfo.doi, + title = pubInfo.doi.map(d => s"Publication with DOI: $d").getOrElse("Unknown publication"), + authors = None, abstractSummary = None, journal = None, publicationDate = None, url = None, + citationNormalizedPercentile = None, citedByCount = None, openAccessStatus = None, openAccessUrl = None, + primaryTopic = None, publicationType = None, publisher = None + )) + } + + _ <- publicationCitizenBiosampleRepository.create(PublicationCitizenBiosample( + publicationId = publication.id.get, + citizenBiosampleId = biosample.id.get + )) + + _ <- pubInfo.originalHaplogroups.map { haplogroupInfo => + citizenBiosampleOriginalHaplogroupRepository.create(CitizenBiosampleOriginalHaplogroup( + id = None, + citizenBiosampleId = biosample.id.get, + publicationId = publication.id.get, + originalYHaplogroup = haplogroupInfo.yHaplogroup, + originalMtHaplogroup = haplogroupInfo.mtHaplogroup, + notes = haplogroupInfo.notes + )) + }.getOrElse(Future.successful(())) + + } yield () + } +} diff --git a/app/services/firehose/FirehoseEvent.scala b/app/services/firehose/FirehoseEvent.scala new file mode 100644 index 0000000..2072a80 --- /dev/null +++ b/app/services/firehose/FirehoseEvent.scala @@ -0,0 +1,144 @@ +package services.firehose + +import models.api.{ExternalBiosampleRequest, ProjectRequest} +import play.api.libs.json.{Format, Json, OFormat} + +/** + * Represents events from the AT Protocol Firehose (or simulated via REST API). 
+ * + * This abstraction allows the same event processing logic to be used whether + * events arrive via: + * - Phase 1: Direct REST API calls (wrapped as events) + * - Phase 2: Kafka consumer + * - Phase 3: AT Protocol Firehose subscription + * + * Each event includes: + * - `atUri`: The canonical AT Protocol identifier for the record + * - `atCid`: Content identifier for optimistic locking / version tracking + * - `action`: The operation type (Create, Update, Delete) + */ +sealed trait FirehoseEvent { + def atUri: String + def atCid: Option[String] + def action: FirehoseAction +} + +/** + * Actions that can be performed on a record. + * Maps to AT Protocol commit operations. + */ +enum FirehoseAction: + case Create, Update, Delete + +object FirehoseAction { + import play.api.libs.json.{Reads, Writes} + + implicit val reads: Reads[FirehoseAction] = Reads.of[String].map(FirehoseAction.valueOf) + implicit val writes: Writes[FirehoseAction] = Writes.of[String].contramap(_.toString) + implicit val format: Format[FirehoseAction] = Format(reads, writes) +} + +/** + * Event for Citizen Biosample operations. 
+ * + * @param atUri The AT Protocol URI (at://did/collection/rkey) + * @param atCid Content identifier for versioning + * @param action The operation type + * @param payload The biosample data (None for Delete operations) + */ +case class CitizenBiosampleEvent( + atUri: String, + atCid: Option[String], + action: FirehoseAction, + payload: Option[ExternalBiosampleRequest] +) extends FirehoseEvent + +object CitizenBiosampleEvent { + implicit val format: OFormat[CitizenBiosampleEvent] = Json.format + + def forCreate(request: ExternalBiosampleRequest): CitizenBiosampleEvent = + CitizenBiosampleEvent( + atUri = request.atUri.getOrElse(throw new IllegalArgumentException("atUri required for create")), + atCid = request.atCid, + action = FirehoseAction.Create, + payload = Some(request) + ) + + def forUpdate(atUri: String, request: ExternalBiosampleRequest): CitizenBiosampleEvent = + CitizenBiosampleEvent( + atUri = atUri, + atCid = request.atCid, + action = FirehoseAction.Update, + payload = Some(request) + ) + + def forDelete(atUri: String): CitizenBiosampleEvent = + CitizenBiosampleEvent( + atUri = atUri, + atCid = None, + action = FirehoseAction.Delete, + payload = None + ) +} + +/** + * Event for Project operations. 
+ */ +case class ProjectEvent( + atUri: String, + atCid: Option[String], + action: FirehoseAction, + payload: Option[ProjectRequest] +) extends FirehoseEvent + +object ProjectEvent { + implicit val format: OFormat[ProjectEvent] = Json.format + + def forCreate(atUri: String, request: ProjectRequest): ProjectEvent = + ProjectEvent( + atUri = atUri, + atCid = request.atCid, + action = FirehoseAction.Create, + payload = Some(request) + ) + + def forUpdate(atUri: String, request: ProjectRequest): ProjectEvent = + ProjectEvent( + atUri = atUri, + atCid = request.atCid, + action = FirehoseAction.Update, + payload = Some(request) + ) + + def forDelete(atUri: String): ProjectEvent = + ProjectEvent( + atUri = atUri, + atCid = None, + action = FirehoseAction.Delete, + payload = None + ) +} + +/** + * Result of processing a FirehoseEvent. + * Provides a consistent result type regardless of the event source. + */ +sealed trait FirehoseResult { + def atUri: String +} + +object FirehoseResult { + import java.util.UUID + + case class Success( + atUri: String, + newAtCid: String, + sampleGuid: Option[UUID] = None, + message: String = "OK" + ) extends FirehoseResult + + case class NotFound(atUri: String) extends FirehoseResult + case class Conflict(atUri: String, message: String) extends FirehoseResult + case class ValidationError(atUri: String, message: String) extends FirehoseResult + case class Error(atUri: String, message: String, cause: Option[Throwable] = None) extends FirehoseResult +} diff --git a/documents/BGS_Integration_Plan.md b/documents/BGS_Integration_Plan.md index 726afe7..a8f6571 100644 --- a/documents/BGS_Integration_Plan.md +++ b/documents/BGS_Integration_Plan.md @@ -1,132 +1,278 @@ -### Architecture Overview +# BGS / Firehose Integration Plan -For the MVP, we will utilize a **Secure REST API** pattern. The BGS server will act as an authenticated API client, pushing operational data directly to the `decodingus` backend. 
+## Phase 1 Status: ✅ COMPLETE -* **Integration Point:** `POST /api/private/external/biosamples` -* **Controller:** `app/controllers/ExternalBiosampleController.scala` +Phase 1 (Direct REST API / MVP) implementation is complete and ready for integration testing. + +--- + +## Architecture Overview + +For the MVP, we utilize a **Secure REST API** pattern. The BGS server (or Edge App) acts as an authenticated API client, pushing operational data directly to the `decodingus` backend. + +### Citizen Biosample API + +* **Integration Point:** `POST /api/external-biosamples` +* **Controller:** `app/controllers/CitizenBiosampleController.scala` +* **Service:** `app/services/CitizenBiosampleService.scala` * **Data Model:** `app/models/api/ExternalBiosampleRequest.scala` -* **Security:** API Key authentication via `X-API-Key` header. +* **Security:** API Key authentication via `X-API-Key` header (`ApiSecurityAction`) + +### Full CRUD Operations + +| Operation | Endpoint | Description | +|-----------|----------|-------------| +| **Create** | `POST /api/external-biosamples` | Create new citizen biosample with donor resolution | +| **Update** | `PUT /api/external-biosamples/{atUri}` | Update existing biosample (optimistic locking via `atCid`) | +| **Delete** | `DELETE /api/external-biosamples/{atUri}` | Soft delete biosample | + +### Project API + +| Operation | Endpoint | Description | +|-----------|----------|-------------| +| **Create** | `POST /api/projects` | Create new research project | +| **Update** | `PUT /api/projects/{atUri}` | Update project (optimistic locking) | +| **Delete** | `DELETE /api/projects/{atUri}` | Soft delete project | + +--- + +## Data Model: Atmosphere / Citizen Sample Hierarchy + +The Edge App manages a workspace with the following hierarchy: -### 2. 
Atmosphere / Citizen Sample Linking +``` +PDS Owner (Researcher running Edge App) + └── Workspace + ├── SpecimenDonor "Grandfather" (donorIdentifier: "Subject-001") + │ └── CitizenBiosample (multiple sequenceData entries: WGS + HiFi) + │ + ├── SpecimenDonor "Father" (donorIdentifier: "Subject-002") + │ └── CitizenBiosample (WGS only) + │ + └── SpecimenDonor "Self" (donorIdentifier: "Subject-003") + └── CitizenBiosample (Exome + WGS from different labs) +``` + +### Key Concepts -For **Atmosphere** (Citizen) biosamples, the system must correctly place the sample within a hierarchy: -1. **Researcher/User (PDS Owner):** Identified by `citizenDid`. A single researcher may manage multiple donors. -2. **Specimen Donor:** Identified by a unique `donorIdentifier` within the Researcher's context. A single donor may have multiple biosamples (e.g., different tissues, or different sequencing technologies like Short-read vs. HiFi). -3. **Biosample/Sequencing Data:** The actual data being uploaded. +1. **PDS Owner (citizenDid):** The researcher/genealogist running the Edge App. Extracted from `atUri` or provided explicitly. +2. **Specimen Donor:** A physical person (family member, project participant). Identified by `donorIdentifier` within the PDS owner's context. +3. **Citizen Biosample:** A single biosample record containing multiple `sequenceData` entries (different sequencing runs, labs, technologies). -* **Linkage Keys:** - * `citizenDid`: Identifies the Researcher/PDS. - * `donorIdentifier`: Identifies the specific biological source (person) *within* that Researcher's collection. +### Linkage Keys -* **SpecimenDonor Resolution Logic:** - The system attempts to find a `SpecimenDonor` matching **BOTH** the `citizenDid` and the `donorIdentifier`. - * **If Found:** The new Biosample is linked to this *existing* Specimen Donor. - * *Benefit:* This aggregates multiple datasets (e.g., WGS + HiFi) under the same physical donor. 
- * **If Not Found:** A new `SpecimenDonor` record is created. - * `citizenBiosampleDid` = `citizenDid` - * `donorIdentifier` = `donorIdentifier` - * `donorType` = "Citizen" +* `atUri`: The canonical AT Protocol identifier (`at://did:plc:xxx/collection/rkey`) - uniquely identifies the biosample record +* `citizenDid`: Extracted from `atUri` or provided explicitly - identifies the PDS owner +* `donorIdentifier`: Identifies the specific biological source (person) within that PDS owner's collection -**Revised Sequence Diagram:** +### SpecimenDonor Resolution Logic + +Implemented in `CitizenBiosampleService.resolveOrCreateDonor()`: + +1. Extract `citizenDid` from `atUri` (format: `at://did:plc:xxx/...`) +2. Look up `SpecimenDonor` by `(citizenDid, donorIdentifier)` pair +3. If found: Link biosample to existing donor (aggregates multiple datasets) +4. If not found: Create new `SpecimenDonor` with `donorType = Citizen` ```mermaid sequenceDiagram - participant BGS as BGS Server - participant API as ExternalBiosampleController - participant Service as ExternalBiosampleService - participant Repo as SpecimenDonorRepository + participant BGS as Edge App / BGS + participant API as CitizenBiosampleController + participant Service as CitizenBiosampleService + participant DonorRepo as SpecimenDonorRepository + participant BioRepo as CitizenBiosampleRepository participant DB as Database - BGS->>API: POST /biosamples - Note right of BGS: citizenDid="did:123"
donorIdentifier="Subject-A" - - API->>Service: createBiosample(req) - - alt citizenDid is present - Service->>Repo: findByDidAndIdentifier("did:123", "Subject-A") - Repo->>DB: SELECT * FROM specimen_donors
WHERE citizen_biosample_did = 'did:123'
AND donor_identifier = 'Subject-A' - - alt Donor Exists (e.g., adding HiFi to existing Subject-A) - DB-->>Service: Returns Donor(id=55) - else Donor Missing (New Subject) - Service->>Repo: create(SpecimenDonor{did="did:123", id="Subject-A"}) - Repo-->>Service: Returns New Donor(id=99) - end + BGS->>API: POST /api/external-biosamples + Note right of BGS: atUri="at://did:plc:abc/collection/rkey"
donorIdentifier="Subject-001" + + API->>Service: createBiosample(request) + Service->>Service: extractDidFromAtUri(atUri) + + Service->>DonorRepo: findByDidAndIdentifier("did:plc:abc", "Subject-001") + DonorRepo->>DB: SELECT * FROM specimen_donor
WHERE at_uri = 'did:plc:abc'
AND donor_identifier = 'Subject-001' + + alt Donor Exists + DB-->>Service: Returns Donor(id=55) + else Donor Not Found + Service->>DonorRepo: create(SpecimenDonor{atUri, donorIdentifier, donorType=Citizen}) + DonorRepo-->>Service: Returns New Donor(id=99) end - Service->>Repo: createBiosample(donorId=55 or 99) - Service-->>API: Returns UUID - API-->>BGS: 201 Created + Service->>BioRepo: create(CitizenBiosample{specimenDonorId=55|99, ...}) + BioRepo-->>Service: Returns created biosample + + Service-->>API: Returns sampleGuid + API-->>BGS: 201 Created {guid: "..."} ``` -### 1. Data Payload Specification +--- + +## Data Payload Specification + +### ExternalBiosampleRequest -**JSON Structure:** ```json { "sampleAccession": "BGS-UUID-12345", "sourceSystem": "BGS_MVP", - "description": "Processed by BGS Node 1", - "centerName": "DecodingUs Ops", + "description": "Processed by Edge Node", + "centerName": "Home Lab", "sex": "Male", - "citizenDid": "did:plc:u76f5w...", + "atUri": "at://did:plc:abc123/com.decodingus.atmosphere.biosample/rkey456", "donorIdentifier": "Subject-001", "donorType": "Citizen", - "sequenceData": { - // ... 
(same as before) + "latitude": 40.7128, + "longitude": -74.0060, + "haplogroups": { + "yDna": { + "haplogroupName": "R-M269", + "score": 0.998, + "matchingSnps": 145, + "mismatchingSnps": 2 + }, + "mtDna": { + "haplogroupName": "H1a", + "score": 0.995 + } + }, + "sequenceData": { + "reads": 850000000, + "readLength": 150, + "coverage": 32.5, + "platformName": "ILLUMINA", + "testType": "WGS", + "files": [ + { + "fileName": "sample.cram", + "fileSizeBytes": 22000000000, + "fileFormat": "CRAM", + "aligner": "BWA-MEM", + "targetReference": "GRCh38", + "checksums": [{"checksum": "abc123...", "algorithm": "SHA-256"}], + "location": {"fileUrl": "s3://bucket/sample.cram", "fileIndexUrl": "s3://bucket/sample.cram.crai"} + } + ] + }, + "publication": { + "doi": "10.1234/example", + "originalHaplogroups": { + "yHaplogroup": {"haplogroupName": "R1b"}, + "mtHaplogroup": {"haplogroupName": "H"} + } } } ``` -### 4. Rust Implementation (BGS Side) +### Key Fields -**Suggested Rust Structs:** +| Field | Required | Description | +|-------|----------|-------------| +| `sampleAccession` | Yes | Unique identifier from the Edge App | +| `atUri` | Yes* | AT Protocol URI - canonical identifier | +| `donorIdentifier` | Yes* | Identifies the physical person within PDS owner's collection | +| `sequenceData` | Yes | Sequencing run details and files | +| `haplogroups` | No | Y-DNA and mtDNA assignments with full scoring | +| `atCid` | No | For updates: optimistic locking version | -```rust -use serde::Serialize; +*Required for proper donor resolution -#[derive(Serialize)] -struct ExternalBiosampleRequest { - sampleAccession: String, - sourceSystem: String, - description: String, - centerName: String, - citizenDid: Option, - donorIdentifier: Option, // New: Identifies specific donor - donorType: Option, - sequenceData: SequenceDataInfo, - // ... 
-} -``` +--- + +## PDS Registration + +Before syncing data, PDS instances must be registered: - // Handle 201 Created or errors - Ok(()) +**Endpoint:** `POST /api/registerPDS` + +```json +{ + "did": "did:plc:abc123", + "handle": "researcher.bsky.social", + "pdsUrl": "https://pds.example.com", + "rToken": "auth-token-from-edge-app" } ``` -### 5. Integration Roadmap +The registration process: +1. Verifies PDS is reachable via `com.atproto.sync.getLatestCommit` +2. Stores DID, PDS URL, and initial sync cursor +3. Enables the Rust sync cluster to poll for updates + +### PDS Lease Management + +For parallel sync processing, the `pds_registrations` table includes: +- `leased_by_instance_id`: Which sync worker owns this PDS +- `lease_expires_at`: Lease expiration for failover +- `processing_status`: idle | processing | error + +--- + +## Database Schema + +### Tables + +| Table | Purpose | +|-------|---------| +| `citizen_biosample` | Citizen/Atmosphere biosample records | +| `specimen_donor` | Physical persons (donors) - linked via `specimen_donor_id` FK | +| `project` | Research projects grouping biosamples | +| `pds_registrations` | Registered PDS instances for sync | +| `publication_citizen_biosample` | Links biosamples to publications | +| `citizen_biosample_original_haplogroup` | Publication-reported haplogroups | + +### Key Columns on `citizen_biosample` + +| Column | Type | Purpose | +|--------|------|---------| +| `at_uri` | VARCHAR | AT Protocol canonical identifier | +| `at_cid` | VARCHAR | Version for optimistic locking | +| `specimen_donor_id` | INT FK | Link to physical donor | +| `deleted` | BOOLEAN | Soft delete flag | +| `y_haplogroup` | JSONB | Full HaplogroupResult with scoring | +| `mt_haplogroup` | JSONB | Full HaplogroupResult with scoring | + +--- + +## Integration Roadmap + +### Phase 1: Direct REST API ✅ COMPLETE + +* **Mechanism:** Synchronous HTTP POST +* **Flow:** `Edge App` → `CitizenBiosampleController` → `CitizenBiosampleService` → `DB` +* 
**Status:** Fully implemented and tested + +### Phase 2: Asynchronous Ingestion (Kafka) + +* **Mechanism:** Message Queue +* **Flow:** `Edge App` → `Kafka Topic` → `DecodingUs Consumer` → `Service` → `DB` +* **Change:** Edge App uses Kafka Producer; DecodingUs adds Kafka Consumer service +* **Benefits:** Decoupled; handles traffic bursts; high resilience + +### Phase 3: Decentralized AppView (Atmosphere) + +* **Mechanism:** AT Protocol Firehose +* **Flow:** `Edge App` → `User's PDS` → `AT Proto Relay` → `DecodingUs Firehose Consumer` → `DB` +* **Change:** Edge App writes directly to PDS using `com.decodingus.atmosphere.biosample` Lexicon; DecodingUs becomes passive indexer +* **Benefits:** True user data ownership; interoperability with AT Protocol ecosystem + +--- + +## Deployment Checklist -The integration strategy evolves through three distinct phases, moving from a simple direct connection to a robust, decentralized architecture. +### For Phase 1 MVP -#### Phase 1: Direct REST API (Current / MVP) -* **Mechanism:** Synchronous HTTP POST. -* **Flow:** `BGS Server` -> `DecodingUs Controller` -> `Service` -> `DB`. -* **Pros:** Simplest to implement; immediate feedback on success/failure. -* **Cons:** Tightly coupled; requires BGS to handle retries if DecodingUs is down. +1. **API Key:** Configure in AWS Secrets Manager (prod) or `application.conf` (dev) +2. **Database:** Run evolution 25.sql for `specimen_donor_id` FK on `citizen_biosample` +3. **Edge App Config:** Set DecodingUs API URL and API key +4. **Test:** POST sample payload to `/api/external-biosamples` +5. **Verify:** Check `citizen_biosample`, `specimen_donor`, and `sequence_library` tables -#### Phase 2: Asynchronous Ingestion (Kafka) -* **Mechanism:** Message Queue. -* **Flow:** `BGS Server` -> `Kafka Topic` -> `DecodingUs Consumer` -> `Service` -> `DB`. -* **Change:** BGS replaces the HTTP Client with a Kafka Producer. DecodingUs adds a Kafka Consumer service. 
-* **Pros:** Decoupled; handles bursts of traffic; high resilience. +### Swagger UI -#### Phase 3: Decentralized AppView (Atmosphere) -* **Mechanism:** AT Protocol Firehose. -* **Flow:** `BGS Server` -> `Researcher PDS` -> `AT Proto Relay` -> `DecodingUs Firehose Consumer` -> `Service` -> `DB`. -* **Change:** BGS writes directly to the user's PDS using the `com.decodingus.atmosphere.biosample` Lexicon. DecodingUs becomes a passive indexer. -* **Pros:** True user data ownership; interoperability with other AT Protocol apps. +API documentation available at: `/api/docs` -### Next Steps -1. **Provision Key:** Ensure a valid API key is set in your AWS Secrets Manager (for prod) or `application.conf` (if configured for dev overrides). -2. **Deploy BGS:** Configure the BGS MVP node with the `decodingus` URL and the API Key. -3. **Verify:** Send a test payload from the BGS node and verify the data appears in the `biosamples` and `sequence_libraries` tables. \ No newline at end of file +Documented endpoints: +- Citizen Biosamples (Create, Update, Delete) +- Projects (Create, Update, Delete) +- References, Haplogroups, Coverage, Sequencer APIs From ed949215aac41694e57aa6fe7a1c493329445859 Mon Sep 17 00:00:00 2001 From: jkane Date: Sat, 6 Dec 2025 09:41:31 -0600 Subject: [PATCH 31/31] Add test coverage for CitizenBiosampleEventHandler, CitizenBiosampleService, and PDSRegistrationService - Introduce unit tests for `CitizenBiosampleEventHandler`, covering Create, Update, Delete operations, and edge cases like conflicts and not found scenarios. - Add service-level tests for `CitizenBiosampleService`, handling delegation to the event handler and proper result translations. - Implement tests for `PDSRegistrationService`, including new registration, conflict detection, and PDS retrieval by DID/handle. 
--- .../CitizenBiosampleServiceSpec.scala | 217 ++++++++ .../services/PDSRegistrationServiceSpec.scala | 259 +++++++++ test/services/ProjectServiceSpec.scala | 213 ++++++++ .../CitizenBiosampleEventHandlerSpec.scala | 506 ++++++++++++++++++ 4 files changed, 1195 insertions(+) create mode 100644 test/services/CitizenBiosampleServiceSpec.scala create mode 100644 test/services/PDSRegistrationServiceSpec.scala create mode 100644 test/services/ProjectServiceSpec.scala create mode 100644 test/services/firehose/CitizenBiosampleEventHandlerSpec.scala diff --git a/test/services/CitizenBiosampleServiceSpec.scala b/test/services/CitizenBiosampleServiceSpec.scala new file mode 100644 index 0000000..d9b6218 --- /dev/null +++ b/test/services/CitizenBiosampleServiceSpec.scala @@ -0,0 +1,217 @@ +package services + +import models.api.{ExternalBiosampleRequest, SequenceDataInfo} +import models.domain.genomics.{BiologicalSex, BiosampleType} +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito.{verify, when} +import org.scalatestplus.mockito.MockitoSugar +import org.scalatest.concurrent.ScalaFutures +import org.scalatestplus.play.PlaySpec +import services.firehose.{CitizenBiosampleEvent, CitizenBiosampleEventHandler, FirehoseResult} + +import java.util.UUID +import scala.concurrent.{ExecutionContext, Future} + +class CitizenBiosampleServiceSpec extends PlaySpec with MockitoSugar with ScalaFutures { + + implicit val ec: ExecutionContext = ExecutionContext.global + + def createRequest( + atUri: String = "at://did:plc:test123/com.decodingus.atmosphere.biosample/rkey1", + accession: String = "TEST-001" + ): ExternalBiosampleRequest = ExternalBiosampleRequest( + sampleAccession = accession, + sourceSystem = "test", + description = "Test biosample", + alias = Some("test-alias"), + centerName = "Test Lab", + sex = Some(BiologicalSex.Male), + latitude = None, + longitude = None, + citizenDid = None, + atUri = Some(atUri), + donorIdentifier = Some("Subject-001"), + donorType = 
Some(BiosampleType.Citizen), + publication = None, + haplogroups = None, + sequenceData = SequenceDataInfo( + reads = Some(1000000), + readLength = Some(150), + coverage = Some(30.0), + platformName = "ILLUMINA", + testType = "WGS", + files = Seq.empty + ), + atCid = None + ) + + "CitizenBiosampleService" should { + + "delegate create to event handler and return GUID on success" in { + val mockHandler = mock[CitizenBiosampleEventHandler] + val expectedGuid = UUID.randomUUID() + + when(mockHandler.handle(any[CitizenBiosampleEvent])) + .thenReturn(Future.successful(FirehoseResult.Success( + atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/rkey1", + newAtCid = "new-cid-123", + sampleGuid = Some(expectedGuid), + message = "Created" + ))) + + val service = new CitizenBiosampleService(mockHandler) + val request = createRequest() + + whenReady(service.createBiosample(request)) { guid => + guid mustBe expectedGuid + verify(mockHandler).handle(any[CitizenBiosampleEvent]) + } + } + + "translate Conflict result to exception on create" in { + val mockHandler = mock[CitizenBiosampleEventHandler] + + when(mockHandler.handle(any[CitizenBiosampleEvent])) + .thenReturn(Future.successful(FirehoseResult.Conflict( + atUri = "at://test", + message = "Biosample already exists" + ))) + + val service = new CitizenBiosampleService(mockHandler) + val request = createRequest() + + whenReady(service.createBiosample(request).failed) { e => + e mustBe a[IllegalArgumentException] + e.getMessage must include("already exists") + } + } + + "translate ValidationError result to exception on create" in { + val mockHandler = mock[CitizenBiosampleEventHandler] + + when(mockHandler.handle(any[CitizenBiosampleEvent])) + .thenReturn(Future.successful(FirehoseResult.ValidationError( + atUri = "at://test", + message = "Invalid coordinates" + ))) + + val service = new CitizenBiosampleService(mockHandler) + val request = createRequest() + + whenReady(service.createBiosample(request).failed) { 
e => + e mustBe a[IllegalArgumentException] + e.getMessage must include("Invalid coordinates") + } + } + + "delegate update to event handler and return GUID on success" in { + val mockHandler = mock[CitizenBiosampleEventHandler] + val expectedGuid = UUID.randomUUID() + val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/rkey1" + + when(mockHandler.handle(any[CitizenBiosampleEvent])) + .thenReturn(Future.successful(FirehoseResult.Success( + atUri = atUri, + newAtCid = "updated-cid", + sampleGuid = Some(expectedGuid), + message = "Updated" + ))) + + val service = new CitizenBiosampleService(mockHandler) + val request = createRequest(atUri = atUri) + + whenReady(service.updateBiosample(atUri, request)) { guid => + guid mustBe expectedGuid + verify(mockHandler).handle(any[CitizenBiosampleEvent]) + } + } + + "translate NotFound result to exception on update" in { + val mockHandler = mock[CitizenBiosampleEventHandler] + val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/nonexistent" + + when(mockHandler.handle(any[CitizenBiosampleEvent])) + .thenReturn(Future.successful(FirehoseResult.NotFound(atUri))) + + val service = new CitizenBiosampleService(mockHandler) + val request = createRequest(atUri = atUri) + + whenReady(service.updateBiosample(atUri, request).failed) { e => + e mustBe a[NoSuchElementException] + e.getMessage must include("not found") + } + } + + "translate Conflict result to IllegalStateException on update" in { + val mockHandler = mock[CitizenBiosampleEventHandler] + val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/rkey1" + + when(mockHandler.handle(any[CitizenBiosampleEvent])) + .thenReturn(Future.successful(FirehoseResult.Conflict( + atUri = atUri, + message = "Optimistic locking failure" + ))) + + val service = new CitizenBiosampleService(mockHandler) + val request = createRequest(atUri = atUri) + + whenReady(service.updateBiosample(atUri, request).failed) { e => + e mustBe 
a[IllegalStateException] + e.getMessage must include("Optimistic locking") + } + } + + "delegate delete to event handler and return true on success" in { + val mockHandler = mock[CitizenBiosampleEventHandler] + val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/rkey1" + + when(mockHandler.handle(any[CitizenBiosampleEvent])) + .thenReturn(Future.successful(FirehoseResult.Success( + atUri = atUri, + newAtCid = "", + sampleGuid = None, + message = "Deleted" + ))) + + val service = new CitizenBiosampleService(mockHandler) + + whenReady(service.deleteBiosample(atUri)) { result => + result mustBe true + verify(mockHandler).handle(any[CitizenBiosampleEvent]) + } + } + + "return false when delete finds no record" in { + val mockHandler = mock[CitizenBiosampleEventHandler] + val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/nonexistent" + + when(mockHandler.handle(any[CitizenBiosampleEvent])) + .thenReturn(Future.successful(FirehoseResult.NotFound(atUri))) + + val service = new CitizenBiosampleService(mockHandler) + + whenReady(service.deleteBiosample(atUri)) { result => + result mustBe false + } + } + + "translate Error result to exception" in { + val mockHandler = mock[CitizenBiosampleEventHandler] + val cause = new RuntimeException("Database connection failed") + + when(mockHandler.handle(any[CitizenBiosampleEvent])) + .thenReturn(Future.successful(FirehoseResult.Error( + atUri = "at://test", + message = "Database connection failed", + cause = Some(cause) + ))) + + val service = new CitizenBiosampleService(mockHandler) + val request = createRequest() + + whenReady(service.createBiosample(request).failed) { e => + e mustBe cause + } + } + } +} diff --git a/test/services/PDSRegistrationServiceSpec.scala b/test/services/PDSRegistrationServiceSpec.scala new file mode 100644 index 0000000..861a93d --- /dev/null +++ b/test/services/PDSRegistrationServiceSpec.scala @@ -0,0 +1,259 @@ +package services + +import models.PDSRegistration 
+import org.mockito.ArgumentMatchers.{any, anyString} +import org.mockito.Mockito.{never, verify, when} +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatestplus.mockito.MockitoSugar +import org.scalatest.concurrent.ScalaFutures +import org.scalatestplus.play.PlaySpec +import repositories.PDSRegistrationRepository + +import java.time.ZonedDateTime +import scala.concurrent.{ExecutionContext, Future} + +class PDSRegistrationServiceSpec extends PlaySpec with MockitoSugar with ScalaFutures { + + implicit val ec: ExecutionContext = ExecutionContext.global + + def createMocks(): (ATProtocolClient, PDSRegistrationRepository) = { + (mock[ATProtocolClient], mock[PDSRegistrationRepository]) + } + + "PDSRegistrationService" should { + + "register a new PDS successfully" in { + val (atClient, repo) = createMocks() + + val did = "did:plc:test123" + val handle = "user.bsky.social" + val pdsUrl = "https://pds.example.com" + val rToken = "auth-token" + + // PDS not already registered + when(repo.findByDid(did)) + .thenReturn(Future.successful(None)) + + // AT Protocol verification succeeds + when(atClient.getLatestCommit(pdsUrl, did, rToken)) + .thenReturn(Future.successful(Some(LatestCommitResponse( + cid = "bafyreib123", + rev = "rev-001", + seq = 42L + )))) + + // Repository create succeeds + when(repo.create(any[PDSRegistration])) + .thenAnswer(new Answer[Future[PDSRegistration]] { + override def answer(invocation: InvocationOnMock): Future[PDSRegistration] = { + Future.successful(invocation.getArgument[PDSRegistration](0)) + } + }) + + val service = new PDSRegistrationService(atClient, repo) + + whenReady(service.registerPDS(did, handle, pdsUrl, rToken)) { result => + result mustBe a[Right[_, _]] + val registration = result.toOption.get + registration.did mustBe did + registration.pdsUrl mustBe pdsUrl + registration.handle mustBe handle + registration.lastCommitCid mustBe Some("bafyreib123") + registration.lastCommitSeq 
mustBe Some(42L) + + verify(repo).create(any[PDSRegistration]) + } + } + + "fail registration when PDS already registered" in { + val (atClient, repo) = createMocks() + + val did = "did:plc:existing" + val existingRegistration = PDSRegistration( + did = did, + pdsUrl = "https://existing.pds.com", + handle = "existing.user", + lastCommitCid = Some("abc"), + lastCommitSeq = Some(10L), + cursor = 0L, + createdAt = ZonedDateTime.now(), + updatedAt = ZonedDateTime.now() + ) + + when(repo.findByDid(did)) + .thenReturn(Future.successful(Some(existingRegistration))) + + val service = new PDSRegistrationService(atClient, repo) + + whenReady(service.registerPDS(did, "handle", "https://pds.com", "token")) { result => + result mustBe a[Left[_, _]] + result.left.getOrElse("") must include("already registered") + + verify(atClient, never).getLatestCommit(anyString(), anyString(), anyString()) + verify(repo, never).create(any[PDSRegistration]) + } + } + + "fail registration when AT Protocol verification fails" in { + val (atClient, repo) = createMocks() + + val did = "did:plc:unverifiable" + val pdsUrl = "https://unreachable.pds.com" + + when(repo.findByDid(did)) + .thenReturn(Future.successful(None)) + + // AT Protocol verification fails + when(atClient.getLatestCommit(pdsUrl, did, "token")) + .thenReturn(Future.successful(None)) + + val service = new PDSRegistrationService(atClient, repo) + + whenReady(service.registerPDS(did, "handle", pdsUrl, "token")) { result => + result mustBe a[Left[_, _]] + result.left.getOrElse("") must include("Failed to verify") + + verify(repo, never).create(any[PDSRegistration]) + } + } + + "retrieve PDS by DID" in { + val (atClient, repo) = createMocks() + + val did = "did:plc:test123" + val registration = PDSRegistration( + did = did, + pdsUrl = "https://pds.example.com", + handle = "user.test", + lastCommitCid = Some("cid"), + lastCommitSeq = Some(100L), + cursor = 50L, + createdAt = ZonedDateTime.now(), + updatedAt = ZonedDateTime.now() + ) + + 
when(repo.findByDid(did)) + .thenReturn(Future.successful(Some(registration))) + + val service = new PDSRegistrationService(atClient, repo) + + whenReady(service.getPDSByDid(did)) { result => + result mustBe defined + result.get.did mustBe did + result.get.handle mustBe "user.test" + } + } + + "retrieve PDS by handle" in { + val (atClient, repo) = createMocks() + + val handle = "user.bsky.social" + val registration = PDSRegistration( + did = "did:plc:abc", + pdsUrl = "https://pds.example.com", + handle = handle, + lastCommitCid = None, + lastCommitSeq = None, + cursor = 0L, + createdAt = ZonedDateTime.now(), + updatedAt = ZonedDateTime.now() + ) + + when(repo.findByHandle(handle)) + .thenReturn(Future.successful(Some(registration))) + + val service = new PDSRegistrationService(atClient, repo) + + whenReady(service.getPDSByHandle(handle)) { result => + result mustBe defined + result.get.handle mustBe handle + } + } + + "list all registered PDS entries" in { + val (atClient, repo) = createMocks() + + val registrations = Seq( + PDSRegistration("did:1", "https://pds1.com", "user1", None, None, 0L, ZonedDateTime.now(), ZonedDateTime.now()), + PDSRegistration("did:2", "https://pds2.com", "user2", None, None, 0L, ZonedDateTime.now(), ZonedDateTime.now()) + ) + + when(repo.listAll) + .thenReturn(Future.successful(registrations)) + + val service = new PDSRegistrationService(atClient, repo) + + whenReady(service.listAllPDS()) { result => + result must have size 2 + result.map(_.did) must contain allOf("did:1", "did:2") + } + } + + "update PDS cursor successfully" in { + val (atClient, repo) = createMocks() + + val did = "did:plc:test" + val newCid = "newCid123" + val newCursor = 200L + + when(repo.updateCursor(did, newCid, newCursor)) + .thenReturn(Future.successful(1)) + + val service = new PDSRegistrationService(atClient, repo) + + whenReady(service.updatePDSCursor(did, newCid, newCursor)) { result => + result mustBe Right(()) + verify(repo).updateCursor(did, newCid, 
newCursor) + } + } + + "fail cursor update when PDS not found" in { + val (atClient, repo) = createMocks() + + val did = "did:plc:nonexistent" + + when(repo.updateCursor(did, "cid", 100L)) + .thenReturn(Future.successful(0)) // No rows affected + + val service = new PDSRegistrationService(atClient, repo) + + whenReady(service.updatePDSCursor(did, "cid", 100L)) { result => + result mustBe a[Left[_, _]] + result.left.getOrElse("") must include("not found") + } + } + + "delete PDS registration successfully" in { + val (atClient, repo) = createMocks() + + val did = "did:plc:todelete" + + when(repo.delete(did)) + .thenReturn(Future.successful(1)) + + val service = new PDSRegistrationService(atClient, repo) + + whenReady(service.deletePDS(did)) { result => + result mustBe Right(()) + verify(repo).delete(did) + } + } + + "fail deletion when PDS not found" in { + val (atClient, repo) = createMocks() + + val did = "did:plc:nonexistent" + + when(repo.delete(did)) + .thenReturn(Future.successful(0)) + + val service = new PDSRegistrationService(atClient, repo) + + whenReady(service.deletePDS(did)) { result => + result mustBe a[Left[_, _]] + result.left.getOrElse("") must include("not found") + } + } + } +} diff --git a/test/services/ProjectServiceSpec.scala b/test/services/ProjectServiceSpec.scala new file mode 100644 index 0000000..152b125 --- /dev/null +++ b/test/services/ProjectServiceSpec.scala @@ -0,0 +1,213 @@ +package services + +import models.api.{ProjectRequest, ProjectResponse} +import models.domain.Project +import org.mockito.ArgumentMatchers.{any, anyString} +import org.mockito.Mockito.{never, verify, when} +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatestplus.mockito.MockitoSugar +import org.scalatest.concurrent.ScalaFutures +import org.scalatestplus.play.PlaySpec +import repositories.ProjectRepository + +import java.time.LocalDateTime +import java.util.UUID +import scala.concurrent.{ExecutionContext, Future} 
+ +class ProjectServiceSpec extends PlaySpec with MockitoSugar with ScalaFutures { + + implicit val ec: ExecutionContext = ExecutionContext.global + + def createRequest( + name: String = "Test Project", + description: Option[String] = Some("A test project"), + atUri: Option[String] = Some("at://did:plc:test/com.decodingus.atmosphere.project/rkey1"), + atCid: Option[String] = None + ): ProjectRequest = ProjectRequest( + name = name, + description = description, + atUri = atUri, + atCid = atCid + ) + + "ProjectService" should { + + "create a new project successfully" in { + val mockRepo = mock[ProjectRepository] + + when(mockRepo.create(any[Project])) + .thenAnswer(new Answer[Future[Project]] { + override def answer(invocation: InvocationOnMock): Future[Project] = { + val p = invocation.getArgument[Project](0) + Future.successful(p.copy(id = Some(1))) + } + }) + + val service = new ProjectService(mockRepo) + val request = createRequest() + + whenReady(service.createProject(request)) { response => + response.name mustBe "Test Project" + response.description mustBe Some("A test project") + response.projectGuid mustBe a[UUID] + response.atCid mustBe defined + + verify(mockRepo).create(any[Project]) + } + } + + "update an existing project successfully" in { + val mockRepo = mock[ProjectRepository] + val existingGuid = UUID.randomUUID() + val existingAtCid = "existing-cid-123" + val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/rkey1" + + val existingProject = Project( + id = Some(1), + projectGuid = existingGuid, + name = "Old Name", + description = Some("Old description"), + ownerDid = "did:example:owner", + createdAt = LocalDateTime.now().minusDays(1), + updatedAt = LocalDateTime.now().minusDays(1), + deleted = false, + atUri = Some(atUri), + atCid = Some(existingAtCid) + ) + + when(mockRepo.findByAtUri(atUri)) + .thenReturn(Future.successful(Some(existingProject))) + + when(mockRepo.update(any[Project], any[Option[String]])) + 
.thenReturn(Future.successful(true)) + + val service = new ProjectService(mockRepo) + val request = createRequest( + name = "Updated Name", + description = Some("Updated description"), + atUri = Some(atUri), + atCid = Some(existingAtCid) + ) + + whenReady(service.updateProject(atUri, request)) { response => + response.name mustBe "Updated Name" + response.description mustBe Some("Updated description") + response.projectGuid mustBe existingGuid + response.atCid mustBe defined + response.atCid must not be Some(existingAtCid) // Should be new CID + + verify(mockRepo).update(any[Project], any[Option[String]]) + } + } + + "fail update with optimistic locking error when atCid mismatch" in { + val mockRepo = mock[ProjectRepository] + val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/rkey1" + + val existingProject = Project( + id = Some(1), + projectGuid = UUID.randomUUID(), + name = "Project", + description = None, + ownerDid = "did:example:owner", + createdAt = LocalDateTime.now(), + updatedAt = LocalDateTime.now(), + deleted = false, + atUri = Some(atUri), + atCid = Some("current-cid") + ) + + when(mockRepo.findByAtUri(atUri)) + .thenReturn(Future.successful(Some(existingProject))) + + val service = new ProjectService(mockRepo) + val request = createRequest(atCid = Some("stale-cid")) + + whenReady(service.updateProject(atUri, request).failed) { e => + e mustBe a[IllegalStateException] + e.getMessage must include("Optimistic locking failure") + + verify(mockRepo, never).update(any[Project], any[Option[String]]) + } + } + + "fail update when project not found" in { + val mockRepo = mock[ProjectRepository] + val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/nonexistent" + + when(mockRepo.findByAtUri(atUri)) + .thenReturn(Future.successful(None)) + + val service = new ProjectService(mockRepo) + val request = createRequest() + + whenReady(service.updateProject(atUri, request).failed) { e => + e mustBe a[NoSuchElementException] + e.getMessage 
must include("not found") + } + } + + "delete a project successfully" in { + val mockRepo = mock[ProjectRepository] + val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/rkey1" + + when(mockRepo.softDeleteByAtUri(atUri)) + .thenReturn(Future.successful(true)) + + val service = new ProjectService(mockRepo) + + whenReady(service.deleteProject(atUri)) { result => + result mustBe true + verify(mockRepo).softDeleteByAtUri(atUri) + } + } + + "return false when deleting non-existent project" in { + val mockRepo = mock[ProjectRepository] + val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/nonexistent" + + when(mockRepo.softDeleteByAtUri(atUri)) + .thenReturn(Future.successful(false)) + + val service = new ProjectService(mockRepo) + + whenReady(service.deleteProject(atUri)) { result => + result mustBe false + } + } + + "allow update without atCid (no optimistic locking check)" in { + val mockRepo = mock[ProjectRepository] + val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/rkey1" + + val existingProject = Project( + id = Some(1), + projectGuid = UUID.randomUUID(), + name = "Project", + description = None, + ownerDid = "did:example:owner", + createdAt = LocalDateTime.now(), + updatedAt = LocalDateTime.now(), + deleted = false, + atUri = Some(atUri), + atCid = Some("any-cid") + ) + + when(mockRepo.findByAtUri(atUri)) + .thenReturn(Future.successful(Some(existingProject))) + + when(mockRepo.update(any[Project], any[Option[String]])) + .thenReturn(Future.successful(true)) + + val service = new ProjectService(mockRepo) + // Request without atCid - should skip optimistic locking check + val request = createRequest(atCid = None) + + whenReady(service.updateProject(atUri, request)) { response => + response.name mustBe "Test Project" + verify(mockRepo).update(any[Project], any[Option[String]]) + } + } + } +} diff --git a/test/services/firehose/CitizenBiosampleEventHandlerSpec.scala 
b/test/services/firehose/CitizenBiosampleEventHandlerSpec.scala new file mode 100644 index 0000000..cba203a --- /dev/null +++ b/test/services/firehose/CitizenBiosampleEventHandlerSpec.scala @@ -0,0 +1,506 @@ +package services.firehose + +import com.vividsolutions.jts.geom.Point +import models.api.{ExternalBiosampleRequest, SequenceDataInfo} +import models.domain.genomics.{BiologicalSex, BiosampleType, CitizenBiosample, SpecimenDonor} +import org.mockito.ArgumentMatchers.{any, anyString} +import org.mockito.Mockito.{never, verify, when} +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatestplus.mockito.MockitoSugar +import org.scalatest.concurrent.ScalaFutures +import org.scalatestplus.play.PlaySpec +import repositories._ +import services.BiosampleDataService + +import java.time.LocalDateTime +import java.util.UUID +import scala.concurrent.{ExecutionContext, Future} + +class CitizenBiosampleEventHandlerSpec extends PlaySpec with MockitoSugar with ScalaFutures { + + implicit val ec: ExecutionContext = ExecutionContext.global + + // Helper to create a minimal valid request + def createRequest( + atUri: String = "at://did:plc:test123/com.decodingus.atmosphere.biosample/rkey1", + accession: String = "TEST-001", + donorIdentifier: Option[String] = Some("Subject-001") + ): ExternalBiosampleRequest = ExternalBiosampleRequest( + sampleAccession = accession, + sourceSystem = "test", + description = "Test biosample", + alias = Some("test-alias"), + centerName = "Test Lab", + sex = Some(BiologicalSex.Male), + latitude = None, + longitude = None, + citizenDid = None, + atUri = Some(atUri), + donorIdentifier = donorIdentifier, + donorType = Some(BiosampleType.Citizen), + publication = None, + haplogroups = None, + sequenceData = SequenceDataInfo( + reads = Some(1000000), + readLength = Some(150), + coverage = Some(30.0), + platformName = "ILLUMINA", + testType = "WGS", + files = Seq.empty + ), + atCid = None + ) + + def 
createMocks(): ( + CitizenBiosampleRepository, + BiosampleDataService, + PublicationRepository, + PublicationCitizenBiosampleRepository, + CitizenBiosampleOriginalHaplogroupRepository, + SpecimenDonorRepository + ) = ( + mock[CitizenBiosampleRepository], + mock[BiosampleDataService], + mock[PublicationRepository], + mock[PublicationCitizenBiosampleRepository], + mock[CitizenBiosampleOriginalHaplogroupRepository], + mock[SpecimenDonorRepository] + ) + + "CitizenBiosampleEventHandler" should { + + "create a new biosample successfully" in { + val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks() + + val request = createRequest() + val event = CitizenBiosampleEvent.forCreate(request) + + // Mock: no existing biosample with this accession + when(biosampleRepo.findByAccession(anyString())) + .thenReturn(Future.successful(None)) + + // Mock: no existing donor, create new one + when(donorRepo.findByDidAndIdentifier(anyString(), anyString())) + .thenReturn(Future.successful(None)) + + when(donorRepo.create(any[SpecimenDonor])) + .thenAnswer(new Answer[Future[SpecimenDonor]] { + override def answer(invocation: InvocationOnMock): Future[SpecimenDonor] = { + val donor = invocation.getArgument[SpecimenDonor](0) + Future.successful(donor.copy(id = Some(1))) + } + }) + + // Mock: create biosample + when(biosampleRepo.create(any[CitizenBiosample])) + .thenAnswer(new Answer[Future[CitizenBiosample]] { + override def answer(invocation: InvocationOnMock): Future[CitizenBiosample] = { + val bs = invocation.getArgument[CitizenBiosample](0) + Future.successful(bs.copy(id = Some(100))) + } + }) + + // Mock: sequence data handling + when(dataService.addSequenceData(any[UUID], any[SequenceDataInfo])) + .thenReturn(Future.successful(())) + + val handler = new CitizenBiosampleEventHandler( + biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo + ) + + whenReady(handler.handle(event)) { result => + result mustBe 
a[FirehoseResult.Success] + val success = result.asInstanceOf[FirehoseResult.Success] + success.sampleGuid mustBe defined + success.newAtCid must not be empty + + verify(donorRepo).create(any[SpecimenDonor]) + verify(biosampleRepo).create(any[CitizenBiosample]) + verify(dataService).addSequenceData(any[UUID], any[SequenceDataInfo]) + } + } + + "return Conflict when accession already exists" in { + val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks() + + val request = createRequest() + val event = CitizenBiosampleEvent.forCreate(request) + + val existingBiosample = CitizenBiosample( + id = Some(1), + atUri = Some("at://existing"), + accession = Some("TEST-001"), + alias = None, + sourcePlatform = None, + collectionDate = None, + sex = None, + geocoord = None, + description = None, + sampleGuid = UUID.randomUUID(), + deleted = false, + createdAt = LocalDateTime.now(), + updatedAt = LocalDateTime.now() + ) + + when(biosampleRepo.findByAccession("TEST-001")) + .thenReturn(Future.successful(Some(existingBiosample))) + + val handler = new CitizenBiosampleEventHandler( + biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo + ) + + whenReady(handler.handle(event)) { result => + result mustBe a[FirehoseResult.Conflict] + result.asInstanceOf[FirehoseResult.Conflict].message must include("already exists") + + verify(biosampleRepo, never).create(any[CitizenBiosample]) + } + } + + "reuse existing donor when found" in { + val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks() + + val request = createRequest() + val event = CitizenBiosampleEvent.forCreate(request) + + when(biosampleRepo.findByAccession(anyString())) + .thenReturn(Future.successful(None)) + + // Existing donor found + val existingDonor = SpecimenDonor( + id = Some(42), + donorIdentifier = "Subject-001", + originBiobank = "Test Lab", + donorType = BiosampleType.Citizen, + sex = None, + geocoord = None, + 
atUri = Some("did:plc:test123") + ) + when(donorRepo.findByDidAndIdentifier("did:plc:test123", "Subject-001")) + .thenReturn(Future.successful(Some(existingDonor))) + + when(biosampleRepo.create(any[CitizenBiosample])) + .thenAnswer(new Answer[Future[CitizenBiosample]] { + override def answer(invocation: InvocationOnMock): Future[CitizenBiosample] = { + val bs = invocation.getArgument[CitizenBiosample](0) + // Verify the donor ID was set correctly + bs.specimenDonorId mustBe Some(42) + Future.successful(bs.copy(id = Some(100))) + } + }) + + when(dataService.addSequenceData(any[UUID], any[SequenceDataInfo])) + .thenReturn(Future.successful(())) + + val handler = new CitizenBiosampleEventHandler( + biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo + ) + + whenReady(handler.handle(event)) { result => + result mustBe a[FirehoseResult.Success] + + // Should NOT create a new donor + verify(donorRepo, never).create(any[SpecimenDonor]) + } + } + + "update existing biosample successfully" in { + val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks() + + val existingGuid = UUID.randomUUID() + val existingAtCid = "existing-cid-123" + val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/rkey1" + + val existingBiosample = CitizenBiosample( + id = Some(1), + atUri = Some(atUri), + accession = Some("TEST-001"), + alias = Some("old-alias"), + sourcePlatform = Some("old-system"), + collectionDate = None, + sex = Some(BiologicalSex.Male), + geocoord = None, + description = Some("Old description"), + sampleGuid = existingGuid, + deleted = false, + atCid = Some(existingAtCid), + createdAt = LocalDateTime.now().minusDays(1), + updatedAt = LocalDateTime.now().minusDays(1), + specimenDonorId = Some(42) + ) + + // Request without donorIdentifier - should preserve existing donor + val request = createRequest(atUri = atUri, donorIdentifier = None).copy( + description = "Updated description", + alias = 
Some("new-alias"),
        atCid = Some(existingAtCid)
      )
      val event = CitizenBiosampleEvent.forUpdate(atUri, request)

      when(biosampleRepo.findByAtUri(atUri))
        .thenReturn(Future.successful(Some(existingBiosample)))

      when(biosampleRepo.update(any[CitizenBiosample], any[Option[String]]))
        .thenReturn(Future.successful(true))

      when(dataService.replaceSequenceData(any[UUID], any[SequenceDataInfo]))
        .thenReturn(Future.successful(()))

      val handler = new CitizenBiosampleEventHandler(
        biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo
      )

      whenReady(handler.handle(event)) { result =>
        result mustBe a[FirehoseResult.Success]
        val success = result.asInstanceOf[FirehoseResult.Success]
        // The update must preserve the existing sample GUID rather than minting a new one.
        success.sampleGuid mustBe Some(existingGuid)

        verify(biosampleRepo).update(any[CitizenBiosample], any[Option[String]])
        verify(dataService).replaceSequenceData(any[UUID], any[SequenceDataInfo])
        // Should NOT touch donor repo since donorIdentifier is None
        verify(donorRepo, never).findByDidAndIdentifier(anyString(), anyString())
      }
    }

    // Update path where the incoming record names a donor identifier the system has
    // not seen before: the handler must look it up, find nothing, and create it.
    "update biosample with new donor identifier" in {
      val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks()

      val existingGuid = UUID.randomUUID()
      val existingAtCid = "existing-cid-123"
      val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/rkey1"

      // Persisted row the handler will find; note it already points at donor id 42.
      val existingBiosample = CitizenBiosample(
        id = Some(1),
        atUri = Some(atUri),
        accession = Some("TEST-001"),
        alias = Some("old-alias"),
        sourcePlatform = Some("old-system"),
        collectionDate = None,
        sex = Some(BiologicalSex.Male),
        geocoord = None,
        description = Some("Old description"),
        sampleGuid = existingGuid,
        deleted = false,
        atCid = Some(existingAtCid),
        createdAt = LocalDateTime.now().minusDays(1),
        updatedAt = LocalDateTime.now().minusDays(1),
        specimenDonorId = Some(42) // Old donor
      )

      // Request WITH donorIdentifier - should resolve new donor.
      // atCid matches the stored value so the optimistic-lock check passes.
      val request =
        createRequest(atUri = atUri, donorIdentifier = Some("NewSubject-002")).copy(
          atCid = Some(existingAtCid)
        )
      val event = CitizenBiosampleEvent.forUpdate(atUri, request)

      when(biosampleRepo.findByAtUri(atUri))
        .thenReturn(Future.successful(Some(existingBiosample)))

      // New donor needs to be created
      when(donorRepo.findByDidAndIdentifier("did:plc:test123", "NewSubject-002"))
        .thenReturn(Future.successful(None))

      when(donorRepo.create(any[SpecimenDonor]))
        .thenAnswer(new Answer[Future[SpecimenDonor]] {
          override def answer(invocation: InvocationOnMock): Future[SpecimenDonor] = {
            val donor = invocation.getArgument[SpecimenDonor](0)
            Future.successful(donor.copy(id = Some(99))) // New donor ID
          }
        })

      when(biosampleRepo.update(any[CitizenBiosample], any[Option[String]]))
        .thenReturn(Future.successful(true))

      when(dataService.replaceSequenceData(any[UUID], any[SequenceDataInfo]))
        .thenReturn(Future.successful(()))

      val handler = new CitizenBiosampleEventHandler(
        biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo
      )

      whenReady(handler.handle(event)) { result =>
        result mustBe a[FirehoseResult.Success]
        // Donor resolution path: lookup first, then creation on miss.
        verify(donorRepo).findByDidAndIdentifier("did:plc:test123", "NewSubject-002")
        verify(donorRepo).create(any[SpecimenDonor])
      }
    }

    // An update event for an atUri with no stored row must map to NotFound,
    // echoing back the atUri so the caller can correlate the failure.
    "return NotFound when updating non-existent biosample" in {
      val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks()

      val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/nonexistent"
      val request = createRequest(atUri = atUri)
      val event = CitizenBiosampleEvent.forUpdate(atUri, request)

      when(biosampleRepo.findByAtUri(atUri))
        .thenReturn(Future.successful(None))

      val handler = new CitizenBiosampleEventHandler(
        biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo
      )

      whenReady(handler.handle(event)) { result =>
        result mustBe a[FirehoseResult.NotFound]
        result.atUri mustBe atUri
      }
    }

    // Optimistic-locking guard: when the request carries a stale atCid, the
    // handler must reject with Conflict and never attempt the repository update.
    "return Conflict on optimistic locking failure during update" in {
      val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks()

      val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/rkey1"
      val existingBiosample = CitizenBiosample(
        id = Some(1),
        atUri = Some(atUri),
        accession = Some("TEST-001"),
        alias = None,
        sourcePlatform = None,
        collectionDate = None,
        sex = None,
        geocoord = None,
        description = None,
        sampleGuid = UUID.randomUUID(),
        deleted = false,
        atCid = Some("current-cid"),
        createdAt = LocalDateTime.now(),
        updatedAt = LocalDateTime.now()
      )

      val request = createRequest(atUri = atUri).copy(
        atCid = Some("stale-cid") // Different from current
      )
      val event = CitizenBiosampleEvent.forUpdate(atUri, request)

      when(biosampleRepo.findByAtUri(atUri))
        .thenReturn(Future.successful(Some(existingBiosample)))

      val handler = new CitizenBiosampleEventHandler(
        biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo
      )

      whenReady(handler.handle(event)) { result =>
        result mustBe a[FirehoseResult.Conflict]
        result.asInstanceOf[FirehoseResult.Conflict].message must include("Optimistic locking")

        // The conflict must short-circuit before any write is attempted.
        verify(biosampleRepo, never).update(any[CitizenBiosample], any[Option[String]])
      }
    }

    // Happy-path delete: handler delegates to softDeleteByAtUri (soft delete,
    // not a hard row removal) and reports Success when the repo confirms it.
    "delete biosample successfully" in {
      val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks()

      val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/rkey1"
      val event = CitizenBiosampleEvent.forDelete(atUri)

      when(biosampleRepo.softDeleteByAtUri(atUri))
        .thenReturn(Future.successful(true))

      val handler = new CitizenBiosampleEventHandler(
        biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo
      )

      whenReady(handler.handle(event)) { result =>
        result mustBe a[FirehoseResult.Success]
        verify(biosampleRepo).softDeleteByAtUri(atUri)
      }
    }

    // softDeleteByAtUri returning false (no row affected) must surface as NotFound.
    "return NotFound when deleting non-existent biosample" in {
      val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks()

      val atUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/nonexistent"
      val event = CitizenBiosampleEvent.forDelete(atUri)

      when(biosampleRepo.softDeleteByAtUri(atUri))
        .thenReturn(Future.successful(false))

      val handler = new CitizenBiosampleEventHandler(
        biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo
      )

      whenReady(handler.handle(event)) { result =>
        result mustBe a[FirehoseResult.NotFound]
      }
    }

    // When the request omits citizenDid, the handler must derive the DID from
    // the authority segment of the at:// URI and use it for donor resolution.
    "extract DID correctly from atUri" in {
      val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks()

      // Request without explicit citizenDid - should extract from atUri
      val request = createRequest(
        atUri = "at://did:plc:abc123xyz/com.decodingus.atmosphere.biosample/rkey1"
      ).copy(citizenDid = None)

      val event = CitizenBiosampleEvent.forCreate(request)

      when(biosampleRepo.findByAccession(anyString()))
        .thenReturn(Future.successful(None))

      // Verify the DID is extracted correctly by checking the donor lookup
      when(donorRepo.findByDidAndIdentifier("did:plc:abc123xyz", "Subject-001"))
        .thenReturn(Future.successful(None))

      when(donorRepo.create(any[SpecimenDonor]))
        .thenAnswer(new Answer[Future[SpecimenDonor]] {
          override def answer(invocation: InvocationOnMock): Future[SpecimenDonor] = {
            val donor = invocation.getArgument[SpecimenDonor](0)
            // Verify the atUri on the donor is the extracted DID
            // NOTE(review): a failing matcher inside an Answer surfaces as a failed
            // Future from the stub, not as a direct assertion failure — consider an
            // ArgumentCaptor with the assertion after whenReady instead.
            donor.atUri mustBe Some("did:plc:abc123xyz")
            Future.successful(donor.copy(id = Some(1)))
          }
        })

      when(biosampleRepo.create(any[CitizenBiosample]))
        .thenAnswer(new Answer[Future[CitizenBiosample]] {
          override def answer(invocation: InvocationOnMock): Future[CitizenBiosample] = {
            Future.successful(invocation.getArgument[CitizenBiosample](0).copy(id = Some(1)))
          }
        })

      when(dataService.addSequenceData(any[UUID], any[SequenceDataInfo]))
        .thenReturn(Future.successful(()))

      val handler = new CitizenBiosampleEventHandler(
        biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo
      )

      whenReady(handler.handle(event)) { result =>
        result mustBe a[FirehoseResult.Success]
        verify(donorRepo).findByDidAndIdentifier("did:plc:abc123xyz", "Subject-001")
      }
    }

    // Create path with no donorIdentifier: biosample is persisted with
    // specimenDonorId = None and the donor repository is never consulted.
    "handle missing donorIdentifier gracefully (no donor created)" in {
      val (biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo) = createMocks()

      val request = createRequest(donorIdentifier = None)
      val event = CitizenBiosampleEvent.forCreate(request)

      when(biosampleRepo.findByAccession(anyString()))
        .thenReturn(Future.successful(None))

      when(biosampleRepo.create(any[CitizenBiosample]))
        .thenAnswer(new Answer[Future[CitizenBiosample]] {
          override def answer(invocation: InvocationOnMock): Future[CitizenBiosample] = {
            val bs = invocation.getArgument[CitizenBiosample](0)
            // NOTE(review): assertion inside the stub — see note in the DID test;
            // a captor-based check after whenReady would fail more legibly.
            bs.specimenDonorId mustBe None
            Future.successful(bs.copy(id = Some(100)))
          }
        })

      when(dataService.addSequenceData(any[UUID], any[SequenceDataInfo]))
        .thenReturn(Future.successful(()))

      val handler = new CitizenBiosampleEventHandler(
        biosampleRepo, dataService, pubRepo, pubBioRepo, haplogroupRepo, donorRepo
      )

      whenReady(handler.handle(event)) { result =>
        result mustBe a[FirehoseResult.Success]
        verify(donorRepo, never).findByDidAndIdentifier(anyString(), anyString())
        verify(donorRepo, never).create(any[SpecimenDonor])
      }
    }
  }
}