From 916ec501cc5ede571dfb2a34316ad768cf934f33 Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Thu, 29 Jan 2026 16:47:59 -0500
Subject: [PATCH 01/13] add file models

---
 api/filerecord/__init__.py |   7 ++
 api/filerecord/models.py   | 185 +++++++++++++++++++++++++++++++++++++
 2 files changed, 192 insertions(+)
 create mode 100644 api/filerecord/__init__.py
 create mode 100644 api/filerecord/models.py

diff --git a/api/filerecord/__init__.py b/api/filerecord/__init__.py
new file mode 100644
index 0000000..a238ddd
--- /dev/null
+++ b/api/filerecord/__init__.py
@@ -0,0 +1,7 @@
+"""
+FileRecord module - reusable file metadata records.
+
+This module provides a polymorphic file reference system that can associate
+file metadata (URI, size, hashes, tags) with various entity types like
+QCRecord, Sample, etc.
+"""
diff --git a/api/filerecord/models.py b/api/filerecord/models.py
new file mode 100644
index 0000000..fccb644
--- /dev/null
+++ b/api/filerecord/models.py
@@ -0,0 +1,185 @@
+"""
+FileRecord Models - Reusable file metadata records.
+
+These models provide a polymorphic file reference system that can associate
+file metadata (URI, size, hashes, tags, samples) with various entity types.
+"""
+
+import uuid
+from datetime import datetime
+from enum import Enum
+from typing import List, TYPE_CHECKING
+from sqlmodel import BigInteger, SQLModel, Field, Relationship, UniqueConstraint
+from pydantic import ConfigDict
+
+
+class FileRecordEntityType(str, Enum):
+    """Kinds of parent entity a FileRecord can belong to (the FileRecord.entity_type value)."""
+    QCRECORD = "QCRECORD"  # parent is a QCRecord
+    SAMPLE = "SAMPLE"  # parent is a sample entity
+
+
+# ============================================================================
+# Database Tables
+# ============================================================================
+
+
+class FileRecordHash(SQLModel, table=True):
+    """
+    One checksum of a file record, stored as an (algorithm, value) pair.
+    A file may have several rows (md5, sha256, etag, etc.), at most one per algorithm.
+    """
+    __tablename__ = "filerecordhash"
+
+    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
+    file_record_id: uuid.UUID = Field(foreign_key="filerecord.id", nullable=False)
+    algorithm: str = Field(max_length=50, nullable=False)  # e.g. "md5", "sha256"
+    value: str = Field(max_length=128, nullable=False)  # the hash itself, as text
+
+    # Owning FileRecord; the other side of FileRecord.hashes
+    file_record: "FileRecord" = Relationship(back_populates="hashes")
+
+    __table_args__ = (
+        UniqueConstraint("file_record_id", "algorithm", name="uq_filerecordhash_file_algorithm"),
+    )
+
+
+class FileRecordTag(SQLModel, table=True):
+    """
+    One key/value tag attached to a file record.
+    Keys are free-form but unique within a single file record.
+    """
+    __tablename__ = "filerecordtag"
+
+    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
+    file_record_id: uuid.UUID = Field(foreign_key="filerecord.id", nullable=False)
+    key: str = Field(max_length=255, nullable=False)
+    value: str = Field(nullable=False)  # no max_length set on the model
+
+    # Owning FileRecord; the other side of FileRecord.tags
+    file_record: "FileRecord" = Relationship(back_populates="tags")
+
+    __table_args__ = (
+        UniqueConstraint("file_record_id", "key", name="uq_filerecordtag_file_key"),
+    )
+
+
+class FileRecordSample(SQLModel, table=True):
+    """
+    Links a file record to a sample by name, optionally with a role.
+
+    The number of rows per file record encodes the file's scope:
+    - 0 rows: workflow-level file (e.g., expression matrix)
+    - 1 row: single-sample file (e.g., BAM file)
+    - N rows: multi-sample file with roles (e.g., tumor/normal VCF)
+    """
+    __tablename__ = "filerecordsample"
+
+    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
+    file_record_id: uuid.UUID = Field(foreign_key="filerecord.id", nullable=False)
+    sample_name: str = Field(max_length=255, nullable=False)  # plain name; declared without a foreign key
+    role: str | None = Field(default=None, max_length=50)  # e.g., "tumor", "normal"
+
+    # Owning FileRecord; the other side of FileRecord.samples
+    file_record: "FileRecord" = Relationship(back_populates="samples")
+
+    __table_args__ = (
+        UniqueConstraint("file_record_id", "sample_name", name="uq_filerecordsample_file_sample"),
+    )
+
+
+class FileRecord(SQLModel, table=True):
+    """
+    Metadata record for files stored in external locations (S3, etc.).
+
+    Polymorphic association: (entity_type, entity_id) names the parent row
+    (QCRecord, Sample, etc.); entity_id carries no foreign-key constraint.
+    """
+    __tablename__ = "filerecord"
+
+    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
+    entity_type: FileRecordEntityType = Field(nullable=False)
+    entity_id: uuid.UUID = Field(nullable=False)
+    uri: str = Field(max_length=1024, nullable=False)
+    size: int | None = Field(default=None, sa_type=BigInteger)  # bytes; 64-bit so files over 2 GiB fit
+    created_on: datetime | None = Field(default=None)  # File creation timestamp
+
+    # Child rows; the cascade deletes them together with their file record
+    hashes: List["FileRecordHash"] = Relationship(
+        back_populates="file_record",
+        sa_relationship_kwargs={"cascade": "all, delete-orphan"}
+    )
+    tags: List["FileRecordTag"] = Relationship(
+        back_populates="file_record",
+        sa_relationship_kwargs={"cascade": "all, delete-orphan"}
+    )
+    samples: List["FileRecordSample"] = Relationship(
+        back_populates="file_record",
+        sa_relationship_kwargs={"cascade": "all, delete-orphan"}
+    )
+
+    model_config = ConfigDict(from_attributes=True)
+
+
+# ============================================================================
+# Request/Response Models (Pydantic)
+# ============================================================================
+
+
+class HashInput(SQLModel):
+    """A single file hash given as an explicit (algorithm, value) pair."""
+    algorithm: str  # e.g. "md5", "sha256"
+    value: str  # the hash itself
+
+
+class TagInput(SQLModel):
+    """A single file tag given as an explicit (key, value) pair."""
+    key: str
+    value: str
+
+
+class SampleInput(SQLModel):
+    """One sample to associate with a file at creation time."""
+    sample_name: str
+    role: str | None = None  # optional, e.g. "tumor" or "normal"
+
+
+class FileRecordCreate(SQLModel):
+    """Request model for creating a file record; unknown fields are rejected (extra="forbid")."""
+    uri: str
+    size: int | None = None  # File size in bytes
+    created_on: datetime | None = None  # File creation timestamp
+    hash: dict[str, str] | None = None  # {"md5": "abc...", "sha256": "def..."}
+    tags: dict[str, str] | None = None  # {"type": "alignment", "format": "bam"}
+    samples: List[SampleInput] | None = None  # Sample associations
+
+    model_config = ConfigDict(extra="forbid")
+
+
+class HashPublic(SQLModel):
+    """Response shape for one stored file hash."""
+    algorithm: str  # e.g. "md5", "sha256"
+    value: str
+
+
+class TagPublic(SQLModel):
+    """Response shape for one stored file tag."""
+    key: str
+    value: str
+
+
+class SamplePublic(SQLModel):
+    """Response shape for one file-to-sample association."""
+    sample_name: str
+    role: str | None  # required in the response, but may be null
+
+
+class FileRecordPublic(SQLModel):
+    """Public representation of a file record (entity_type and entity_id are not exposed)."""
+    id: uuid.UUID
+    uri: str
+    size: int | None  # File size in bytes
+    created_on: datetime | None  # File creation timestamp
+    hashes: List[HashPublic]
+    tags: List[TagPublic]
+    samples: List[SamplePublic]

From 7ab85b522a727c2b0b24097c8a4c2a744d5ad4bf Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Thu, 29 Jan 2026 16:49:26 -0500
Subject: [PATCH 02/13] add qcmetric models

---
 api/qcmetrics/__init__.py |   6 +
 api/qcmetrics/models.py   | 241 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 247 insertions(+)
 create mode 100644 api/qcmetrics/__init__.py
 create mode 100644 api/qcmetrics/models.py

diff --git a/api/qcmetrics/__init__.py b/api/qcmetrics/__init__.py
new file mode 100644
index 0000000..2328ac2
--- /dev/null
+++ b/api/qcmetrics/__init__.py
@@ -0,0 +1,6 @@
+"""
+QCMetrics module - Quality control metrics from pipeline executions.
+
+This module provides models and APIs for storing and retrieving QC metrics
+from bioinformatics pipeline runs.
+"""
diff --git a/api/qcmetrics/models.py b/api/qcmetrics/models.py
new file mode 100644
index 0000000..c980850
--- /dev/null
+++ b/api/qcmetrics/models.py
@@ -0,0 +1,241 @@
+"""
+QCMetrics Models - Quality control metrics from pipeline executions.
+
+These models store QC metrics and outputs from bioinformatics pipelines,
+supporting workflow-level, single-sample, and multi-sample (paired) metrics.
+"""
+
+import uuid
+from datetime import datetime, timezone
+from typing import List
+from sqlmodel import SQLModel, Field, Relationship, UniqueConstraint
+from pydantic import ConfigDict
+
+from api.filerecord.models import (
+    FileRecordCreate,
+    FileRecordPublic,
+)
+
+
+# ============================================================================
+# Database Tables
+# ============================================================================
+
+
+class QCRecordMetadata(SQLModel, table=True):
+    """
+    One key/value metadata entry of a QC record; keys are unique per record.
+    Examples: pipeline name, version, configuration parameters.
+    """
+    __tablename__ = "qcrecordmetadata"
+
+    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
+    qcrecord_id: uuid.UUID = Field(foreign_key="qcrecord.id", nullable=False)
+    key: str = Field(max_length=255, nullable=False)
+    value: str = Field(nullable=False)  # held as text; no max_length set on the model
+
+    # Owning QCRecord; the other side of the QCRecord.metadata relationship
+    qcrecord: "QCRecord" = Relationship(back_populates="metadata")
+
+    __table_args__ = (
+        UniqueConstraint("qcrecord_id", "key", name="uq_qcrecordmetadata_record_key"),
+    )
+
+
+class QCMetricValue(SQLModel, table=True):
+    """
+    One key/value measurement inside a metric group; keys are unique per group.
+    Examples: reads=50000000, alignment_rate=95.5, tmb=8.5
+    """
+    __tablename__ = "qcmetricvalue"
+
+    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
+    qc_metric_id: uuid.UUID = Field(foreign_key="qcmetric.id", nullable=False)
+    key: str = Field(max_length=255, nullable=False)
+    value: str = Field(nullable=False)  # held as text, including numeric values
+
+    # Owning QCMetric; the other side of QCMetric.values
+    qc_metric: "QCMetric" = Relationship(back_populates="values")
+
+    __table_args__ = (
+        UniqueConstraint("qc_metric_id", "key", name="uq_qcmetricvalue_metric_key"),
+    )
+
+
+class QCMetricSample(SQLModel, table=True):
+    """
+    Links a metric group to a sample by name, optionally with a role.
+
+    The number of rows per metric group encodes the metric's scope:
+    - 0 rows: workflow-level metric (e.g., pipeline runtime)
+    - 1 row: single-sample metric (e.g., alignment stats for Sample1)
+    - N rows: multi-sample metric with roles (e.g., tumor/normal somatic variants)
+    """
+    __tablename__ = "qcmetricsample"
+
+    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
+    qc_metric_id: uuid.UUID = Field(foreign_key="qcmetric.id", nullable=False)
+    sample_name: str = Field(max_length=255, nullable=False)  # plain name; declared without a foreign key
+    role: str | None = Field(default=None, max_length=50)  # e.g., "tumor", "normal"
+
+    # Owning QCMetric; the other side of QCMetric.samples
+    qc_metric: "QCMetric" = Relationship(back_populates="samples")
+
+    __table_args__ = (
+        UniqueConstraint("qc_metric_id", "sample_name", name="uq_qcmetricsample_metric_sample"),
+    )
+
+
+class QCMetric(SQLModel, table=True):
+    """
+    A named group of metric values belonging to one QC record.
+
+    Scope follows its sample rows: none (workflow-level), one, or several (paired).
+    Examples: alignment_stats, somatic_variants, expression_summary
+    """
+    __tablename__ = "qcmetric"
+
+    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
+    qcrecord_id: uuid.UUID = Field(foreign_key="qcrecord.id", nullable=False)
+    name: str = Field(max_length=255, nullable=False)  # unique within a QC record
+
+    # Child rows; the cascade deletes them together with their metric group
+    values: List["QCMetricValue"] = Relationship(
+        back_populates="qc_metric",
+        sa_relationship_kwargs={"cascade": "all, delete-orphan"}
+    )
+    samples: List["QCMetricSample"] = Relationship(
+        back_populates="qc_metric",
+        sa_relationship_kwargs={"cascade": "all, delete-orphan"}
+    )
+
+    # Owning QCRecord; the other side of QCRecord.metrics
+    qcrecord: "QCRecord" = Relationship(back_populates="metrics")
+
+    __table_args__ = (
+        UniqueConstraint("qcrecord_id", "name", name="uq_qcmetric_record_name"),
+    )
+
+
+class QCRecord(SQLModel, table=True):
+    """
+    Main QC record entity - one per pipeline execution per project.
+    Several records may exist per project (history); created_on tells them apart.
+    NOTE(review): SQLAlchemy's Declarative API reserves the attribute name
+    "metadata"; confirm the `metadata` relationship below maps without error.
+    """
+    __tablename__ = "qcrecord"
+    __searchable__ = ["project_id"]  # presumably read by a shared search helper - TODO confirm
+
+    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
+    created_on: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),  # timezone-aware UTC "now"
+        nullable=False
+    )
+    created_by: str = Field(max_length=100, nullable=False)
+    project_id: str = Field(max_length=50, nullable=False, index=True)
+
+    # Child rows; the cascade deletes them together with their QC record
+    metadata: List["QCRecordMetadata"] = Relationship(
+        back_populates="qcrecord",
+        sa_relationship_kwargs={"cascade": "all, delete-orphan"}
+    )
+    metrics: List["QCMetric"] = Relationship(
+        back_populates="qcrecord",
+        sa_relationship_kwargs={"cascade": "all, delete-orphan"}
+    )
+
+    model_config = ConfigDict(from_attributes=True)
+
+
+# ============================================================================
+# Request/Response Models (Pydantic)
+# ============================================================================
+
+
+class MetadataKeyValue(SQLModel):
+    """One metadata entry of a QC record as a (key, value) pair."""
+    key: str
+    value: str
+
+
+class MetricValueInput(SQLModel):
+    """One metric value given as an explicit (key, value) pair."""
+    key: str
+    value: str
+
+
+class MetricSampleInput(SQLModel):
+    """One sample to associate with a metric group at creation time."""
+    sample_name: str
+    role: str | None = None  # optional, e.g. "tumor" or "normal"
+
+
+class MetricInput(SQLModel):
+    """Input model for one metric group: a name, optional sample links, and its values."""
+    name: str
+    samples: List[MetricSampleInput] | None = None  # omit for a workflow-level metric
+    values: dict[str, str]  # {"reads": "50000000", "alignment_rate": "95.5"}
+
+
+class QCRecordCreate(SQLModel):
+    """
+    Request model for creating a QC record; unknown fields are rejected.
+
+    Metrics may be sent as `metrics` (explicit) and/or `sample_level_metrics` (legacy).
+    """
+    project_id: str
+    metadata: dict[str, str] | None = None  # {"pipeline": "RNA-Seq", "version": "2.0"}
+    metrics: List[MetricInput] | None = None  # New format with explicit sample associations
+    sample_level_metrics: dict[str, dict[str, str]] | None = None  # Legacy ES format: {sample: {key: value}}
+    output_files: List[FileRecordCreate] | None = None
+
+    model_config = ConfigDict(extra="forbid")
+
+
+class MetricValuePublic(SQLModel):
+    """Response shape for one stored metric value."""
+    key: str
+    value: str
+
+
+class MetricSamplePublic(SQLModel):
+    """Response shape for one metric-to-sample association."""
+    sample_name: str
+    role: str | None  # required in the response, but may be null
+
+
+class MetricPublic(SQLModel):
+    """Public representation of a metric group; values come back as a list of pairs."""
+    name: str
+    samples: List[MetricSamplePublic]
+    values: List[MetricValuePublic]
+
+
+class QCRecordPublic(SQLModel):
+    """Public representation of a QC record with its metadata, metrics and output files."""
+    id: uuid.UUID
+    created_on: datetime
+    created_by: str
+    project_id: str
+    metadata: List[MetadataKeyValue]
+    metrics: List[MetricPublic]
+    output_files: List[FileRecordPublic]
+
+
+class QCRecordsPublic(SQLModel):
+    """One page of QC records plus the paging values it was built with."""
+    data: List[QCRecordPublic]  # records on this page
+    total: int  # number of matching records across all pages
+    page: int
+    per_page: int
+
+
+class QCRecordSearchRequest(SQLModel):
+    """Search request body: optional filters plus 1-based pagination (page and per_page >= 1)."""
+    filter_on: dict | None = None  # Flexible filtering
+    page: int = Field(default=1, ge=1)  # below 1 would become a negative slice offset
+    per_page: int = Field(default=100, ge=1)
+    latest: bool = True  # Return only newest version per project
+
+    model_config = ConfigDict(extra="forbid")

From 3b61eb18304418e997ed0286ea7f6b16a4906c2e Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Thu, 29 Jan 2026 18:48:24 -0500
Subject: [PATCH 03/13] add qcmetrics and filerecord migration

---
 ...5e6_add_qcmetrics_and_filerecord_tables.py | 161 ++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py

diff --git a/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py b/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py
new file mode 100644
index 0000000..4c489f2
--- /dev/null
+++ b/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py
@@ -0,0 +1,161 @@
+"""Add QCMetrics and FileRecord tables
+
+Revision ID: f1a2b3c4d5e6
+Revises: e158df5a8df1
+Create Date: 2026-01-29 16:45:00.000000
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+import sqlmodel
+
+
+# revision identifiers, used by Alembic.
+revision: str = 'f1a2b3c4d5e6'
+down_revision: Union[str, Sequence[str], None] = 'e158df5a8df1'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create the FileRecord and QCRecord tables, each parent before its children."""
+
+    # ========================================================================
+    # FileRecord Tables (reusable across QCRecord, Sample, etc.)
+    # ========================================================================
+
+    # filerecord - main file metadata table; entity_id has no FK (polymorphic parent)
+    op.create_table(
+        'filerecord',
+        sa.Column('id', sa.Uuid(), nullable=False),
+        sa.Column('entity_type', sqlmodel.sql.sqltypes.AutoString(length=50), nullable=False),  # NOTE(review): string here, enum on the model - confirm
+        sa.Column('entity_id', sa.Uuid(), nullable=False),
+        sa.Column('uri', sqlmodel.sql.sqltypes.AutoString(length=1024), nullable=False),
+        sa.Column('size', sa.BigInteger(), nullable=True),  # file size in bytes
+        sa.Column('created_on', sa.DateTime(), nullable=True),
+        sa.PrimaryKeyConstraint('id')
+    )
+    op.create_index(  # composite index over the (entity_type, entity_id) pair
+        'ix_filerecord_entity',
+        'filerecord',
+        ['entity_type', 'entity_id']
+    )
+
+    # filerecordhash - hash values for files; one row per (file, algorithm)
+    op.create_table(
+        'filerecordhash',
+        sa.Column('id', sa.Uuid(), nullable=False),
+        sa.Column('file_record_id', sa.Uuid(), nullable=False),
+        sa.Column('algorithm', sqlmodel.sql.sqltypes.AutoString(length=50), nullable=False),
+        sa.Column('value', sqlmodel.sql.sqltypes.AutoString(length=128), nullable=False),
+        sa.ForeignKeyConstraint(['file_record_id'], ['filerecord.id'], ondelete='CASCADE'),
+        sa.PrimaryKeyConstraint('id'),
+        sa.UniqueConstraint('file_record_id', 'algorithm', name='uq_filerecordhash_file_algorithm')
+    )
+
+    # filerecordtag - key-value tags for files; one row per (file, key)
+    op.create_table(
+        'filerecordtag',
+        sa.Column('id', sa.Uuid(), nullable=False),
+        sa.Column('file_record_id', sa.Uuid(), nullable=False),
+        sa.Column('key', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False),
+        sa.Column('value', sa.Text(), nullable=False),
+        sa.ForeignKeyConstraint(['file_record_id'], ['filerecord.id'], ondelete='CASCADE'),
+        sa.PrimaryKeyConstraint('id'),
+        sa.UniqueConstraint('file_record_id', 'key', name='uq_filerecordtag_file_key')
+    )
+
+    # filerecordsample - sample associations for files; one row per (file, sample_name)
+    op.create_table(
+        'filerecordsample',
+        sa.Column('id', sa.Uuid(), nullable=False),
+        sa.Column('file_record_id', sa.Uuid(), nullable=False),
+        sa.Column('sample_name', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False),
+        sa.Column('role', sqlmodel.sql.sqltypes.AutoString(length=50), nullable=True),
+        sa.ForeignKeyConstraint(['file_record_id'], ['filerecord.id'], ondelete='CASCADE'),
+        sa.PrimaryKeyConstraint('id'),
+        sa.UniqueConstraint('file_record_id', 'sample_name', name='uq_filerecordsample_file_sample')
+    )
+
+    # ========================================================================
+    # QCRecord Tables
+    # ========================================================================
+
+    # qcrecord - main QC record table (parent of qcrecordmetadata and qcmetric)
+    op.create_table(
+        'qcrecord',
+        sa.Column('id', sa.Uuid(), nullable=False),
+        sa.Column('created_on', sa.DateTime(), nullable=False),  # NOTE(review): no timezone=True; model default is UTC-aware - confirm
+        sa.Column('created_by', sqlmodel.sql.sqltypes.AutoString(length=100), nullable=False),
+        sa.Column('project_id', sqlmodel.sql.sqltypes.AutoString(length=50), nullable=False),
+        sa.PrimaryKeyConstraint('id')
+    )
+    op.create_index('ix_qcrecord_project_id', 'qcrecord', ['project_id'])
+
+    # qcrecordmetadata - pipeline-level metadata; one row per (record, key)
+    op.create_table(
+        'qcrecordmetadata',
+        sa.Column('id', sa.Uuid(), nullable=False),
+        sa.Column('qcrecord_id', sa.Uuid(), nullable=False),
+        sa.Column('key', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False),
+        sa.Column('value', sa.Text(), nullable=False),
+        sa.ForeignKeyConstraint(['qcrecord_id'], ['qcrecord.id'], ondelete='CASCADE'),
+        sa.PrimaryKeyConstraint('id'),
+        sa.UniqueConstraint('qcrecord_id', 'key', name='uq_qcrecordmetadata_record_key')
+    )
+
+    # qcmetric - named metric groups; one row per (record, name)
+    op.create_table(
+        'qcmetric',
+        sa.Column('id', sa.Uuid(), nullable=False),
+        sa.Column('qcrecord_id', sa.Uuid(), nullable=False),
+        sa.Column('name', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False),
+        sa.ForeignKeyConstraint(['qcrecord_id'], ['qcrecord.id'], ondelete='CASCADE'),
+        sa.PrimaryKeyConstraint('id'),
+        sa.UniqueConstraint('qcrecord_id', 'name', name='uq_qcmetric_record_name')
+    )
+
+    # qcmetricvalue - metric values; one row per (metric, key)
+    op.create_table(
+        'qcmetricvalue',
+        sa.Column('id', sa.Uuid(), nullable=False),
+        sa.Column('qc_metric_id', sa.Uuid(), nullable=False),
+        sa.Column('key', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False),
+        sa.Column('value', sa.Text(), nullable=False),
+        sa.ForeignKeyConstraint(['qc_metric_id'], ['qcmetric.id'], ondelete='CASCADE'),
+        sa.PrimaryKeyConstraint('id'),
+        sa.UniqueConstraint('qc_metric_id', 'key', name='uq_qcmetricvalue_metric_key')
+    )
+
+    # qcmetricsample - sample associations for metrics; one row per (metric, sample_name)
+    op.create_table(
+        'qcmetricsample',
+        sa.Column('id', sa.Uuid(), nullable=False),
+        sa.Column('qc_metric_id', sa.Uuid(), nullable=False),
+        sa.Column('sample_name', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False),
+        sa.Column('role', sqlmodel.sql.sqltypes.AutoString(length=50), nullable=True),
+        sa.ForeignKeyConstraint(['qc_metric_id'], ['qcmetric.id'], ondelete='CASCADE'),
+        sa.PrimaryKeyConstraint('id'),
+        sa.UniqueConstraint('qc_metric_id', 'sample_name', name='uq_qcmetricsample_metric_sample')
+    )
+
+
+def downgrade() -> None:
+    """Drop every table and index created by upgrade(), children before parents."""
+
+    # QCRecord family: tables holding foreign keys go first, qcrecord last
+    op.drop_table('qcmetricsample')
+    op.drop_table('qcmetricvalue')
+    op.drop_table('qcmetric')
+    op.drop_table('qcrecordmetadata')
+    op.drop_index('ix_qcrecord_project_id', table_name='qcrecord')
+    op.drop_table('qcrecord')
+
+    # FileRecord family: child tables, then the index, then filerecord itself
+    op.drop_table('filerecordsample')
+    op.drop_table('filerecordtag')
+    op.drop_table('filerecordhash')
+    op.drop_index('ix_filerecord_entity', table_name='filerecord')
+    op.drop_table('filerecord')

From f0980fad6847f96b4adf962d171faf152f0e1c07 Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Thu, 29 Jan 2026 19:01:32 -0500
Subject: [PATCH 04/13] add qcrecord CRUD methods

---
 api/qcmetrics/services.py | 480 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 480 insertions(+)
 create mode 100644 api/qcmetrics/services.py

diff --git a/api/qcmetrics/services.py b/api/qcmetrics/services.py
new file mode 100644
index 0000000..c4bd136
--- /dev/null
+++ b/api/qcmetrics/services.py
@@ -0,0 +1,480 @@
+"""
+Services for the QCMetrics API.
+
+Business logic for creating, searching, and deleting QC records.
+"""
+
+import logging
+from datetime import datetime, timezone
+from typing import List
+from fastapi import HTTPException, status
+from sqlmodel import Session, select, col
+from sqlalchemy import func
+
+from api.qcmetrics.models import (
+    QCRecord,
+    QCRecordMetadata,
+    QCMetric,
+    QCMetricValue,
+    QCMetricSample,
+    QCRecordCreate,
+    QCRecordPublic,
+    QCRecordsPublic,
+    MetadataKeyValue,
+    MetricPublic,
+    MetricValuePublic,
+    MetricSamplePublic,
+    MetricInput,
+)
+from api.filerecord.models import (
+    FileRecord,
+    FileRecordHash,
+    FileRecordTag,
+    FileRecordSample,
+    FileRecordEntityType,
+    FileRecordCreate,
+    FileRecordPublic,
+    HashPublic,
+    TagPublic,
+    SamplePublic,
+)
+
+
+logger = logging.getLogger(__name__)
+
+
+def create_qcrecord(
+    session: Session,
+    qcrecord_create: QCRecordCreate,
+    created_by: str,
+) -> QCRecordPublic:
+    """
+    Persist a QC record and everything attached to it in a single commit.
+
+    If _check_duplicate_record reports an equivalent record, nothing is
+    written and that existing record is returned instead.
+
+    Metrics may arrive as ``metrics`` (explicit groups) and/or as the legacy
+    ES ``sample_level_metrics`` dict; each legacy entry becomes one group
+    named "sample_metrics_<sample>" linked to that sample.
+
+    Args:
+        session: Database session; committed by this function.
+        qcrecord_create: Request payload describing the record.
+        created_by: Value stored in QCRecord.created_by.
+
+    Returns:
+        Public view of the new (or the already existing) record.
+    """
+    duplicate = _check_duplicate_record(session, qcrecord_create)
+    if duplicate:
+        logger.info(
+            "Equivalent QC record already exists for project %s: %s",
+            qcrecord_create.project_id,
+            duplicate.id
+        )
+        return _qcrecord_to_public(session, duplicate)
+
+    # Parent row first; it is flushed before any child row is added
+    qcrecord = QCRecord(
+        created_on=datetime.now(timezone.utc),
+        created_by=created_by,
+        project_id=qcrecord_create.project_id,
+    )
+    session.add(qcrecord)
+    session.flush()
+
+    # Pipeline-level metadata; values are stored as text
+    for key, value in (qcrecord_create.metadata or {}).items():
+        session.add(QCRecordMetadata(qcrecord_id=qcrecord.id, key=key, value=str(value)))
+
+    # Explicit metric groups (new format)
+    for metric_input in qcrecord_create.metrics or []:
+        _create_metric(session, qcrecord.id, metric_input)
+
+    # Legacy ES format: wrap each sample's dict in its own metric group
+    legacy_metrics = qcrecord_create.sample_level_metrics or {}
+    for sample_name, metrics_dict in legacy_metrics.items():
+        legacy_group = MetricInput(
+            name=f"sample_metrics_{sample_name}",
+            samples=[{"sample_name": sample_name}],
+            values=metrics_dict,
+        )
+        _create_metric(session, qcrecord.id, legacy_group)
+
+    # Output files hang off the record through FileRecord's entity link
+    for file_create in qcrecord_create.output_files or []:
+        _create_file_record(
+            session,
+            entity_type=FileRecordEntityType.QCRECORD,
+            entity_id=qcrecord.id,
+            file_create=file_create,
+        )
+
+    session.commit()
+    session.refresh(qcrecord)
+    logger.info(
+        "Created QC record %s for project %s by %s",
+        qcrecord.id,
+        qcrecord.project_id,
+        created_by
+    )
+    return _qcrecord_to_public(session, qcrecord)
+
+
+def _create_metric(
+    session: Session,
+    qcrecord_id,
+    metric_input: MetricInput,
+) -> QCMetric:
+    """
+    Add one metric group, its sample links and its values to the session.
+
+    The group is flushed before its children are added; nothing is
+    committed here. Sample entries may be objects or plain dicts.
+    """
+    group = QCMetric(qcrecord_id=qcrecord_id, name=metric_input.name)
+    session.add(group)
+    session.flush()
+
+    # Sample links: read attributes when present, else fall back to dict keys
+    for entry in metric_input.samples or []:
+        if hasattr(entry, 'sample_name'):
+            linked_name = entry.sample_name
+        else:
+            linked_name = entry['sample_name']
+        linked_role = entry.role if hasattr(entry, 'role') else entry.get('role')
+        session.add(
+            QCMetricSample(qc_metric_id=group.id, sample_name=linked_name, role=linked_role)
+        )
+
+    # Values are persisted as text regardless of their incoming type
+    session.add_all(
+        QCMetricValue(qc_metric_id=group.id, key=name, value=str(raw))
+        for name, raw in metric_input.values.items()
+    )
+
+    return group
+
+
+def _create_file_record(
+    session: Session,
+    entity_type: FileRecordEntityType,
+    entity_id,
+    file_create: FileRecordCreate,
+) -> FileRecord:
+    """
+    Add a file record plus its hashes, tags and sample links to the session.
+
+    The parent row is flushed before its children are added; nothing is
+    committed here. Returns the newly added FileRecord.
+
+    Args:
+        session: Database session.
+        entity_type: Kind of entity that owns the file.
+        entity_id: Id of the owning entity.
+        file_create: File description to store.
+    """
+    # Parent row first
+    record = FileRecord(
+        entity_type=entity_type,
+        entity_id=entity_id,
+        uri=file_create.uri,
+        size=file_create.size,
+        created_on=file_create.created_on,
+    )
+    session.add(record)
+    session.flush()
+
+    # One row per hash algorithm
+    for algorithm, digest in (file_create.hash or {}).items():
+        session.add(
+            FileRecordHash(file_record_id=record.id, algorithm=algorithm, value=digest)
+        )
+
+    # One row per tag; tag values are stored as text
+    for tag_key, tag_value in (file_create.tags or {}).items():
+        session.add(
+            FileRecordTag(file_record_id=record.id, key=tag_key, value=str(tag_value))
+        )
+
+    # One row per associated sample
+    for link in file_create.samples or []:
+        session.add(
+            FileRecordSample(file_record_id=record.id, sample_name=link.sample_name, role=link.role)
+        )
+
+    return record
+
+
+def _check_duplicate_record(
+    session: Session,
+    qcrecord_create: QCRecordCreate,
+) -> QCRecord | None:
+    """
+    Check if an equivalent QC record already exists.
+
+    Only the newest record of the project is inspected. It counts as
+    equivalent when its stored metadata equals the request's metadata
+    (values compared as strings); metrics and files are not compared.
+
+    NOTE(review): a request with new metrics but the same metadata as the
+    newest record (including both being empty) is therefore reported as a
+    duplicate. Confirm this is intended.
+
+    Returns the existing record if found, None otherwise.
+    """
+    # Only the newest record is compared: fetch that one row, not the full history
+    latest = session.exec(
+        select(QCRecord)
+        .where(QCRecord.project_id == qcrecord_create.project_id)
+        .order_by(col(QCRecord.created_on).desc())
+        .limit(1)
+    ).first()
+    if latest is None:
+        return None
+
+    # Metadata currently stored for that record
+    existing_metadata = {
+        m.key: m.value
+        for m in session.exec(
+            select(QCRecordMetadata).where(
+                QCRecordMetadata.qcrecord_id == latest.id
+            )
+        ).all()
+    }
+
+    # Stored values are text, so stringify the incoming ones before comparing
+    new_metadata = qcrecord_create.metadata or {}
+    if existing_metadata == {k: str(v) for k, v in new_metadata.items()}:
+        return latest
+    return None
+
+
+def search_qcrecords(
+    session: Session,
+    filter_on: dict | None = None,
+    page: int = 1,
+    per_page: int = 100,
+    latest: bool = True,
+) -> QCRecordsPublic:
+    """
+    Search for QC records with filtering and pagination.
+
+    Only the "project_id" and "metadata" keys of filter_on are applied.
+    Matching rows are loaded, then reduced and paginated in Python.
+
+    Args:
+        session: Database session
+        filter_on: {"project_id": value or list, "metadata": {key: value}}
+        page: Page number (1-based, must be >= 1)
+        per_page: Results per page (must be >= 1)
+        latest: If True, return only the newest record per project
+
+    Returns:
+        QCRecordsPublic; total is counted after the "latest" reduction.
+
+    Raises:
+        HTTPException: 400 if page or per_page is less than 1.
+    """
+    # Below 1, the slice bounds go negative and index from the END of the list
+    if page < 1 or per_page < 1:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="page and per_page must be >= 1",
+        )
+
+    filter_on = filter_on or {}
+    stmt = select(QCRecord)
+
+    # project_id accepts either a single value or a list of values
+    if "project_id" in filter_on:
+        project_ids = filter_on["project_id"]
+        if isinstance(project_ids, list):
+            stmt = stmt.where(col(QCRecord.project_id).in_(project_ids))
+        else:
+            stmt = stmt.where(QCRecord.project_id == project_ids)
+
+    # Every metadata key/value pair must match (one IN-subquery per pair)
+    if "metadata" in filter_on and isinstance(filter_on["metadata"], dict):
+        for key, value in filter_on["metadata"].items():
+            subq = select(QCRecordMetadata.qcrecord_id).where(
+                QCRecordMetadata.key == key,
+                QCRecordMetadata.value == str(value)
+            )
+            stmt = stmt.where(col(QCRecord.id).in_(subq))
+
+    # Newest first, so the first row seen for a project is its latest record
+    stmt = stmt.order_by(col(QCRecord.created_on).desc())
+    all_records = list(session.exec(stmt).all())
+
+    if latest:
+        newest_by_project = {}
+        for record in all_records:
+            newest_by_project.setdefault(record.project_id, record)
+        all_records = list(newest_by_project.values())
+
+    start_idx = (page - 1) * per_page
+    paginated_records = all_records[start_idx:start_idx + per_page]
+    return QCRecordsPublic(
+        data=[_qcrecord_to_public(session, r) for r in paginated_records],
+        total=len(all_records),
+        page=page,
+        per_page=per_page,
+    )
+
+
+def get_qcrecord_by_id(session: Session, qcrecord_id: str) -> QCRecordPublic:
+    """Get a single QC record by ID."""
+    import uuid as uuid_module
+    
+    try:
+        record_uuid = uuid_module.UUID(qcrecord_id)
+    except ValueError as exc:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Invalid UUID format: {qcrecord_id}"
+        ) from exc
+    
+    record = session.get(QCRecord, record_uuid)
+    if not record:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"QC record not found: {qcrecord_id}"
+        )
+    
+    return _qcrecord_to_public(session, record)
+
+
+def delete_qcrecord(session: Session, qcrecord_id: str) -> dict:
+    """Delete a QC record and all associated data."""
+    import uuid as uuid_module
+    
+    try:
+        record_uuid = uuid_module.UUID(qcrecord_id)
+    except ValueError as exc:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Invalid UUID format: {qcrecord_id}"
+        ) from exc
+    
+    record = session.get(QCRecord, record_uuid)
+    if not record:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"QC record not found: {qcrecord_id}"
+        )
+    
+    # Delete associated file records (polymorphic, not cascade)
+    file_records = session.exec(
+        select(FileRecord).where(
+            FileRecord.entity_type == FileRecordEntityType.QCRECORD,
+            FileRecord.entity_id == record_uuid
+        )
+    ).all()
+    
+    for file_record in file_records:
+        session.delete(file_record)
+    
+    # Delete the QC record (cascades to metadata, metrics, etc.)
+    session.delete(record)
+    session.commit()
+    
+    logger.info("Deleted QC record %s", qcrecord_id)
+    
+    return {"status": "deleted", "id": qcrecord_id}
+
+
+def _qcrecord_to_public(session: Session, record: QCRecord) -> QCRecordPublic:
+    """Convert a QCRecord database object to public format."""
+    # Get metadata
+    metadata_entries = session.exec(
+        select(QCRecordMetadata).where(
+            QCRecordMetadata.qcrecord_id == record.id
+        )
+    ).all()
+    
+    metadata = [
+        MetadataKeyValue(key=m.key, value=m.value)
+        for m in metadata_entries
+    ]
+    
+    # Get metrics
+    metric_entries = session.exec(
+        select(QCMetric).where(QCMetric.qcrecord_id == record.id)
+    ).all()
+    
+    metrics = []
+    for metric in metric_entries:
+        # Get metric values
+        values = session.exec(
+            select(QCMetricValue).where(QCMetricValue.qc_metric_id == metric.id)
+        ).all()
+        
+        # Get metric samples
+        samples = session.exec(
+            select(QCMetricSample).where(QCMetricSample.qc_metric_id == metric.id)
+        ).all()
+        
+        metrics.append(MetricPublic(
+            name=metric.name,
+            samples=[
+                MetricSamplePublic(sample_name=s.sample_name, role=s.role)
+                for s in samples
+            ],
+            values=[
+                MetricValuePublic(key=v.key, value=v.value)
+                for v in values
+            ],
+        ))
+    
+    # Get file records
+    file_records = session.exec(
+        select(FileRecord).where(
+            FileRecord.entity_type == FileRecordEntityType.QCRECORD,
+            FileRecord.entity_id == record.id
+        )
+    ).all()
+    
+    output_files = []
+    for file_record in file_records:
+        # Get hashes
+        hashes = session.exec(
+            select(FileRecordHash).where(
+                FileRecordHash.file_record_id == file_record.id
+            )
+        ).all()
+        
+        # Get tags
+        tags = session.exec(
+            select(FileRecordTag).where(
+                FileRecordTag.file_record_id == file_record.id
+            )
+        ).all()
+        
+        # Get samples
+        samples = session.exec(
+            select(FileRecordSample).where(
+                FileRecordSample.file_record_id == file_record.id
+            )
+        ).all()
+        
+        output_files.append(FileRecordPublic(
+            id=file_record.id,
+            uri=file_record.uri,
+            size=file_record.size,
+            created_on=file_record.created_on,
+            hashes=[HashPublic(algorithm=h.algorithm, value=h.value) for h in hashes],
+            tags=[TagPublic(key=t.key, value=t.value) for t in tags],
+            samples=[SamplePublic(sample_name=s.sample_name, role=s.role) for s in samples],
+        ))
+    
+    return QCRecordPublic(
+        id=record.id,
+        created_on=record.created_on,
+        created_by=record.created_by,
+        project_id=record.project_id,
+        metadata=metadata,
+        metrics=metrics,
+        output_files=output_files,
+    )

From 91407637f8bef75bf5a53559bbb4df4c495067ea Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Thu, 29 Jan 2026 19:02:54 -0500
Subject: [PATCH 05/13] add qcmetrics routes

---
 api/qcmetrics/routes.py | 206 ++++++++++++++++++++++++++++++++++++++++
 main.py                 |   1 +
 2 files changed, 207 insertions(+)
 create mode 100644 api/qcmetrics/routes.py

diff --git a/api/qcmetrics/routes.py b/api/qcmetrics/routes.py
new file mode 100644
index 0000000..29c18d3
--- /dev/null
+++ b/api/qcmetrics/routes.py
@@ -0,0 +1,206 @@
+"""
+Routes/endpoints for the QCMetrics API.
+
+Provides endpoints for creating, searching, and deleting QC records.
+"""
+
+from fastapi import APIRouter, Depends, Query, status
+from typing import Optional
+
+from api.qcmetrics.models import (
+    QCRecordCreate,
+    QCRecordPublic,
+    QCRecordsPublic,
+    QCRecordSearchRequest,
+)
+from api.qcmetrics import services
+from core.deps import SessionDep
+
+router = APIRouter(prefix="/qcmetrics", tags=["QC Metrics"])
+
+
+@router.post(
+    "",
+    response_model=QCRecordPublic,
+    status_code=status.HTTP_201_CREATED,
+    summary="Create a new QC record",
+)
+def create_qcrecord(
+    session: SessionDep,
+    qcrecord_create: QCRecordCreate,
+    created_by: str = Query(
+        ...,
+        description="Username of the person creating this record"
+    ),
+) -> QCRecordPublic:
+    """
+    Create a new QC record with metrics and output files.
+    
+    The record stores quality control metrics from a pipeline execution.
+    
+    **Request body format:**
+    
+    ```json
+    {
+      "project_id": "P-1234",
+      "metadata": {
+        "pipeline": "RNA-Seq",
+        "version": "2.0.0"
+      },
+      "metrics": [
+        {
+          "name": "alignment_stats",
+          "samples": [{"sample_name": "Sample1"}],
+          "values": {"reads": "50000000", "alignment_rate": "95.5"}
+        }
+      ],
+      "output_files": [
+        {
+          "uri": "s3://bucket/path/file.bam",
+          "size": 123456789,
+          "samples": [{"sample_name": "Sample1"}],
+          "hash": {"md5": "abc123..."},
+          "tags": {"type": "alignment"}
+        }
+      ]
+    }
+    ```
+    
+    **Sample association patterns:**
+    - **Workflow-level**: Omit `samples` array (applies to entire pipeline run)
+    - **Single sample**: One entry in `samples` array
+    - **Sample pair**: Two entries with roles, e.g., `[{"sample_name": "T1", "role": "tumor"}, {"sample_name": "N1", "role": "normal"}]`
+    
+    **Duplicate detection:**
+    If an equivalent record already exists for the project (same metadata),
+    the existing record is returned instead of creating a duplicate.
+    """
+    return services.create_qcrecord(session, qcrecord_create, created_by)
+
+
+@router.get(
+    "/search",
+    response_model=QCRecordsPublic,
+    summary="Search QC records (GET)",
+)
+def search_qcrecords_get(
+    session: SessionDep,
+    project_id: Optional[str] = Query(None, description="Filter by project ID"),
+    latest: bool = Query(True, description="Return only newest record per project"),
+    page: int = Query(1, ge=1, description="Page number"),
+    per_page: int = Query(100, ge=1, le=1000, description="Results per page"),
+) -> QCRecordsPublic:
+    """
+    Search QC records using query parameters.
+    
+    **Parameters:**
+    - `project_id`: Filter to specific project(s)
+    - `latest`: If true (default), returns only the most recent QC record per project
+    - `page`: Page number for pagination (starts at 1)
+    - `per_page`: Number of results per page (max 1000)
+    
+    **Example:**
+    ```
+    GET /api/v1/qcmetrics/search?project_id=P-1234&latest=true
+    ```
+    """
+    filter_on = {}
+    if project_id:
+        filter_on["project_id"] = project_id
+    
+    return services.search_qcrecords(
+        session,
+        filter_on=filter_on,
+        page=page,
+        per_page=per_page,
+        latest=latest,
+    )
+
+
+@router.post(
+    "/search",
+    response_model=QCRecordsPublic,
+    summary="Search QC records (POST)",
+)
+def search_qcrecords_post(
+    session: SessionDep,
+    search_request: QCRecordSearchRequest,
+) -> QCRecordsPublic:
+    """
+    Search QC records using a JSON body for advanced filtering.
+    
+    **Request body format:**
+    
+    ```json
+    {
+      "filter_on": {
+        "project_id": "P-1234",
+        "metadata": {
+          "pipeline": "RNA-Seq"
+        }
+      },
+      "page": 1,
+      "per_page": 100,
+      "latest": true
+    }
+    ```
+    
+    **Filter options:**
+    - `project_id`: Single value or list of project IDs
+    - `metadata`: Key-value pairs to match against pipeline metadata
+    
+    **Pagination:**
+    - `page`: Page number (starts at 1)
+    - `per_page`: Results per page (max 1000)
+    
+    **Latest filtering:**
+    - `latest: true` (default): Returns only the newest QC record per project
+    - `latest: false`: Returns all matching records (full history)
+    """
+    return services.search_qcrecords(
+        session,
+        filter_on=search_request.filter_on,
+        page=search_request.page,
+        per_page=search_request.per_page,
+        latest=search_request.latest,
+    )
+
+
+@router.get(
+    "/{qcrecord_id}",
+    response_model=QCRecordPublic,
+    summary="Get QC record by ID",
+)
+def get_qcrecord(
+    session: SessionDep,
+    qcrecord_id: str,
+) -> QCRecordPublic:
+    """
+    Retrieve a specific QC record by its UUID.
+    
+    Returns the full QC record including metadata, metrics, and output files.
+    """
+    return services.get_qcrecord_by_id(session, qcrecord_id)
+
+
+@router.delete(
+    "/{qcrecord_id}",
+    status_code=status.HTTP_200_OK,
+    summary="Delete QC record",
+)
+def delete_qcrecord(
+    session: SessionDep,
+    qcrecord_id: str,
+) -> dict:
+    """
+    Delete a QC record and all associated data.
+    
+    This permanently removes:
+    - The QC record
+    - All associated metadata
+    - All associated metrics and metric values
+    - All associated output file records
+    
+    **Warning:** This action cannot be undone.
+    """
+    return services.delete_qcrecord(session, qcrecord_id)
diff --git a/main.py b/main.py
index d9efa23..37b4192 100644
--- a/main.py
+++ b/main.py
@@ -18,6 +18,7 @@
 from api.vendors.routes import router as vendors_router
 from api.workflow.routes import router as workflow_router
 from api.manifest.routes import router as manifest_router
+from api.qcmetrics.routes import router as qcmetrics_router
 
 
 # Customize route id's

From b4a6ec94470767c8b9b772465b666d6c53abae2f Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Thu, 29 Jan 2026 19:10:58 -0500
Subject: [PATCH 06/13] revise field metadata -> pipeline_metadata to appease
 SQLAlchemy - E   sqlalchemy.exc.InvalidRequestError: Attribute name
 'metadata' is reserved when using the Declarative API.

---
 api/qcmetrics/models.py | 4 ++--
 main.py                 | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/api/qcmetrics/models.py b/api/qcmetrics/models.py
index c980850..c2f909f 100644
--- a/api/qcmetrics/models.py
+++ b/api/qcmetrics/models.py
@@ -35,7 +35,7 @@ class QCRecordMetadata(SQLModel, table=True):
     value: str = Field(nullable=False)
 
     # Relationship back to parent
-    qcrecord: "QCRecord" = Relationship(back_populates="metadata")
+    qcrecord: "QCRecord" = Relationship(back_populates="pipeline_metadata")
 
     __table_args__ = (
         UniqueConstraint("qcrecord_id", "key", name="uq_qcrecordmetadata_record_key"),
@@ -136,7 +136,7 @@ class QCRecord(SQLModel, table=True):
     project_id: str = Field(max_length=50, nullable=False, index=True)
 
     # Relationships to child tables
-    metadata: List["QCRecordMetadata"] = Relationship(
+    pipeline_metadata: List["QCRecordMetadata"] = Relationship(
         back_populates="qcrecord",
         sa_relationship_kwargs={"cascade": "all, delete-orphan"}
     )
diff --git a/main.py b/main.py
index 37b4192..4879ea4 100644
--- a/main.py
+++ b/main.py
@@ -69,6 +69,7 @@ def health_check():
 app.include_router(vendors_router, prefix=API_PREFIX)
 app.include_router(manifest_router, prefix=API_PREFIX)
 app.include_router(workflow_router, prefix=API_PREFIX)
+app.include_router(qcmetrics_router, prefix=API_PREFIX)
 
 
 if __name__ == "__main__":

From 0bdc2a2d24c2bf151c0eff48461556229c5a44f0 Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Thu, 29 Jan 2026 19:11:14 -0500
Subject: [PATCH 07/13] add test suite

---
 tests/api/test_qcmetrics.py | 453 ++++++++++++++++++++++++++++++++++++
 1 file changed, 453 insertions(+)
 create mode 100644 tests/api/test_qcmetrics.py

diff --git a/tests/api/test_qcmetrics.py b/tests/api/test_qcmetrics.py
new file mode 100644
index 0000000..de1d69d
--- /dev/null
+++ b/tests/api/test_qcmetrics.py
@@ -0,0 +1,453 @@
+"""
+Tests for the QCMetrics API.
+"""
+
+from fastapi.testclient import TestClient
+from sqlmodel import Session
+
+
+def test_create_qcrecord_basic(client: TestClient, session: Session):
+    """
+    Test creating a basic QC record with metadata only.
+    """
+    qcrecord_data = {
+        "project_id": "P-TEST-001",
+        "metadata": {
+            "pipeline": "RNA-Seq",
+            "version": "2.0.0"
+        }
+    }
+
+    response = client.post(
+        "/api/v1/qcmetrics?created_by=test_user",
+        json=qcrecord_data
+    )
+    assert response.status_code == 201
+    
+    data = response.json()
+    assert data["project_id"] == "P-TEST-001"
+    assert data["created_by"] == "test_user"
+    assert len(data["metadata"]) == 2
+    
+    # Check metadata values
+    metadata_dict = {m["key"]: m["value"] for m in data["metadata"]}
+    assert metadata_dict["pipeline"] == "RNA-Seq"
+    assert metadata_dict["version"] == "2.0.0"
+
+
+def test_create_qcrecord_with_single_sample_metrics(client: TestClient, session: Session):
+    """
+    Test creating a QC record with single-sample metrics.
+    """
+    qcrecord_data = {
+        "project_id": "P-TEST-002",
+        "metadata": {
+            "pipeline": "WES"
+        },
+        "metrics": [
+            {
+                "name": "alignment_stats",
+                "samples": [{"sample_name": "Sample1"}],
+                "values": {
+                    "total_reads": "50000000",
+                    "mapped_reads": "48500000",
+                    "alignment_rate": "97.0"
+                }
+            }
+        ]
+    }
+
+    response = client.post(
+        "/api/v1/qcmetrics?created_by=test_user",
+        json=qcrecord_data
+    )
+    assert response.status_code == 201
+    
+    data = response.json()
+    assert len(data["metrics"]) == 1
+    
+    metric = data["metrics"][0]
+    assert metric["name"] == "alignment_stats"
+    assert len(metric["samples"]) == 1
+    assert metric["samples"][0]["sample_name"] == "Sample1"
+    
+    # Check metric values
+    values_dict = {v["key"]: v["value"] for v in metric["values"]}
+    assert values_dict["total_reads"] == "50000000"
+    assert values_dict["alignment_rate"] == "97.0"
+
+
+def test_create_qcrecord_with_paired_sample_metrics(client: TestClient, session: Session):
+    """
+    Test creating a QC record with tumor/normal paired metrics.
+    """
+    qcrecord_data = {
+        "project_id": "P-TEST-003",
+        "metadata": {
+            "pipeline": "Somatic"
+        },
+        "metrics": [
+            {
+                "name": "somatic_variants",
+                "samples": [
+                    {"sample_name": "Sample1", "role": "tumor"},
+                    {"sample_name": "Sample2", "role": "normal"}
+                ],
+                "values": {
+                    "snv_count": "15234",
+                    "indel_count": "1523",
+                    "tmb": "8.5"
+                }
+            }
+        ]
+    }
+
+    response = client.post(
+        "/api/v1/qcmetrics?created_by=test_user",
+        json=qcrecord_data
+    )
+    assert response.status_code == 201
+    
+    data = response.json()
+    metric = data["metrics"][0]
+    
+    # Check paired samples with roles
+    assert len(metric["samples"]) == 2
+    samples_by_role = {s["role"]: s["sample_name"] for s in metric["samples"]}
+    assert samples_by_role["tumor"] == "Sample1"
+    assert samples_by_role["normal"] == "Sample2"
+
+
+def test_create_qcrecord_with_workflow_level_metrics(client: TestClient, session: Session):
+    """
+    Test creating a QC record with workflow-level metrics (no samples).
+    """
+    qcrecord_data = {
+        "project_id": "P-TEST-004",
+        "metadata": {
+            "pipeline": "RNA-Seq"
+        },
+        "metrics": [
+            {
+                "name": "pipeline_summary",
+                "values": {
+                    "total_samples_processed": "48",
+                    "samples_passed_qc": "46",
+                    "pipeline_runtime_hours": "12.5"
+                }
+            }
+        ]
+    }
+
+    response = client.post(
+        "/api/v1/qcmetrics?created_by=test_user",
+        json=qcrecord_data
+    )
+    assert response.status_code == 201
+    
+    data = response.json()
+    metric = data["metrics"][0]
+    
+    # Workflow-level metrics have no samples
+    assert len(metric["samples"]) == 0
+    
+    values_dict = {v["key"]: v["value"] for v in metric["values"]}
+    assert values_dict["total_samples_processed"] == "48"
+
+
+def test_create_qcrecord_with_output_files(client: TestClient, session: Session):
+    """
+    Test creating a QC record with output files.
+    """
+    qcrecord_data = {
+        "project_id": "P-TEST-005",
+        "metadata": {
+            "pipeline": "WGS"
+        },
+        "output_files": [
+            {
+                "uri": "s3://bucket/Sample1.bam",
+                "size": 123456789,
+                "samples": [{"sample_name": "Sample1"}],
+                "hash": {"md5": "abc123def456"},
+                "tags": {"type": "alignment", "format": "bam"}
+            },
+            {
+                "uri": "s3://bucket/expression_matrix.tsv",
+                "size": 5678901,
+                "hash": {"sha256": "xyz789"},
+                "tags": {"type": "expression"}
+            }
+        ]
+    }
+
+    response = client.post(
+        "/api/v1/qcmetrics?created_by=test_user",
+        json=qcrecord_data
+    )
+    assert response.status_code == 201
+    
+    data = response.json()
+    assert len(data["output_files"]) == 2
+    
+    # Check first file (single sample)
+    bam_file = next(f for f in data["output_files"] if "bam" in f["uri"])
+    assert bam_file["size"] == 123456789
+    assert len(bam_file["samples"]) == 1
+    assert bam_file["samples"][0]["sample_name"] == "Sample1"
+    
+    # Check hashes
+    hashes_dict = {h["algorithm"]: h["value"] for h in bam_file["hashes"]}
+    assert hashes_dict["md5"] == "abc123def456"
+    
+    # Check tags
+    tags_dict = {t["key"]: t["value"] for t in bam_file["tags"]}
+    assert tags_dict["type"] == "alignment"
+    
+    # Check second file (workflow-level, no samples)
+    matrix_file = next(f for f in data["output_files"] if "matrix" in f["uri"])
+    assert len(matrix_file["samples"]) == 0
+
+
+def test_search_qcrecords_empty(client: TestClient, session: Session):
+    """
+    Test searching QC records when none exist.
+    """
+    response = client.get("/api/v1/qcmetrics/search")
+    assert response.status_code == 200
+    
+    data = response.json()
+    assert data["total"] == 0
+    assert data["data"] == []
+
+
+def test_search_qcrecords_by_project_id(client: TestClient, session: Session):
+    """
+    Test searching QC records by project ID.
+    """
+    # Create a QC record
+    qcrecord_data = {
+        "project_id": "P-SEARCH-001",
+        "metadata": {"pipeline": "RNA-Seq"}
+    }
+    client.post("/api/v1/qcmetrics?created_by=test_user", json=qcrecord_data)
+    
+    # Search for it
+    response = client.get("/api/v1/qcmetrics/search?project_id=P-SEARCH-001")
+    assert response.status_code == 200
+    
+    data = response.json()
+    assert data["total"] == 1
+    assert data["data"][0]["project_id"] == "P-SEARCH-001"
+
+
+def test_search_qcrecords_latest_only(client: TestClient, session: Session):
+    """
+    Test that latest=true returns only the newest record per project.
+    """
+    # Create two QC records for the same project
+    qcrecord_data_1 = {
+        "project_id": "P-LATEST-001",
+        "metadata": {"version": "1.0"}
+    }
+    client.post("/api/v1/qcmetrics?created_by=test_user", json=qcrecord_data_1)
+    
+    qcrecord_data_2 = {
+        "project_id": "P-LATEST-001",
+        "metadata": {"version": "2.0"}  # Different metadata, so not a duplicate
+    }
+    client.post("/api/v1/qcmetrics?created_by=test_user", json=qcrecord_data_2)
+    
+    # Search with latest=true (default)
+    response = client.get("/api/v1/qcmetrics/search?project_id=P-LATEST-001&latest=true")
+    assert response.status_code == 200
+    
+    data = response.json()
+    assert data["total"] == 1
+    
+    # Should be version 2.0 (the latest)
+    metadata_dict = {m["key"]: m["value"] for m in data["data"][0]["metadata"]}
+    assert metadata_dict["version"] == "2.0"
+
+
+def test_search_qcrecords_all_versions(client: TestClient, session: Session):
+    """
+    Test that latest=false returns all versions.
+    """
+    # Create two QC records for the same project
+    qcrecord_data_1 = {
+        "project_id": "P-ALLVER-001",
+        "metadata": {"version": "1.0"}
+    }
+    client.post("/api/v1/qcmetrics?created_by=test_user", json=qcrecord_data_1)
+    
+    qcrecord_data_2 = {
+        "project_id": "P-ALLVER-001",
+        "metadata": {"version": "2.0"}
+    }
+    client.post("/api/v1/qcmetrics?created_by=test_user", json=qcrecord_data_2)
+    
+    # Search with latest=false
+    response = client.get("/api/v1/qcmetrics/search?project_id=P-ALLVER-001&latest=false")
+    assert response.status_code == 200
+    
+    data = response.json()
+    assert data["total"] == 2
+
+
+def test_search_qcrecords_post_with_metadata_filter(client: TestClient, session: Session):
+    """
+    Test POST search with metadata filtering.
+    """
+    # Create QC records with different pipelines
+    client.post("/api/v1/qcmetrics?created_by=test_user", json={
+        "project_id": "P-META-001",
+        "metadata": {"pipeline": "RNA-Seq"}
+    })
+    client.post("/api/v1/qcmetrics?created_by=test_user", json={
+        "project_id": "P-META-002",
+        "metadata": {"pipeline": "WES"}
+    })
+    
+    # Search for RNA-Seq pipeline only
+    search_request = {
+        "filter_on": {
+            "metadata": {"pipeline": "RNA-Seq"}
+        }
+    }
+    response = client.post("/api/v1/qcmetrics/search", json=search_request)
+    assert response.status_code == 200
+    
+    data = response.json()
+    assert data["total"] == 1
+    assert data["data"][0]["project_id"] == "P-META-001"
+
+
+def test_get_qcrecord_by_id(client: TestClient, session: Session):
+    """
+    Test getting a QC record by its ID.
+    """
+    # Create a QC record
+    create_response = client.post("/api/v1/qcmetrics?created_by=test_user", json={
+        "project_id": "P-GET-001",
+        "metadata": {"pipeline": "RNA-Seq"}
+    })
+    qcrecord_id = create_response.json()["id"]
+    
+    # Get by ID
+    response = client.get(f"/api/v1/qcmetrics/{qcrecord_id}")
+    assert response.status_code == 200
+    
+    data = response.json()
+    assert data["id"] == qcrecord_id
+    assert data["project_id"] == "P-GET-001"
+
+
+def test_get_qcrecord_not_found(client: TestClient, session: Session):
+    """
+    Test getting a non-existent QC record returns 404.
+    """
+    fake_uuid = "00000000-0000-0000-0000-000000000000"
+    response = client.get(f"/api/v1/qcmetrics/{fake_uuid}")
+    assert response.status_code == 404
+
+
+def test_get_qcrecord_invalid_uuid(client: TestClient, session: Session):
+    """
+    Test getting with an invalid UUID format returns 400.
+    """
+    response = client.get("/api/v1/qcmetrics/not-a-uuid")
+    assert response.status_code == 400
+
+
+def test_delete_qcrecord(client: TestClient, session: Session):
+    """
+    Test deleting a QC record.
+    """
+    # Create a QC record
+    create_response = client.post("/api/v1/qcmetrics?created_by=test_user", json={
+        "project_id": "P-DELETE-001",
+        "metadata": {"pipeline": "RNA-Seq"}
+    })
+    qcrecord_id = create_response.json()["id"]
+    
+    # Delete it
+    response = client.delete(f"/api/v1/qcmetrics/{qcrecord_id}")
+    assert response.status_code == 200
+    assert response.json()["status"] == "deleted"
+    
+    # Verify it's gone
+    get_response = client.get(f"/api/v1/qcmetrics/{qcrecord_id}")
+    assert get_response.status_code == 404
+
+
+def test_delete_qcrecord_not_found(client: TestClient, session: Session):
+    """
+    Test deleting a non-existent QC record returns 404.
+    """
+    fake_uuid = "00000000-0000-0000-0000-000000000000"
+    response = client.delete(f"/api/v1/qcmetrics/{fake_uuid}")
+    assert response.status_code == 404
+
+
+def test_duplicate_detection(client: TestClient, session: Session):
+    """
+    Test that equivalent records are detected as duplicates.
+    """
+    qcrecord_data = {
+        "project_id": "P-DUP-001",
+        "metadata": {"pipeline": "RNA-Seq", "version": "2.0"}
+    }
+    
+    # Create first record
+    response1 = client.post("/api/v1/qcmetrics?created_by=user1", json=qcrecord_data)
+    assert response1.status_code == 201
+    id1 = response1.json()["id"]
+    
+    # Try to create identical record
+    response2 = client.post("/api/v1/qcmetrics?created_by=user2", json=qcrecord_data)
+    assert response2.status_code == 201
+    id2 = response2.json()["id"]
+    
+    # Should return the same record (duplicate detection)
+    assert id1 == id2
+
+
+def test_legacy_sample_level_metrics_format(client: TestClient, session: Session):
+    """
+    Test backward compatibility with the legacy ES format (sample_level_metrics).
+    """
+    qcrecord_data = {
+        "project_id": "P-LEGACY-001",
+        "metadata": {"pipeline": "RNA-Seq"},
+        "sample_level_metrics": {
+            "Sample1": {
+                "reads": "50000000",
+                "alignment_rate": "95.5"
+            },
+            "Sample2": {
+                "reads": "45000000",
+                "alignment_rate": "93.2"
+            }
+        }
+    }
+
+    response = client.post(
+        "/api/v1/qcmetrics?created_by=test_user",
+        json=qcrecord_data
+    )
+    assert response.status_code == 201
+    
+    data = response.json()
+    
+    # Legacy format should be converted to metrics
+    assert len(data["metrics"]) == 2
+    
+    # Check that sample names are preserved
+    metric_sample_names = set()
+    for metric in data["metrics"]:
+        for sample in metric["samples"]:
+            metric_sample_names.add(sample["sample_name"])
+    
+    assert "Sample1" in metric_sample_names
+    assert "Sample2" in metric_sample_names

From 4d10433532515734c1805b4b6b41c9282cc3104d Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Fri, 30 Jan 2026 12:12:44 -0500
Subject: [PATCH 08/13] reorder alphabetically for better swagger docs
 navigation

---
 main.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/main.py b/main.py
index 4879ea4..6f5eb3f 100644
--- a/main.py
+++ b/main.py
@@ -10,15 +10,15 @@
 
 from api.files.routes import router as files_router
 from api.jobs.routes import router as jobs_router
+from api.manifest.routes import router as manifest_router
 from api.project.routes import router as project_router
+from api.qcmetrics.routes import router as qcmetrics_router
 from api.runs.routes import router as runs_router
 from api.samples.routes import router as samples_router
 from api.search.routes import router as search_router
 from api.settings.routes import router as settings_router
 from api.vendors.routes import router as vendors_router
 from api.workflow.routes import router as workflow_router
-from api.manifest.routes import router as manifest_router
-from api.qcmetrics.routes import router as qcmetrics_router
 
 
 # Customize route id's
@@ -61,15 +61,15 @@ def health_check():
 
 app.include_router(files_router, prefix=API_PREFIX)
 app.include_router(jobs_router, prefix=API_PREFIX)
+app.include_router(manifest_router, prefix=API_PREFIX)
 app.include_router(project_router, prefix=API_PREFIX)
+app.include_router(qcmetrics_router, prefix=API_PREFIX)
 app.include_router(runs_router, prefix=API_PREFIX)
 app.include_router(samples_router, prefix=API_PREFIX)
 app.include_router(search_router, prefix=API_PREFIX)
 app.include_router(settings_router, prefix=API_PREFIX)
 app.include_router(vendors_router, prefix=API_PREFIX)
-app.include_router(manifest_router, prefix=API_PREFIX)
 app.include_router(workflow_router, prefix=API_PREFIX)
-app.include_router(qcmetrics_router, prefix=API_PREFIX)
 
 
 if __name__ == "__main__":

From 0b0a422cdc4021718a6f291e66d6c7c84f134f2e Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Fri, 30 Jan 2026 12:40:02 -0500
Subject: [PATCH 09/13] linting throughout PR

---
 api/filerecord/models.py    |  6 +--
 api/qcmetrics/models.py     |  8 ++--
 api/qcmetrics/routes.py     | 37 ++++++++--------
 api/qcmetrics/services.py   | 86 ++++++++++++++++++-------------------
 tests/api/test_qcmetrics.py | 76 ++++++++++++++++----------------
 5 files changed, 106 insertions(+), 107 deletions(-)

diff --git a/api/filerecord/models.py b/api/filerecord/models.py
index fccb644..ae7588f 100644
--- a/api/filerecord/models.py
+++ b/api/filerecord/models.py
@@ -8,7 +8,7 @@
 import uuid
 from datetime import datetime
 from enum import Enum
-from typing import List, TYPE_CHECKING
+from typing import List
 from sqlmodel import SQLModel, Field, Relationship, UniqueConstraint
 from pydantic import ConfigDict
 
@@ -67,7 +67,7 @@ class FileRecordTag(SQLModel, table=True):
 class FileRecordSample(SQLModel, table=True):
     """
     Associates samples with a file record.
-    
+
     Supports:
     - 0 rows: workflow-level file (e.g., expression matrix)
     - 1 row: single-sample file (e.g., BAM file)
@@ -91,7 +91,7 @@ class FileRecordSample(SQLModel, table=True):
 class FileRecord(SQLModel, table=True):
     """
     Metadata record for files stored in external locations (S3, etc.).
-    
+
     Uses polymorphic association via entity_type and entity_id to link
     to parent entities (QCRecord, Sample, etc.) without hard FK constraints.
     """
diff --git a/api/qcmetrics/models.py b/api/qcmetrics/models.py
index c2f909f..2a23c5a 100644
--- a/api/qcmetrics/models.py
+++ b/api/qcmetrics/models.py
@@ -65,7 +65,7 @@ class QCMetricValue(SQLModel, table=True):
 class QCMetricSample(SQLModel, table=True):
     """
     Associates samples with a metric group.
-    
+
     Supports:
     - 0 rows: workflow-level metric (e.g., pipeline runtime)
     - 1 row: single-sample metric (e.g., alignment stats for Sample1)
@@ -89,7 +89,7 @@ class QCMetricSample(SQLModel, table=True):
 class QCMetric(SQLModel, table=True):
     """
     A named group of metrics within a QC record.
-    
+
     Can be workflow-level (no samples), single-sample, or multi-sample (paired).
     Examples: alignment_stats, somatic_variants, expression_summary
     """
@@ -120,7 +120,7 @@ class QCMetric(SQLModel, table=True):
 class QCRecord(SQLModel, table=True):
     """
     Main QC record entity - one per pipeline execution per project.
-    
+
     Multiple records per project are allowed for versioning (history).
     The created_on timestamp differentiates versions.
     """
@@ -181,7 +181,7 @@ class MetricInput(SQLModel):
 class QCRecordCreate(SQLModel):
     """
     Request model for creating a QC record.
-    
+
     Accepts both the new explicit format and backward-compatible formats.
     """
     project_id: str
diff --git a/api/qcmetrics/routes.py b/api/qcmetrics/routes.py
index 29c18d3..03a8fb3 100644
--- a/api/qcmetrics/routes.py
+++ b/api/qcmetrics/routes.py
@@ -4,8 +4,8 @@
 Provides endpoints for creating, searching, and deleting QC records.
 """
 
-from fastapi import APIRouter, Depends, Query, status
 from typing import Optional
+from fastapi import APIRouter, Query, status
 
 from api.qcmetrics.models import (
     QCRecordCreate,
@@ -35,11 +35,11 @@ def create_qcrecord(
 ) -> QCRecordPublic:
     """
     Create a new QC record with metrics and output files.
-    
+
     The record stores quality control metrics from a pipeline execution.
-    
+
     **Request body format:**
-    
+
     ```json
     {
       "project_id": "P-1234",
@@ -65,12 +65,13 @@ def create_qcrecord(
       ]
     }
     ```
-    
+
     **Sample association patterns:**
     - **Workflow-level**: Omit `samples` array (applies to entire pipeline run)
     - **Single sample**: One entry in `samples` array
-    - **Sample pair**: Two entries with roles, e.g., `[{"sample_name": "T1", "role": "tumor"}, {"sample_name": "N1", "role": "normal"}]`
-    
+    - **Sample pair**: Two entries with roles, e.g.,
+        `[{"sample_name": "T1", "role": "tumor"}, {"sample_name": "N1", "role": "normal"}]`
+
     **Duplicate detection:**
     If an equivalent record already exists for the project (same metadata),
     the existing record is returned instead of creating a duplicate.
@@ -92,13 +93,13 @@ def search_qcrecords_get(
 ) -> QCRecordsPublic:
     """
     Search QC records using query parameters.
-    
+
     **Parameters:**
     - `project_id`: Filter to specific project(s)
     - `latest`: If true (default), returns only the most recent QC record per project
     - `page`: Page number for pagination (starts at 1)
     - `per_page`: Number of results per page (max 1000)
-    
+
     **Example:**
     ```
     GET /api/v1/qcmetrics/search?project_id=P-1234&latest=true
@@ -107,7 +108,7 @@ def search_qcrecords_get(
     filter_on = {}
     if project_id:
         filter_on["project_id"] = project_id
-    
+
     return services.search_qcrecords(
         session,
         filter_on=filter_on,
@@ -128,9 +129,9 @@ def search_qcrecords_post(
 ) -> QCRecordsPublic:
     """
     Search QC records using a JSON body for advanced filtering.
-    
+
     **Request body format:**
-    
+
     ```json
     {
       "filter_on": {
@@ -144,15 +145,15 @@ def search_qcrecords_post(
       "latest": true
     }
     ```
-    
+
     **Filter options:**
     - `project_id`: Single value or list of project IDs
     - `metadata`: Key-value pairs to match against pipeline metadata
-    
+
     **Pagination:**
     - `page`: Page number (starts at 1)
     - `per_page`: Results per page (max 1000)
-    
+
     **Latest filtering:**
     - `latest: true` (default): Returns only the newest QC record per project
     - `latest: false`: Returns all matching records (full history)
@@ -177,7 +178,7 @@ def get_qcrecord(
 ) -> QCRecordPublic:
     """
     Retrieve a specific QC record by its UUID.
-    
+
     Returns the full QC record including metadata, metrics, and output files.
     """
     return services.get_qcrecord_by_id(session, qcrecord_id)
@@ -194,13 +195,13 @@ def delete_qcrecord(
 ) -> dict:
     """
     Delete a QC record and all associated data.
-    
+
     This permanently removes:
     - The QC record
     - All associated metadata
     - All associated metrics and metric values
     - All associated output file records
-    
+
     **Warning:** This action cannot be undone.
     """
     return services.delete_qcrecord(session, qcrecord_id)
diff --git a/api/qcmetrics/services.py b/api/qcmetrics/services.py
index c4bd136..aad81e9 100644
--- a/api/qcmetrics/services.py
+++ b/api/qcmetrics/services.py
@@ -6,10 +6,9 @@
 
 import logging
 from datetime import datetime, timezone
-from typing import List
+import uuid as uuid_module
 from fastapi import HTTPException, status
 from sqlmodel import Session, select, col
-from sqlalchemy import func
 
 from api.qcmetrics.models import (
     QCRecord,
@@ -50,7 +49,7 @@ def create_qcrecord(
 ) -> QCRecordPublic:
     """
     Create a new QC record with all associated data.
-    
+
     Handles both the new explicit format (metrics with samples) and
     the legacy ES format (sample_level_metrics dict).
     """
@@ -140,8 +139,10 @@ def _create_metric(
         for sample_input in metric_input.samples:
             sample_assoc = QCMetricSample(
                 qc_metric_id=metric.id,
-                sample_name=sample_input.sample_name if hasattr(sample_input, 'sample_name') else sample_input['sample_name'],
-                role=sample_input.role if hasattr(sample_input, 'role') else sample_input.get('role'),
+                sample_name=sample_input.sample_name if hasattr(sample_input, 'sample_name')
+                else sample_input['sample_name'],
+                role=sample_input.role if hasattr(sample_input, 'role')
+                else sample_input.get('role'),
             )
             session.add(sample_assoc)
 
@@ -213,16 +214,16 @@ def _check_duplicate_record(
 ) -> QCRecord | None:
     """
     Check if an equivalent QC record already exists.
-    
+
     Returns the existing record if found, None otherwise.
     """
     # Find existing records for this project
     stmt = select(QCRecord).where(
         QCRecord.project_id == qcrecord_create.project_id
     ).order_by(col(QCRecord.created_on).desc())
-    
+
     existing_records = session.exec(stmt).all()
-    
+
     if not existing_records:
         return None
 
@@ -230,7 +231,7 @@ def _check_duplicate_record(
     # A full comparison would require comparing all nested data
     # This is a simplified version that checks metadata keys
     latest = existing_records[0]
-    
+
     # Get existing metadata
     existing_metadata = {
         m.key: m.value
@@ -240,14 +241,14 @@ def _check_duplicate_record(
             )
         ).all()
     }
-    
+
     # Compare metadata
     new_metadata = qcrecord_create.metadata or {}
     if existing_metadata == {k: str(v) for k, v in new_metadata.items()}:
         # Metadata matches - could do deeper comparison here
         # For now, consider it a duplicate if metadata matches
         return latest
-    
+
     return None
 
 
@@ -260,7 +261,7 @@ def search_qcrecords(
 ) -> QCRecordsPublic:
     """
     Search for QC records with filtering and pagination.
-    
+
     Args:
         session: Database session
         filter_on: Dictionary of fields to filter by
@@ -269,10 +270,10 @@ def search_qcrecords(
         latest: If True, return only the newest record per project
     """
     filter_on = filter_on or {}
-    
+
     # Build base query
     stmt = select(QCRecord)
-    
+
     # Apply filters
     if "project_id" in filter_on:
         project_ids = filter_on["project_id"]
@@ -280,7 +281,7 @@ def search_qcrecords(
             stmt = stmt.where(col(QCRecord.project_id).in_(project_ids))
         else:
             stmt = stmt.where(QCRecord.project_id == project_ids)
-    
+
     # Handle metadata filtering
     if "metadata" in filter_on and isinstance(filter_on["metadata"], dict):
         for key, value in filter_on["metadata"].items():
@@ -290,13 +291,13 @@ def search_qcrecords(
                 QCRecordMetadata.value == str(value)
             )
             stmt = stmt.where(col(QCRecord.id).in_(subq))
-    
+
     # Order by created_on descending
     stmt = stmt.order_by(col(QCRecord.created_on).desc())
-    
+
     # Execute to get all matching records
     all_records = list(session.exec(stmt).all())
-    
+
     # Apply "latest" filter - keep only newest per project
     if latest:
         seen_projects = set()
@@ -306,16 +307,16 @@ def search_qcrecords(
                 filtered_records.append(record)
                 seen_projects.add(record.project_id)
         all_records = filtered_records
-    
+
     # Calculate pagination
     total = len(all_records)
     start_idx = (page - 1) * per_page
     end_idx = start_idx + per_page
     paginated_records = all_records[start_idx:end_idx]
-    
+
     # Convert to public format
     data = [_qcrecord_to_public(session, record) for record in paginated_records]
-    
+
     return QCRecordsPublic(
         data=data,
         total=total,
@@ -326,8 +327,7 @@ def search_qcrecords(
 
 def get_qcrecord_by_id(session: Session, qcrecord_id: str) -> QCRecordPublic:
     """Get a single QC record by ID."""
-    import uuid as uuid_module
-    
+
     try:
         record_uuid = uuid_module.UUID(qcrecord_id)
     except ValueError as exc:
@@ -335,21 +335,19 @@ def get_qcrecord_by_id(session: Session, qcrecord_id: str) -> QCRecordPublic:
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=f"Invalid UUID format: {qcrecord_id}"
         ) from exc
-    
+
     record = session.get(QCRecord, record_uuid)
     if not record:
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
             detail=f"QC record not found: {qcrecord_id}"
         )
-    
+
     return _qcrecord_to_public(session, record)
 
 
 def delete_qcrecord(session: Session, qcrecord_id: str) -> dict:
     """Delete a QC record and all associated data."""
-    import uuid as uuid_module
-    
     try:
         record_uuid = uuid_module.UUID(qcrecord_id)
     except ValueError as exc:
@@ -357,14 +355,14 @@ def delete_qcrecord(session: Session, qcrecord_id: str) -> dict:
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=f"Invalid UUID format: {qcrecord_id}"
         ) from exc
-    
+
     record = session.get(QCRecord, record_uuid)
     if not record:
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
             detail=f"QC record not found: {qcrecord_id}"
         )
-    
+
     # Delete associated file records (polymorphic, not cascade)
     file_records = session.exec(
         select(FileRecord).where(
@@ -372,16 +370,16 @@ def delete_qcrecord(session: Session, qcrecord_id: str) -> dict:
             FileRecord.entity_id == record_uuid
         )
     ).all()
-    
+
     for file_record in file_records:
         session.delete(file_record)
-    
+
     # Delete the QC record (cascades to metadata, metrics, etc.)
     session.delete(record)
     session.commit()
-    
+
     logger.info("Deleted QC record %s", qcrecord_id)
-    
+
     return {"status": "deleted", "id": qcrecord_id}
 
 
@@ -393,29 +391,29 @@ def _qcrecord_to_public(session: Session, record: QCRecord) -> QCRecordPublic:
             QCRecordMetadata.qcrecord_id == record.id
         )
     ).all()
-    
+
     metadata = [
         MetadataKeyValue(key=m.key, value=m.value)
         for m in metadata_entries
     ]
-    
+
     # Get metrics
     metric_entries = session.exec(
         select(QCMetric).where(QCMetric.qcrecord_id == record.id)
     ).all()
-    
+
     metrics = []
     for metric in metric_entries:
         # Get metric values
         values = session.exec(
             select(QCMetricValue).where(QCMetricValue.qc_metric_id == metric.id)
         ).all()
-        
+
         # Get metric samples
         samples = session.exec(
             select(QCMetricSample).where(QCMetricSample.qc_metric_id == metric.id)
         ).all()
-        
+
         metrics.append(MetricPublic(
             name=metric.name,
             samples=[
@@ -427,7 +425,7 @@ def _qcrecord_to_public(session: Session, record: QCRecord) -> QCRecordPublic:
                 for v in values
             ],
         ))
-    
+
     # Get file records
     file_records = session.exec(
         select(FileRecord).where(
@@ -435,7 +433,7 @@ def _qcrecord_to_public(session: Session, record: QCRecord) -> QCRecordPublic:
             FileRecord.entity_id == record.id
         )
     ).all()
-    
+
     output_files = []
     for file_record in file_records:
         # Get hashes
@@ -444,21 +442,21 @@ def _qcrecord_to_public(session: Session, record: QCRecord) -> QCRecordPublic:
                 FileRecordHash.file_record_id == file_record.id
             )
         ).all()
-        
+
         # Get tags
         tags = session.exec(
             select(FileRecordTag).where(
                 FileRecordTag.file_record_id == file_record.id
             )
         ).all()
-        
+
         # Get samples
         samples = session.exec(
             select(FileRecordSample).where(
                 FileRecordSample.file_record_id == file_record.id
             )
         ).all()
-        
+
         output_files.append(FileRecordPublic(
             id=file_record.id,
             uri=file_record.uri,
@@ -468,7 +466,7 @@ def _qcrecord_to_public(session: Session, record: QCRecord) -> QCRecordPublic:
             tags=[TagPublic(key=t.key, value=t.value) for t in tags],
             samples=[SamplePublic(sample_name=s.sample_name, role=s.role) for s in samples],
         ))
-    
+
     return QCRecordPublic(
         id=record.id,
         created_on=record.created_on,
diff --git a/tests/api/test_qcmetrics.py b/tests/api/test_qcmetrics.py
index de1d69d..c422254 100644
--- a/tests/api/test_qcmetrics.py
+++ b/tests/api/test_qcmetrics.py
@@ -23,12 +23,12 @@ def test_create_qcrecord_basic(client: TestClient, session: Session):
         json=qcrecord_data
     )
     assert response.status_code == 201
-    
+
     data = response.json()
     assert data["project_id"] == "P-TEST-001"
     assert data["created_by"] == "test_user"
     assert len(data["metadata"]) == 2
-    
+
     # Check metadata values
     metadata_dict = {m["key"]: m["value"] for m in data["metadata"]}
     assert metadata_dict["pipeline"] == "RNA-Seq"
@@ -62,15 +62,15 @@ def test_create_qcrecord_with_single_sample_metrics(client: TestClient, session:
         json=qcrecord_data
     )
     assert response.status_code == 201
-    
+
     data = response.json()
     assert len(data["metrics"]) == 1
-    
+
     metric = data["metrics"][0]
     assert metric["name"] == "alignment_stats"
     assert len(metric["samples"]) == 1
     assert metric["samples"][0]["sample_name"] == "Sample1"
-    
+
     # Check metric values
     values_dict = {v["key"]: v["value"] for v in metric["values"]}
     assert values_dict["total_reads"] == "50000000"
@@ -107,10 +107,10 @@ def test_create_qcrecord_with_paired_sample_metrics(client: TestClient, session:
         json=qcrecord_data
     )
     assert response.status_code == 201
-    
+
     data = response.json()
     metric = data["metrics"][0]
-    
+
     # Check paired samples with roles
     assert len(metric["samples"]) == 2
     samples_by_role = {s["role"]: s["sample_name"] for s in metric["samples"]}
@@ -144,13 +144,13 @@ def test_create_qcrecord_with_workflow_level_metrics(client: TestClient, session
         json=qcrecord_data
     )
     assert response.status_code == 201
-    
+
     data = response.json()
     metric = data["metrics"][0]
-    
+
     # Workflow-level metrics have no samples
     assert len(metric["samples"]) == 0
-    
+
     values_dict = {v["key"]: v["value"] for v in metric["values"]}
     assert values_dict["total_samples_processed"] == "48"
 
@@ -186,24 +186,24 @@ def test_create_qcrecord_with_output_files(client: TestClient, session: Session)
         json=qcrecord_data
     )
     assert response.status_code == 201
-    
+
     data = response.json()
     assert len(data["output_files"]) == 2
-    
+
     # Check first file (single sample)
     bam_file = next(f for f in data["output_files"] if "bam" in f["uri"])
     assert bam_file["size"] == 123456789
     assert len(bam_file["samples"]) == 1
     assert bam_file["samples"][0]["sample_name"] == "Sample1"
-    
+
     # Check hashes
     hashes_dict = {h["algorithm"]: h["value"] for h in bam_file["hashes"]}
     assert hashes_dict["md5"] == "abc123def456"
-    
+
     # Check tags
     tags_dict = {t["key"]: t["value"] for t in bam_file["tags"]}
     assert tags_dict["type"] == "alignment"
-    
+
     # Check second file (workflow-level, no samples)
     matrix_file = next(f for f in data["output_files"] if "matrix" in f["uri"])
     assert len(matrix_file["samples"]) == 0
@@ -215,7 +215,7 @@ def test_search_qcrecords_empty(client: TestClient, session: Session):
     """
     response = client.get("/api/v1/qcmetrics/search")
     assert response.status_code == 200
-    
+
     data = response.json()
     assert data["total"] == 0
     assert data["data"] == []
@@ -231,11 +231,11 @@ def test_search_qcrecords_by_project_id(client: TestClient, session: Session):
         "metadata": {"pipeline": "RNA-Seq"}
     }
     client.post("/api/v1/qcmetrics?created_by=test_user", json=qcrecord_data)
-    
+
     # Search for it
     response = client.get("/api/v1/qcmetrics/search?project_id=P-SEARCH-001")
     assert response.status_code == 200
-    
+
     data = response.json()
     assert data["total"] == 1
     assert data["data"][0]["project_id"] == "P-SEARCH-001"
@@ -251,20 +251,20 @@ def test_search_qcrecords_latest_only(client: TestClient, session: Session):
         "metadata": {"version": "1.0"}
     }
     client.post("/api/v1/qcmetrics?created_by=test_user", json=qcrecord_data_1)
-    
+
     qcrecord_data_2 = {
         "project_id": "P-LATEST-001",
         "metadata": {"version": "2.0"}  # Different metadata, so not a duplicate
     }
     client.post("/api/v1/qcmetrics?created_by=test_user", json=qcrecord_data_2)
-    
+
     # Search with latest=true (default)
     response = client.get("/api/v1/qcmetrics/search?project_id=P-LATEST-001&latest=true")
     assert response.status_code == 200
-    
+
     data = response.json()
     assert data["total"] == 1
-    
+
     # Should be version 2.0 (the latest)
     metadata_dict = {m["key"]: m["value"] for m in data["data"][0]["metadata"]}
     assert metadata_dict["version"] == "2.0"
@@ -280,17 +280,17 @@ def test_search_qcrecords_all_versions(client: TestClient, session: Session):
         "metadata": {"version": "1.0"}
     }
     client.post("/api/v1/qcmetrics?created_by=test_user", json=qcrecord_data_1)
-    
+
     qcrecord_data_2 = {
         "project_id": "P-ALLVER-001",
         "metadata": {"version": "2.0"}
     }
     client.post("/api/v1/qcmetrics?created_by=test_user", json=qcrecord_data_2)
-    
+
     # Search with latest=false
     response = client.get("/api/v1/qcmetrics/search?project_id=P-ALLVER-001&latest=false")
     assert response.status_code == 200
-    
+
     data = response.json()
     assert data["total"] == 2
 
@@ -308,7 +308,7 @@ def test_search_qcrecords_post_with_metadata_filter(client: TestClient, session:
         "project_id": "P-META-002",
         "metadata": {"pipeline": "WES"}
     })
-    
+
     # Search for RNA-Seq pipeline only
     search_request = {
         "filter_on": {
@@ -317,7 +317,7 @@ def test_search_qcrecords_post_with_metadata_filter(client: TestClient, session:
     }
     response = client.post("/api/v1/qcmetrics/search", json=search_request)
     assert response.status_code == 200
-    
+
     data = response.json()
     assert data["total"] == 1
     assert data["data"][0]["project_id"] == "P-META-001"
@@ -333,11 +333,11 @@ def test_get_qcrecord_by_id(client: TestClient, session: Session):
         "metadata": {"pipeline": "RNA-Seq"}
     })
     qcrecord_id = create_response.json()["id"]
-    
+
     # Get by ID
     response = client.get(f"/api/v1/qcmetrics/{qcrecord_id}")
     assert response.status_code == 200
-    
+
     data = response.json()
     assert data["id"] == qcrecord_id
     assert data["project_id"] == "P-GET-001"
@@ -370,12 +370,12 @@ def test_delete_qcrecord(client: TestClient, session: Session):
         "metadata": {"pipeline": "RNA-Seq"}
     })
     qcrecord_id = create_response.json()["id"]
-    
+
     # Delete it
     response = client.delete(f"/api/v1/qcmetrics/{qcrecord_id}")
     assert response.status_code == 200
     assert response.json()["status"] == "deleted"
-    
+
     # Verify it's gone
     get_response = client.get(f"/api/v1/qcmetrics/{qcrecord_id}")
     assert get_response.status_code == 404
@@ -398,17 +398,17 @@ def test_duplicate_detection(client: TestClient, session: Session):
         "project_id": "P-DUP-001",
         "metadata": {"pipeline": "RNA-Seq", "version": "2.0"}
     }
-    
+
     # Create first record
     response1 = client.post("/api/v1/qcmetrics?created_by=user1", json=qcrecord_data)
     assert response1.status_code == 201
     id1 = response1.json()["id"]
-    
+
     # Try to create identical record
     response2 = client.post("/api/v1/qcmetrics?created_by=user2", json=qcrecord_data)
     assert response2.status_code == 201
     id2 = response2.json()["id"]
-    
+
     # Should return the same record (duplicate detection)
     assert id1 == id2
 
@@ -437,17 +437,17 @@ def test_legacy_sample_level_metrics_format(client: TestClient, session: Session
         json=qcrecord_data
     )
     assert response.status_code == 201
-    
+
     data = response.json()
-    
+
     # Legacy format should be converted to metrics
     assert len(data["metrics"]) == 2
-    
+
     # Check that sample names are preserved
     metric_sample_names = set()
     for metric in data["metrics"]:
         for sample in metric["samples"]:
             metric_sample_names.add(sample["sample_name"])
-    
+
     assert "Sample1" in metric_sample_names
     assert "Sample2" in metric_sample_names

From 4d609bc11d17138d99d247ee96cdf00c1e5e6d92 Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Fri, 30 Jan 2026 12:57:20 -0500
Subject: [PATCH 10/13] document that created_by should be taken from the
 auth token once authentication is implemented

---
 api/qcmetrics/routes.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/api/qcmetrics/routes.py b/api/qcmetrics/routes.py
index 03a8fb3..58e2be1 100644
--- a/api/qcmetrics/routes.py
+++ b/api/qcmetrics/routes.py
@@ -38,6 +38,9 @@ def create_qcrecord(
 
     The record stores quality control metrics from a pipeline execution.
 
+    Right now - created_by is just a string username passed as a query parameter; once
+    authentication is in place, this will be derived from the logged-in user.
+
     **Request body format:**
 
     ```json

From dc04f2bbe8b401995b04a3761880d55eb3cd7a8a Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Fri, 30 Jan 2026 14:50:59 -0500
Subject: [PATCH 11/13] remove legacy sample_level_metrics handling to unify
 on the metrics format; accept numeric (int/float) metric values

---
 api/qcmetrics/models.py     |  8 ++--
 api/qcmetrics/routes.py     | 34 +++++++++++++-
 api/qcmetrics/services.py   | 17 ++-----
 tests/api/test_qcmetrics.py | 88 ++++++++++++++++++++++++++++---------
 4 files changed, 106 insertions(+), 41 deletions(-)

diff --git a/api/qcmetrics/models.py b/api/qcmetrics/models.py
index 2a23c5a..4f112d4 100644
--- a/api/qcmetrics/models.py
+++ b/api/qcmetrics/models.py
@@ -175,19 +175,19 @@ class MetricInput(SQLModel):
     """Input model for a metric group."""
     name: str
     samples: List[MetricSampleInput] | None = None
-    values: dict[str, str]  # {"reads": "50000000", "alignment_rate": "95.5"}
+    values: dict[str, str | int | float]  # {"reads": 50000000, "alignment_rate": 95.5}
 
 
 class QCRecordCreate(SQLModel):
     """
     Request model for creating a QC record.
 
-    Accepts both the new explicit format and backward-compatible formats.
+    Uses the explicit metrics format with sample associations supporting
+    workflow-level, single-sample, and paired-sample (tumor/normal) metrics.
     """
     project_id: str
     metadata: dict[str, str] | None = None  # {"pipeline": "RNA-Seq", "version": "2.0"}
-    metrics: List[MetricInput] | None = None  # New format with explicit sample associations
-    sample_level_metrics: dict[str, dict[str, str]] | None = None  # Legacy ES format
+    metrics: List[MetricInput] | None = None  # Metrics with explicit sample associations
     output_files: List[FileRecordCreate] | None = None
 
     model_config = ConfigDict(extra="forbid")
diff --git a/api/qcmetrics/routes.py b/api/qcmetrics/routes.py
index 58e2be1..9c94b84 100644
--- a/api/qcmetrics/routes.py
+++ b/api/qcmetrics/routes.py
@@ -38,8 +38,38 @@ def create_qcrecord(
 
     The record stores quality control metrics from a pipeline execution.
 
-    Right now - created_by is just a string username passed as a query parameter; once
-    authentication is in place, this will be derived from the logged-in user.
+    **Note:** Right now `created_by` is just a string username passed as a query parameter;
+    once authentication is in place, this will be derived from the logged-in user.
+
+    **Example curl command:**
+
+    ```bash
+    curl -X POST "http://localhost:8000/api/v1/qcmetrics?created_by=jsmith" \\
+      -H "Content-Type: application/json" \\
+      -d '{
+        "project_id": "P-1234",
+        "metadata": {
+          "pipeline": "RNA-Seq",
+          "version": "2.0.0"
+        },
+        "metrics": [
+          {
+            "name": "alignment_stats",
+            "samples": [{"sample_name": "Sample1"}],
+            "values": {"reads": "50000000", "alignment_rate": "95.5"}
+          }
+        ],
+        "output_files": [
+          {
+            "uri": "s3://bucket/path/file.bam",
+            "size": 123456789,
+            "samples": [{"sample_name": "Sample1"}],
+            "hash": {"md5": "abc123def456"},
+            "tags": {"type": "alignment"}
+          }
+        ]
+      }'
+    ```
 
     **Request body format:**
 
diff --git a/api/qcmetrics/services.py b/api/qcmetrics/services.py
index aad81e9..f4c8d5b 100644
--- a/api/qcmetrics/services.py
+++ b/api/qcmetrics/services.py
@@ -50,8 +50,8 @@ def create_qcrecord(
     """
     Create a new QC record with all associated data.
 
-    Handles both the new explicit format (metrics with samples) and
-    the legacy ES format (sample_level_metrics dict).
+    Metrics can have numeric values (int, float) which are stored as strings
+    in the database.
     """
     # Check for duplicate record
     existing = _check_duplicate_record(session, qcrecord_create)
@@ -82,22 +82,11 @@ def create_qcrecord(
             )
             session.add(metadata_entry)
 
-    # Add metrics (new format)
+    # Add metrics
     if qcrecord_create.metrics:
         for metric_input in qcrecord_create.metrics:
             _create_metric(session, qcrecord.id, metric_input)
 
-    # Handle legacy sample_level_metrics format (ES compatibility)
-    if qcrecord_create.sample_level_metrics:
-        for sample_name, metrics_dict in qcrecord_create.sample_level_metrics.items():
-            # Convert to new format: one metric group per sample
-            metric_input = MetricInput(
-                name=f"sample_metrics_{sample_name}",
-                samples=[{"sample_name": sample_name}],
-                values=metrics_dict,
-            )
-            _create_metric(session, qcrecord.id, metric_input)
-
     # Add output files
     if qcrecord_create.output_files:
         for file_create in qcrecord_create.output_files:
diff --git a/tests/api/test_qcmetrics.py b/tests/api/test_qcmetrics.py
index c422254..1710a20 100644
--- a/tests/api/test_qcmetrics.py
+++ b/tests/api/test_qcmetrics.py
@@ -413,23 +413,32 @@ def test_duplicate_detection(client: TestClient, session: Session):
     assert id1 == id2
 
 
-def test_legacy_sample_level_metrics_format(client: TestClient, session: Session):
+def test_numeric_metric_values(client: TestClient, session: Session):
     """
-    Test backward compatibility with the legacy ES format (sample_level_metrics).
+    Test that numeric metric values (int, float) are accepted and stored as strings.
+
+    This matches the legacy ES format where values like QC_ForwardReadCount=122483575
+    were numeric rather than string.
     """
     qcrecord_data = {
-        "project_id": "P-LEGACY-001",
+        "project_id": "P-NUMERIC-001",
         "metadata": {"pipeline": "RNA-Seq"},
-        "sample_level_metrics": {
-            "Sample1": {
-                "reads": "50000000",
-                "alignment_rate": "95.5"
-            },
-            "Sample2": {
-                "reads": "45000000",
-                "alignment_rate": "93.2"
+        "metrics": [
+            {
+                "name": "sample_qc_metrics",
+                "samples": [{"sample_name": "SampleA"}],
+                "values": {
+                    "QC_ForwardReadCount": 122483575,  # int
+                    "QC_ReverseReadCount": 122483575,  # int
+                    "QC_FractionContaminatedReads": 0,  # int (zero)
+                    "QC_MeanReadLength": 150,  # int
+                    "QC_FractionReadsAligned": 0.587,  # float
+                    "QC_StrandBalance": 0.5,  # float
+                    "QC_Median5Bias": 0.395753,  # float
+                    "QC_DynamicRange": 2452.4661796537  # float with high precision
+                }
             }
-        }
+        ]
     }
 
     response = client.post(
@@ -439,15 +448,52 @@ def test_legacy_sample_level_metrics_format(client: TestClient, session: Session
     assert response.status_code == 201
 
     data = response.json()
+    assert len(data["metrics"]) == 1
+
+    metric = data["metrics"][0]
+    assert metric["name"] == "sample_qc_metrics"
+    assert len(metric["samples"]) == 1
+    assert metric["samples"][0]["sample_name"] == "SampleA"
+
+    # Values should be stored as strings
+    values_dict = {v["key"]: v["value"] for v in metric["values"]}
+    assert values_dict["QC_ForwardReadCount"] == "122483575"
+    assert values_dict["QC_FractionReadsAligned"] == "0.587"
+    assert values_dict["QC_DynamicRange"] == "2452.4661796537"
+
+
+def test_mixed_string_and_numeric_values(client: TestClient, session: Session):
+    """
+    Test that both string and numeric values can be provided in the same metric.
+    """
+    qcrecord_data = {
+        "project_id": "P-MIXED-001",
+        "metadata": {"pipeline": "RNA-Seq"},
+        "metrics": [
+            {
+                "name": "alignment_stats",
+                "samples": [{"sample_name": "Sample1"}],
+                "values": {
+                    "total_reads": 50000000,  # numeric int
+                    "alignment_rate": 97.5,  # numeric float
+                    "reference_genome": "GRCh38",  # string
+                    "status": "passed"  # string
+                }
+            }
+        ]
+    }
 
-    # Legacy format should be converted to metrics
-    assert len(data["metrics"]) == 2
+    response = client.post(
+        "/api/v1/qcmetrics?created_by=test_user",
+        json=qcrecord_data
+    )
+    assert response.status_code == 201
 
-    # Check that sample names are preserved
-    metric_sample_names = set()
-    for metric in data["metrics"]:
-        for sample in metric["samples"]:
-            metric_sample_names.add(sample["sample_name"])
+    data = response.json()
+    values_dict = {v["key"]: v["value"] for v in data["metrics"][0]["values"]}
 
-    assert "Sample1" in metric_sample_names
-    assert "Sample2" in metric_sample_names
+    # All values should be strings in the response
+    assert values_dict["total_reads"] == "50000000"
+    assert values_dict["alignment_rate"] == "97.5"
+    assert values_dict["reference_genome"] == "GRCh38"
+    assert values_dict["status"] == "passed"

From 306c5d26c3e5bf769b0c3144dbb22604d49064af Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Fri, 30 Jan 2026 14:59:36 -0500
Subject: [PATCH 12/13] store metric value type so it can be re-cast on return

---
 ...5e6_add_qcmetrics_and_filerecord_tables.py |  6 ++-
 api/qcmetrics/models.py                       |  8 +++-
 api/qcmetrics/services.py                     | 28 +++++++++++++-
 tests/api/test_qcmetrics.py                   | 37 ++++++++++++++-----
 4 files changed, 65 insertions(+), 14 deletions(-)

diff --git a/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py b/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py
index 4c489f2..0389b16 100644
--- a/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py
+++ b/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py
@@ -117,13 +117,17 @@ def upgrade() -> None:
         sa.UniqueConstraint('qcrecord_id', 'name', name='uq_qcmetric_record_name')
     )
 
-    # qcmetricvalue - metric values
+    # qcmetricvalue - metric values with type preservation
     op.create_table(
         'qcmetricvalue',
         sa.Column('id', sa.Uuid(), nullable=False),
         sa.Column('qc_metric_id', sa.Uuid(), nullable=False),
         sa.Column('key', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False),
         sa.Column('value', sa.Text(), nullable=False),
+        sa.Column(
+            'value_type', sqlmodel.sql.sqltypes.AutoString(length=10),
+            nullable=False, server_default='str'
+        ),
         sa.ForeignKeyConstraint(['qc_metric_id'], ['qcmetric.id'], ondelete='CASCADE'),
         sa.PrimaryKeyConstraint('id'),
         sa.UniqueConstraint('qc_metric_id', 'key', name='uq_qcmetricvalue_metric_key')
diff --git a/api/qcmetrics/models.py b/api/qcmetrics/models.py
index 4f112d4..8236b5c 100644
--- a/api/qcmetrics/models.py
+++ b/api/qcmetrics/models.py
@@ -46,6 +46,9 @@ class QCMetricValue(SQLModel, table=True):
     """
     Key-value store for individual metric values within a metric group.
     Examples: reads=50000000, alignment_rate=95.5, tmb=8.5
+
+    The value_type column preserves the original Python type so values
+    can be returned in their original format (int, float, or str).
     """
     __tablename__ = "qcmetricvalue"
 
@@ -53,6 +56,7 @@ class QCMetricValue(SQLModel, table=True):
     qc_metric_id: uuid.UUID = Field(foreign_key="qcmetric.id", nullable=False)
     key: str = Field(max_length=255, nullable=False)
     value: str = Field(nullable=False)
+    value_type: str = Field(max_length=10, default="str")  # "str", "int", "float"
 
     # Relationship back to parent
     qc_metric: "QCMetric" = Relationship(back_populates="values")
@@ -194,9 +198,9 @@ class QCRecordCreate(SQLModel):
 
 
 class MetricValuePublic(SQLModel):
-    """Public representation of a metric value."""
+    """Public representation of a metric value with original type preserved."""
     key: str
-    value: str
+    value: str | int | float
 
 
 class MetricSamplePublic(SQLModel):
diff --git a/api/qcmetrics/services.py b/api/qcmetrics/services.py
index f4c8d5b..04b11fc 100644
--- a/api/qcmetrics/services.py
+++ b/api/qcmetrics/services.py
@@ -135,12 +135,24 @@ def _create_metric(
             )
             session.add(sample_assoc)
 
-    # Add metric values
+    # Add metric values with type preservation
     for key, value in metric_input.values.items():
+        # Determine the original type
+        if isinstance(value, bool):
+            # bool is subclass of int, so check first
+            value_type = "str"  # Store bools as strings
+        elif isinstance(value, int):
+            value_type = "int"
+        elif isinstance(value, float):
+            value_type = "float"
+        else:
+            value_type = "str"
+
         metric_value = QCMetricValue(
             qc_metric_id=metric.id,
             key=key,
             value=str(value),
+            value_type=value_type,
         )
         session.add(metric_value)
 
@@ -372,6 +384,15 @@ def delete_qcrecord(session: Session, qcrecord_id: str) -> dict:
     return {"status": "deleted", "id": qcrecord_id}
 
 
+def _convert_value_to_type(value: str, value_type: str) -> str | int | float:
+    """Convert a string value back to its original type."""
+    if value_type == "int":
+        return int(value)
+    elif value_type == "float":
+        return float(value)
+    return value
+
+
 def _qcrecord_to_public(session: Session, record: QCRecord) -> QCRecordPublic:
     """Convert a QCRecord database object to public format."""
     # Get metadata
@@ -410,7 +431,10 @@ def _qcrecord_to_public(session: Session, record: QCRecord) -> QCRecordPublic:
                 for s in samples
             ],
             values=[
-                MetricValuePublic(key=v.key, value=v.value)
+                MetricValuePublic(
+                    key=v.key,
+                    value=_convert_value_to_type(v.value, v.value_type)
+                )
                 for v in values
             ],
         ))
diff --git a/tests/api/test_qcmetrics.py b/tests/api/test_qcmetrics.py
index 1710a20..78496ab 100644
--- a/tests/api/test_qcmetrics.py
+++ b/tests/api/test_qcmetrics.py
@@ -415,7 +415,8 @@ def test_duplicate_detection(client: TestClient, session: Session):
 
 def test_numeric_metric_values(client: TestClient, session: Session):
     """
-    Test that numeric metric values (int, float) are accepted and stored as strings.
+    Test that numeric metric values (int, float) are accepted and returned
+    with their original types preserved.
 
     This matches the legacy ES format where values like QC_ForwardReadCount=122483575
     were numeric rather than string.
@@ -455,16 +456,28 @@ def test_numeric_metric_values(client: TestClient, session: Session):
     assert len(metric["samples"]) == 1
     assert metric["samples"][0]["sample_name"] == "SampleA"
 
-    # Values should be stored as strings
+    # Values should be returned with their original types preserved
     values_dict = {v["key"]: v["value"] for v in metric["values"]}
-    assert values_dict["QC_ForwardReadCount"] == "122483575"
-    assert values_dict["QC_FractionReadsAligned"] == "0.587"
-    assert values_dict["QC_DynamicRange"] == "2452.4661796537"
+
+    # Integer values
+    assert values_dict["QC_ForwardReadCount"] == 122483575
+    assert isinstance(values_dict["QC_ForwardReadCount"], int)
+    assert values_dict["QC_FractionContaminatedReads"] == 0
+    assert isinstance(values_dict["QC_FractionContaminatedReads"], int)
+    assert values_dict["QC_MeanReadLength"] == 150
+    assert isinstance(values_dict["QC_MeanReadLength"], int)
+
+    # Float values
+    assert values_dict["QC_FractionReadsAligned"] == 0.587
+    assert isinstance(values_dict["QC_FractionReadsAligned"], float)
+    assert values_dict["QC_DynamicRange"] == 2452.4661796537
+    assert isinstance(values_dict["QC_DynamicRange"], float)
 
 
 def test_mixed_string_and_numeric_values(client: TestClient, session: Session):
     """
-    Test that both string and numeric values can be provided in the same metric.
+    Test that both string and numeric values can be provided in the same metric,
+    and each is returned with its original type.
     """
     qcrecord_data = {
         "project_id": "P-MIXED-001",
@@ -492,8 +505,14 @@ def test_mixed_string_and_numeric_values(client: TestClient, session: Session):
     data = response.json()
     values_dict = {v["key"]: v["value"] for v in data["metrics"][0]["values"]}
 
-    # All values should be strings in the response
-    assert values_dict["total_reads"] == "50000000"
-    assert values_dict["alignment_rate"] == "97.5"
+    # Numeric values returned with original types
+    assert values_dict["total_reads"] == 50000000
+    assert isinstance(values_dict["total_reads"], int)
+    assert values_dict["alignment_rate"] == 97.5
+    assert isinstance(values_dict["alignment_rate"], float)
+
+    # String values remain as strings
     assert values_dict["reference_genome"] == "GRCh38"
+    assert isinstance(values_dict["reference_genome"], str)
     assert values_dict["status"] == "passed"
+    assert isinstance(values_dict["status"], str)

From 75624323a79c74ae0d3eea224d6383c94c46a381 Mon Sep 17 00:00:00 2001
From: vasques1 <Steven.VasquezGrinnell@bms.com>
Date: Fri, 30 Jan 2026 16:00:22 -0500
Subject: [PATCH 13/13] store both string and numeric value representations
 with index to enable ranged queries for LLM

---
 ...5e6_add_qcmetrics_and_filerecord_tables.py | 11 +++++--
 api/qcmetrics/models.py                       | 10 ++++--
 api/qcmetrics/services.py                     | 33 ++++++++++++-------
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py b/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py
index 0389b16..3d7a8cc 100644
--- a/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py
+++ b/alembic/versions/f1a2b3c4d5e6_add_qcmetrics_and_filerecord_tables.py
@@ -117,13 +117,14 @@ def upgrade() -> None:
         sa.UniqueConstraint('qcrecord_id', 'name', name='uq_qcmetric_record_name')
     )
 
-    # qcmetricvalue - metric values with type preservation
+    # qcmetricvalue - metric values with dual storage for string/numeric queries
     op.create_table(
         'qcmetricvalue',
         sa.Column('id', sa.Uuid(), nullable=False),
         sa.Column('qc_metric_id', sa.Uuid(), nullable=False),
         sa.Column('key', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False),
-        sa.Column('value', sa.Text(), nullable=False),
+        sa.Column('value_string', sa.Text(), nullable=False),
+        sa.Column('value_numeric', sa.Float(), nullable=True),
         sa.Column(
             'value_type', sqlmodel.sql.sqltypes.AutoString(length=10),
             nullable=False, server_default='str'
@@ -132,6 +133,11 @@ def upgrade() -> None:
         sa.PrimaryKeyConstraint('id'),
         sa.UniqueConstraint('qc_metric_id', 'key', name='uq_qcmetricvalue_metric_key')
     )
+    # Index on key + value_numeric for efficient numeric range queries
+    op.create_index(
+        'ix_qcmetricvalue_key_numeric', 'qcmetricvalue',
+        ['key', 'value_numeric']
+    )
 
     # qcmetricsample - sample associations for metrics
     op.create_table(
@@ -151,6 +157,7 @@ def downgrade() -> None:
 
     # Drop QCRecord tables (in reverse order of creation)
     op.drop_table('qcmetricsample')
+    op.drop_index('ix_qcmetricvalue_key_numeric', table_name='qcmetricvalue')
     op.drop_table('qcmetricvalue')
     op.drop_table('qcmetric')
     op.drop_table('qcrecordmetadata')
diff --git a/api/qcmetrics/models.py b/api/qcmetrics/models.py
index 8236b5c..de81b19 100644
--- a/api/qcmetrics/models.py
+++ b/api/qcmetrics/models.py
@@ -47,15 +47,19 @@ class QCMetricValue(SQLModel, table=True):
     Key-value store for individual metric values within a metric group.
     Examples: reads=50000000, alignment_rate=95.5, tmb=8.5
 
-    The value_type column preserves the original Python type so values
-    can be returned in their original format (int, float, or str).
+    Stores each value in two formats, plus a tag recording its original type:
+    - value_string: Always populated, used for string matching and display
+    - value_numeric: Populated only for int/float types, enables numeric queries
+      (greater than, less than, range, aggregations)
+    - value_type: Preserves original Python type ("str", "int", "float")
     """
     __tablename__ = "qcmetricvalue"
 
     id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
     qc_metric_id: uuid.UUID = Field(foreign_key="qcmetric.id", nullable=False)
     key: str = Field(max_length=255, nullable=False)
-    value: str = Field(nullable=False)
+    value_string: str = Field(nullable=False)
+    value_numeric: float | None = Field(default=None, nullable=True)  # For numeric queries
     value_type: str = Field(max_length=10, default="str")  # "str", "int", "float"
 
     # Relationship back to parent
diff --git a/api/qcmetrics/services.py b/api/qcmetrics/services.py
index 04b11fc..0115918 100644
--- a/api/qcmetrics/services.py
+++ b/api/qcmetrics/services.py
@@ -135,23 +135,28 @@ def _create_metric(
             )
             session.add(sample_assoc)
 
-    # Add metric values with type preservation
+    # Add metric values with type preservation and dual storage
     for key, value in metric_input.values.items():
-        # Determine the original type
+        # Determine the original type and numeric value
         if isinstance(value, bool):
             # bool is subclass of int, so check first
-            value_type = "str"  # Store bools as strings
+            value_type = "str"
+            value_numeric = None
         elif isinstance(value, int):
             value_type = "int"
+            value_numeric = float(value)  # Store as float for consistent numeric ops
         elif isinstance(value, float):
             value_type = "float"
+            value_numeric = value
         else:
             value_type = "str"
+            value_numeric = None
 
         metric_value = QCMetricValue(
             qc_metric_id=metric.id,
             key=key,
-            value=str(value),
+            value_string=str(value),
+            value_numeric=value_numeric,
             value_type=value_type,
         )
         session.add(metric_value)
@@ -384,13 +389,15 @@ def delete_qcrecord(session: Session, qcrecord_id: str) -> dict:
     return {"status": "deleted", "id": qcrecord_id}
 
 
-def _convert_value_to_type(value: str, value_type: str) -> str | int | float:
-    """Convert a string value back to its original type."""
-    if value_type == "int":
-        return int(value)
-    elif value_type == "float":
-        return float(value)
-    return value
+def _convert_value_to_type(
+    value_string: str, value_numeric: float | None, value_type: str
+) -> str | int | float:
+    """Rebuild the original value; ints are parsed from value_string (exact)."""
+    if value_type == "int" and value_numeric is not None:
+        return int(value_string)  # int(value_numeric) corrupts ints above 2**53
+    elif value_type == "float" and value_numeric is not None:
+        return value_numeric
+    return value_string
 
 
 def _qcrecord_to_public(session: Session, record: QCRecord) -> QCRecordPublic:
@@ -433,7 +440,9 @@ def _qcrecord_to_public(session: Session, record: QCRecord) -> QCRecordPublic:
             values=[
                 MetricValuePublic(
                     key=v.key,
-                    value=_convert_value_to_type(v.value, v.value_type)
+                    value=_convert_value_to_type(
+                        v.value_string, v.value_numeric, v.value_type
+                    )
                 )
                 for v in values
             ],