From 00f9e97aa60e48a049499141110f6ab6da756ba8 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Sat, 20 Sep 2025 09:43:45 -0400 Subject: [PATCH 01/35] Add files architecture documentation --- files_api_architecture.md | 292 +++++++++++++++++++++ files_api_design.md | 334 ++++++++++++++++++++++++ files_api_implementation_plan.md | 428 +++++++++++++++++++++++++++++++ 3 files changed, 1054 insertions(+) create mode 100644 files_api_architecture.md create mode 100644 files_api_design.md create mode 100644 files_api_implementation_plan.md diff --git a/files_api_architecture.md b/files_api_architecture.md new file mode 100644 index 0000000..d457350 --- /dev/null +++ b/files_api_architecture.md @@ -0,0 +1,292 @@ +# Files API Architecture Diagram + +## System Architecture Overview + +```mermaid +graph TB + subgraph "Client Layer" + WEB[Web Frontend] + API_CLIENT[API Clients] + CLI[CLI Tools] + end + + subgraph "API Layer" + ROUTER[FastAPI Router] + AUTH[Authentication] + VALID[Validation] + end + + subgraph "Service Layer" + FILE_SVC[File Service] + STORAGE_SVC[Storage Service] + METADATA_SVC[Metadata Service] + end + + subgraph "Storage Backends" + LOCAL[Local Storage] + S3[AWS S3] + AZURE[Azure Blob] + GCS[Google Cloud] + end + + subgraph "Database Layer" + FILE_TBL[(File Table)] + PROJECT_TBL[(Project Table)] + RUN_TBL[(Run Table)] + PERM_TBL[(Permissions Table)] + end + + subgraph "Search & Index" + OPENSEARCH[OpenSearch] + SEARCH_IDX[File Index] + end + + WEB --> ROUTER + API_CLIENT --> ROUTER + CLI --> ROUTER + + ROUTER --> AUTH + AUTH --> VALID + VALID --> FILE_SVC + + FILE_SVC --> STORAGE_SVC + FILE_SVC --> METADATA_SVC + FILE_SVC --> FILE_TBL + + STORAGE_SVC --> LOCAL + STORAGE_SVC --> S3 + STORAGE_SVC --> AZURE + STORAGE_SVC --> GCS + + FILE_TBL -.-> PROJECT_TBL + FILE_TBL -.-> RUN_TBL + FILE_TBL --> PERM_TBL + + METADATA_SVC --> OPENSEARCH + OPENSEARCH --> SEARCH_IDX +``` + +## Data Model Relationships + +```mermaid +erDiagram + PROJECT { + uuid id PK + string project_id UK + string name + } + + SEQUENCING_RUN { + uuid id PK + string barcode UK + date run_date + string machine_id + string status + } + + FILE { + uuid id PK + string file_id UK + string filename + string original_filename + string file_path + int file_size + string mime_type + string checksum + string description + enum file_type + datetime upload_date + string created_by + enum entity_type + string entity_id FK + enum storage_backend + boolean is_public + boolean is_archived + } + + FILE_PERMISSION { + uuid id PK + uuid file_id FK + string user_id + string group_id + enum permission_type + datetime granted_date + } + + FILE_VERSION { + uuid id PK + uuid file_id FK + int version_number + string file_path + int file_size + string checksum + datetime created_date + boolean is_current + } + + PROJECT ||--o{ FILE : "has files" + SEQUENCING_RUN ||--o{ FILE : "has files" + FILE ||--o{ FILE_PERMISSION : "has permissions" + FILE ||--o{ FILE_VERSION : "has versions" +``` + +## API Endpoint Structure + +```mermaid +graph LR + subgraph "Generic File Operations" + A[GET /files] --> A1[List all files] + B[POST /files] --> B1[Upload file] + C[GET /files/{id}] --> C1[Get file metadata] + D[PUT /files/{id}] --> D1[Update metadata] + E[DELETE /files/{id}] --> E1[Delete file] + F[GET /files/{id}/download] --> F1[Download file] + end + + subgraph "Project File Operations" + G[GET /projects/{id}/files] --> G1[List project files] + H[POST /projects/{id}/files] --> H1[Upload to project] + I[GET /projects/{id}/files/{file_id}] --> I1[Get project 
file] + end + + subgraph "Run File Operations" + J[GET /runs/{barcode}/files] --> J1[List run files] + K[POST /runs/{barcode}/files] --> K1[Upload to run] + L[GET /runs/{barcode}/files/{file_id}] --> L1[Get run file] + end + + subgraph "Bulk Operations" + M[POST /files/bulk-upload] --> M1[Upload multiple] + N[DELETE /files/bulk-delete] --> N1[Delete multiple] + end +``` + +## File Upload Flow + +```mermaid +sequenceDiagram + participant Client + participant API + participant FileService + participant StorageService + participant Database + participant Storage + + Client->>API: POST /projects/{id}/files + API->>API: Validate request + API->>FileService: upload_file() + FileService->>FileService: Generate file_id + FileService->>StorageService: store_file() + StorageService->>Storage: Upload file data + Storage-->>StorageService: Storage URI + StorageService-->>FileService: File path + FileService->>Database: Save file metadata + Database-->>FileService: File record + FileService-->>API: File metadata + API-->>Client: Upload response +``` + +## File Download Flow + +```mermaid +sequenceDiagram + participant Client + participant API + participant FileService + participant StorageService + participant Database + participant Storage + + Client->>API: GET /files/{id}/download + API->>API: Check permissions + API->>FileService: get_download_url() + FileService->>Database: Get file metadata + Database-->>FileService: File record + FileService->>StorageService: generate_download_url() + StorageService->>Storage: Create signed URL + Storage-->>StorageService: Signed URL + StorageService-->>FileService: Download URL + FileService-->>API: Download URL + API-->>Client: Redirect or URL response +``` + +## Storage Strategy + +```mermaid +graph TB + subgraph "File Organization" + ROOT[Storage Root] + ROOT --> PROJECTS[/projects/] + ROOT --> RUNS[/runs/] + + PROJECTS --> PROJ_ID[/{project_id}/] + RUNS --> RUN_ID[/{run_barcode}/] + + PROJ_ID --> PROJ_TYPE[/{file_type}/] + RUN_ID --> RUN_TYPE[/{file_type}/] + + PROJ_TYPE --> PROJ_DATE[/{year}/{month}/] + RUN_TYPE --> RUN_DATE[/{year}/{month}/] + + PROJ_DATE --> PROJ_FILE[/{file_id}_{filename}] + RUN_DATE --> RUN_FILE[/{file_id}_{filename}] + end + + subgraph "Storage Backends" + LOCAL_FS[Local Filesystem] + AWS_S3[AWS S3] + AZURE_BLOB[Azure Blob Storage] + GCP_STORAGE[Google Cloud Storage] + end + + PROJ_FILE -.-> LOCAL_FS + PROJ_FILE -.-> AWS_S3 + PROJ_FILE -.-> AZURE_BLOB + PROJ_FILE -.-> GCP_STORAGE + + RUN_FILE -.-> LOCAL_FS + RUN_FILE -.-> AWS_S3 + RUN_FILE -.-> AZURE_BLOB + RUN_FILE -.-> GCP_STORAGE +``` + +## Security and Access Control + +```mermaid +graph TB + subgraph "Authentication" + USER[User Request] + AUTH_CHECK[Authentication Check] + TOKEN[JWT Token Validation] + end + + subgraph "Authorization" + PERM_CHECK[Permission Check] + ENTITY_ACCESS[Entity Access Check] + FILE_ACCESS[File Access Check] + end + + subgraph "File Operations" + READ_OP[Read Operation] + WRITE_OP[Write Operation] + DELETE_OP[Delete Operation] + end + + USER --> AUTH_CHECK + AUTH_CHECK --> TOKEN + TOKEN --> PERM_CHECK + PERM_CHECK --> ENTITY_ACCESS + ENTITY_ACCESS --> FILE_ACCESS + FILE_ACCESS --> READ_OP + FILE_ACCESS --> WRITE_OP + FILE_ACCESS --> DELETE_OP +``` + +This architecture provides: + +1. **Scalable Design**: Supports multiple storage backends and can handle large file volumes +2. **Flexible Associations**: Files can be linked to any entity type (projects, runs, future entities) +3. **Rich Metadata**: Comprehensive file information and categorization +4. 
**Security**: Multi-layered permission system +5. **Performance**: Efficient querying and caching strategies +6. **Extensibility**: Easy to add new file types, storage backends, and features \ No newline at end of file diff --git a/files_api_design.md b/files_api_design.md new file mode 100644 index 0000000..c594eb1 --- /dev/null +++ b/files_api_design.md @@ -0,0 +1,334 @@ +# Files API Design for NGS360 + +## Overview +Design a unified Files API that supports file operations (list, fetch, upload, delete) for both sequencing runs and projects, with flexible storage backends and comprehensive metadata tracking. + +## Current System Analysis + +### Existing Models +- **Projects**: Have `project_id` (string), `name`, and `attributes` +- **Runs**: Have `id` (UUID), `barcode` (computed), `run_folder_uri`, and various metadata +- **Files API**: Currently has skeleton routes but missing models/services + +## Proposed Architecture + +### 1. Data Model Design + +#### Core File Model +```python +class File(SQLModel, table=True): + """Core file entity that can be associated with runs or projects""" + + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + file_id: str = Field(unique=True) # Human-readable identifier + filename: str = Field(max_length=255) + original_filename: str = Field(max_length=255) + file_path: str = Field(max_length=1024) # Storage path/URI + file_size: int | None = None # Size in bytes + mime_type: str | None = Field(max_length=100) + checksum: str | None = Field(max_length=64) # SHA-256 hash + + # Metadata + description: str | None = Field(max_length=1024) + file_type: FileType = Field(default=FileType.OTHER) + upload_date: datetime = Field(default_factory=datetime.utcnow) + created_by: str | None = Field(max_length=100) # User identifier + + # Polymorphic associations + entity_type: EntityType # "project" or "run" + entity_id: str # project_id or run barcode + + # Storage metadata + storage_backend: StorageBackend = Field(default=StorageBackend.LOCAL) + is_public: bool = Field(default=False) + is_archived: bool = Field(default=False) +``` + +#### Enums +```python +class FileType(str, Enum): + """File type categories""" + FASTQ = "fastq" + BAM = "bam" + VCF = "vcf" + SAMPLESHEET = "samplesheet" + METRICS = "metrics" + REPORT = "report" + LOG = "log" + IMAGE = "image" + DOCUMENT = "document" + OTHER = "other" + +class EntityType(str, Enum): + """Entity types that can have files""" + PROJECT = "project" + RUN = "run" + +class StorageBackend(str, Enum): + """Storage backend types""" + LOCAL = "local" + S3 = "s3" + AZURE = "azure" + GCS = "gcs" +``` + +### 2. 
API Endpoint Design + +#### Unified Files Endpoints +``` +# Generic file operations +GET /api/v1/files # List all files (with filters) +POST /api/v1/files # Upload file (requires entity association) +GET /api/v1/files/{file_id} # Get file metadata +PUT /api/v1/files/{file_id} # Update file metadata +DELETE /api/v1/files/{file_id} # Delete file +GET /api/v1/files/{file_id}/download # Download file content + +# Entity-specific file operations +GET /api/v1/projects/{project_id}/files # List project files +POST /api/v1/projects/{project_id}/files # Upload file to project +GET /api/v1/projects/{project_id}/files/{file_id} # Get project file + +GET /api/v1/runs/{run_barcode}/files # List run files +POST /api/v1/runs/{run_barcode}/files # Upload file to run +GET /api/v1/runs/{run_barcode}/files/{file_id} # Get run file + +# Bulk operations +POST /api/v1/files/bulk-upload # Upload multiple files +DELETE /api/v1/files/bulk-delete # Delete multiple files +``` + +### 3. Request/Response Models + +#### File Upload Request +```python +class FileUploadRequest(SQLModel): + """Request model for file upload""" + filename: str + description: str | None = None + file_type: FileType = FileType.OTHER + is_public: bool = False + tags: List[str] | None = None + +class FileUploadResponse(SQLModel): + """Response model for file upload""" + file_id: str + filename: str + upload_url: str | None = None # For direct upload scenarios + file_size: int | None = None + checksum: str | None = None +``` + +#### File Listing +```python +class FilePublic(SQLModel): + """Public file representation""" + file_id: str + filename: str + original_filename: str + file_size: int | None + mime_type: str | None + description: str | None + file_type: FileType + upload_date: datetime + created_by: str | None + entity_type: EntityType + entity_id: str + is_public: bool + download_url: str | None = None + +class FilesPublic(SQLModel): + """Paginated file listing""" + data: List[FilePublic] + total_items: int + total_pages: int + current_page: int + per_page: int + has_next: bool + has_prev: bool + filters: Dict[str, Any] | None = None +``` + +### 4. Storage Strategy + +#### Multi-Backend Support +```python +class StorageService: + """Abstract storage service interface""" + + async def upload_file(self, file_data: bytes, file_path: str) -> str: + """Upload file and return storage URI""" + pass + + async def download_file(self, file_path: str) -> bytes: + """Download file content""" + pass + + async def delete_file(self, file_path: str) -> bool: + """Delete file from storage""" + pass + + async def get_download_url(self, file_path: str, expires_in: int = 3600) -> str: + """Generate temporary download URL""" + pass + +class LocalStorageService(StorageService): + """Local filesystem storage""" + pass + +class S3StorageService(StorageService): + """AWS S3 storage""" + pass +``` + +#### File Path Strategy +``` +Storage Structure: +/{storage_root}/ + /projects/ + /{project_id}/ + /{file_type}/ + /{year}/{month}/ + /{file_id}_{original_filename} + /runs/ + /{run_barcode}/ + /{file_type}/ + /{year}/{month}/ + /{file_id}_{original_filename} +``` + +### 5. 
Advanced Features + +#### File Filtering and Search +```python +class FileFilters(SQLModel): + """File filtering options""" + entity_type: EntityType | None = None + entity_id: str | None = None + file_type: FileType | None = None + mime_type: str | None = None + created_by: str | None = None + date_from: datetime | None = None + date_to: datetime | None = None + is_public: bool | None = None + is_archived: bool | None = None + tags: List[str] | None = None + search_query: str | None = None # Search in filename/description +``` + +#### File Versioning (Future Enhancement) +```python +class FileVersion(SQLModel, table=True): + """File version tracking""" + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + file_id: uuid.UUID = Field(foreign_key="file.id") + version_number: int + file_path: str + file_size: int + checksum: str + created_date: datetime = Field(default_factory=datetime.utcnow) + is_current: bool = Field(default=True) +``` + +### 6. Security and Access Control + +#### File Access Permissions +```python +class FilePermission(SQLModel, table=True): + """File-level permissions""" + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + file_id: uuid.UUID = Field(foreign_key="file.id") + user_id: str | None = None + group_id: str | None = None + permission_type: PermissionType + granted_date: datetime = Field(default_factory=datetime.utcnow) + +class PermissionType(str, Enum): + READ = "read" + WRITE = "write" + DELETE = "delete" + ADMIN = "admin" +``` + +### 7. Integration Points + +#### With Existing Models +- **Projects**: Add relationship to files via `entity_id` = `project_id` +- **Runs**: Add relationship to files via `entity_id` = `run.barcode` +- **Search**: Include files in OpenSearch indexing for full-text search + +#### Database Relationships +```python +# In Project model +files: List["File"] = Relationship( + sa_relationship_kwargs={ + "primaryjoin": "and_(Project.project_id == File.entity_id, File.entity_type == 'project')", + "foreign_keys": "[File.entity_id]" + } +) + +# In SequencingRun model +files: List["File"] = Relationship( + sa_relationship_kwargs={ + "primaryjoin": "and_(SequencingRun.barcode == File.entity_id, File.entity_type == 'run')", + "foreign_keys": "[File.entity_id]" + } +) +``` + +## Implementation Plan + +### Phase 1: Core Infrastructure +1. Create file models and enums +2. Implement basic storage service (local filesystem) +3. Create core CRUD operations +4. Add database migration + +### Phase 2: API Endpoints +1. Implement generic file endpoints +2. Add entity-specific endpoints +3. Create file upload/download functionality +4. Add comprehensive error handling + +### Phase 3: Advanced Features +1. Add file filtering and search +2. Implement multiple storage backends +3. Add file metadata extraction +4. Create bulk operations + +### Phase 4: Integration & Security +1. Integrate with existing project/run models +2. Add authentication and authorization +3. Implement file permissions +4. Add audit logging + +## Benefits + +1. **Unified Interface**: Single API for all file operations across projects and runs +2. **Flexible Storage**: Support for multiple storage backends (local, S3, etc.) +3. **Rich Metadata**: Comprehensive file metadata and categorization +4. **Scalable**: Designed to handle large numbers of files efficiently +5. **Secure**: Built-in permission system and access controls +6. **Searchable**: Integration with existing search infrastructure +7. 
**Extensible**: Easy to add new file types and storage backends + +## Example Usage + +```python +# Upload a file to a project +POST /api/v1/projects/PROJ001/files +{ + "filename": "analysis_results.pdf", + "description": "Final analysis report", + "file_type": "report", + "is_public": false +} + +# List all FASTQ files for a run +GET /api/v1/runs/190110_MACHINE123_0001_FLOWCELL123/files?file_type=fastq + +# Search for files across all entities +GET /api/v1/files?search_query=analysis&file_type=report&date_from=2024-01-01 +``` + +This design provides a robust, scalable, and flexible file management system that integrates seamlessly with the existing NGS360 architecture. \ No newline at end of file diff --git a/files_api_implementation_plan.md b/files_api_implementation_plan.md new file mode 100644 index 0000000..670cbcf --- /dev/null +++ b/files_api_implementation_plan.md @@ -0,0 +1,428 @@ +# Files API Implementation Roadmap + +## Overview +This document outlines the step-by-step implementation plan for the Files API, organized into phases with specific deliverables and acceptance criteria. + +## Phase 1: Foundation (Week 1-2) + +### 1.1 Database Models and Migrations +**Priority: Critical** + +#### Tasks: +- [ ] Create `api/files/models.py` with core file models +- [ ] Create database migration for file tables +- [ ] Add foreign key relationships to existing models +- [ ] Create enum definitions for file types and storage backends + +#### Deliverables: +```python +# File models with proper relationships +class File(SQLModel, table=True) +class FileType(str, Enum) +class EntityType(str, Enum) +class StorageBackend(str, Enum) + +# Migration script +# alembic/versions/xxx_create_file_tables.py +``` + +#### Acceptance Criteria: +- [ ] Database migration runs successfully +- [ ] All model relationships work correctly +- [ ] Foreign key constraints are properly defined +- [ ] Enum values are correctly stored and retrieved + +### 1.2 Basic Storage Service +**Priority: Critical** + +#### Tasks: +- [ ] Create `api/files/storage.py` with storage interface +- [ ] Implement `LocalStorageService` for filesystem storage +- [ ] Create file path generation utilities +- [ ] Add basic file operations (upload, download, delete) + +#### Deliverables: +```python +# Storage service interface and implementation +class StorageService(ABC) +class LocalStorageService(StorageService) + +# Utility functions +def generate_file_path(entity_type, entity_id, file_type, filename) +def ensure_directory_exists(path) +``` + +#### Acceptance Criteria: +- [ ] Files can be uploaded to local storage +- [ ] Files can be downloaded from local storage +- [ ] File paths are generated consistently +- [ ] Directory structure is created automatically + +### 1.3 Core Service Layer +**Priority: Critical** + +#### Tasks: +- [ ] Create `api/files/services.py` with CRUD operations +- [ ] Implement file metadata management +- [ ] Add file validation and security checks +- [ ] Create file ID generation logic + +#### Deliverables: +```python +# Core file operations +def create_file(session, file_data, entity_type, entity_id) +def get_file(session, file_id) +def update_file(session, file_id, updates) +def delete_file(session, file_id) +def list_files(session, filters, pagination) +``` + +#### Acceptance Criteria: +- [ ] All CRUD operations work correctly +- [ ] File metadata is properly validated +- [ ] Unique file IDs are generated +- [ ] Database transactions are handled properly + +## Phase 2: API Endpoints (Week 3-4) + +### 2.1 Generic 
File Endpoints +**Priority: High** + +#### Tasks: +- [ ] Update `api/files/routes.py` with complete endpoint set +- [ ] Implement file upload with multipart form data +- [ ] Add file download with proper headers +- [ ] Create file listing with filtering and pagination + +#### Deliverables: +```python +# Complete API endpoints +@router.post("/files") # Upload file +@router.get("/files") # List files +@router.get("/files/{file_id}") # Get file metadata +@router.put("/files/{file_id}") # Update metadata +@router.delete("/files/{file_id}") # Delete file +@router.get("/files/{file_id}/download") # Download file +``` + +#### Acceptance Criteria: +- [ ] File upload works with proper validation +- [ ] File download returns correct content and headers +- [ ] File listing supports filtering and pagination +- [ ] All endpoints return proper HTTP status codes +- [ ] Error handling is comprehensive + +### 2.2 Entity-Specific Endpoints +**Priority: High** + +#### Tasks: +- [ ] Add project-specific file endpoints +- [ ] Add run-specific file endpoints +- [ ] Implement entity validation (project/run exists) +- [ ] Add entity-based access control + +#### Deliverables: +```python +# Project file endpoints +@router.get("/projects/{project_id}/files") +@router.post("/projects/{project_id}/files") +@router.get("/projects/{project_id}/files/{file_id}") + +# Run file endpoints +@router.get("/runs/{run_barcode}/files") +@router.post("/runs/{run_barcode}/files") +@router.get("/runs/{run_barcode}/files/{file_id}") +``` + +#### Acceptance Criteria: +- [ ] Entity validation works correctly +- [ ] Files are properly associated with entities +- [ ] Entity-specific file listing works +- [ ] Access control prevents unauthorized access + +### 2.3 Request/Response Models +**Priority: High** + +#### Tasks: +- [ ] Create comprehensive Pydantic models +- [ ] Add proper validation rules +- [ ] Implement response serialization +- [ ] Add API documentation + +#### Deliverables: +```python +# Request/Response models +class FileUploadRequest(SQLModel) +class FileUploadResponse(SQLModel) +class FilePublic(SQLModel) +class FilesPublic(SQLModel) +class FileFilters(SQLModel) +``` + +#### Acceptance Criteria: +- [ ] All models have proper validation +- [ ] API documentation is auto-generated +- [ ] Response serialization works correctly +- [ ] Error responses are properly formatted + +## Phase 3: Advanced Features (Week 5-6) + +### 3.1 File Search and Filtering +**Priority: Medium** + +#### Tasks: +- [ ] Implement advanced file filtering +- [ ] Add full-text search capabilities +- [ ] Integrate with OpenSearch +- [ ] Add file indexing for search + +#### Deliverables: +```python +# Advanced filtering +class FileFilters(SQLModel) +def search_files(session, search_query, filters) +def index_file_for_search(file_metadata) +``` + +#### Acceptance Criteria: +- [ ] Complex filtering works correctly +- [ ] Full-text search returns relevant results +- [ ] Search performance is acceptable +- [ ] Search index stays synchronized + +### 3.2 Multiple Storage Backends +**Priority: Medium** + +#### Tasks: +- [ ] Implement S3StorageService +- [ ] Add storage backend configuration +- [ ] Create storage backend selection logic +- [ ] Add storage backend migration tools + +#### Deliverables: +```python +# Additional storage backends +class S3StorageService(StorageService) +class AzureStorageService(StorageService) +class StorageBackendFactory + +# Configuration +STORAGE_BACKENDS = { + "local": LocalStorageService, + "s3": S3StorageService, + "azure": 
AzureStorageService +} +``` + +#### Acceptance Criteria: +- [ ] Multiple storage backends work correctly +- [ ] Storage backend can be configured per file +- [ ] File migration between backends is possible +- [ ] All backends support the same interface + +### 3.3 Bulk Operations +**Priority: Medium** + +#### Tasks: +- [ ] Implement bulk file upload +- [ ] Add bulk file deletion +- [ ] Create batch processing utilities +- [ ] Add progress tracking for bulk operations + +#### Deliverables: +```python +# Bulk operations +@router.post("/files/bulk-upload") +@router.delete("/files/bulk-delete") +def process_bulk_upload(files, entity_type, entity_id) +def process_bulk_delete(file_ids) +``` + +#### Acceptance Criteria: +- [ ] Bulk upload handles multiple files efficiently +- [ ] Bulk delete is transactional +- [ ] Progress tracking works correctly +- [ ] Error handling for partial failures + +## Phase 4: Integration & Security (Week 7-8) + +### 4.1 Authentication and Authorization +**Priority: High** + +#### Tasks: +- [ ] Integrate with existing auth system +- [ ] Implement file-level permissions +- [ ] Add user/group access control +- [ ] Create permission management endpoints + +#### Deliverables: +```python +# Permission models and services +class FilePermission(SQLModel, table=True) +class PermissionType(str, Enum) +def check_file_permission(user, file_id, permission_type) +def grant_file_permission(file_id, user_id, permission_type) +``` + +#### Acceptance Criteria: +- [ ] Authentication is required for all operations +- [ ] File permissions are enforced correctly +- [ ] Permission inheritance works properly +- [ ] Admin users can manage all files + +### 4.2 Integration with Existing Models +**Priority: High** + +#### Tasks: +- [ ] Add file relationships to Project model +- [ ] Add file relationships to SequencingRun model +- [ ] Update existing API responses to include file counts +- [ ] Create file association utilities + +#### Deliverables: +```python +# Model updates +# In api/project/models.py +files: List["File"] = Relationship(...) + +# In api/runs/models.py +files: List["File"] = Relationship(...) 
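+# (the full join config is sketched in files_api_design.md, Integration Points, e.g.
+#  sa_relationship_kwargs={"primaryjoin": "and_(SequencingRun.barcode == File.entity_id, File.entity_type == 'run')",
+#                          "foreign_keys": "[File.entity_id]"})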
+ +# Updated response models +class ProjectPublic(SQLModel): + file_count: int | None = None + +class SequencingRunPublic(SQLModel): + file_count: int | None = None +``` + +#### Acceptance Criteria: +- [ ] File relationships work correctly +- [ ] Existing APIs show file information +- [ ] File counts are accurate +- [ ] No breaking changes to existing APIs + +### 4.3 Audit Logging and Monitoring +**Priority: Medium** + +#### Tasks: +- [ ] Add audit logging for file operations +- [ ] Create file access logs +- [ ] Add monitoring metrics +- [ ] Implement file usage analytics + +#### Deliverables: +```python +# Audit logging +class FileAuditLog(SQLModel, table=True) +def log_file_operation(user, file_id, operation, details) + +# Monitoring metrics +def track_file_upload(file_size, file_type) +def track_file_download(file_id, user_id) +``` + +#### Acceptance Criteria: +- [ ] All file operations are logged +- [ ] Audit logs are searchable +- [ ] Monitoring metrics are collected +- [ ] Usage analytics are available + +## Phase 5: Testing & Documentation (Week 9-10) + +### 5.1 Comprehensive Testing +**Priority: Critical** + +#### Tasks: +- [ ] Create unit tests for all services +- [ ] Add integration tests for API endpoints +- [ ] Create performance tests for file operations +- [ ] Add security tests for access control + +#### Deliverables: +```python +# Test files +tests/api/test_files.py +tests/services/test_file_service.py +tests/storage/test_storage_backends.py +tests/security/test_file_permissions.py +``` + +#### Acceptance Criteria: +- [ ] Test coverage > 90% +- [ ] All tests pass consistently +- [ ] Performance tests meet requirements +- [ ] Security tests validate access control + +### 5.2 API Documentation +**Priority: High** + +#### Tasks: +- [ ] Complete OpenAPI documentation +- [ ] Create usage examples +- [ ] Add integration guides +- [ ] Create troubleshooting documentation + +#### Deliverables: +- Complete API documentation +- Usage examples and tutorials +- Integration guides for different storage backends +- Troubleshooting and FAQ documentation + +#### Acceptance Criteria: +- [ ] API documentation is complete and accurate +- [ ] Examples work correctly +- [ ] Integration guides are clear +- [ ] Documentation is accessible to developers + +## Success Metrics + +### Performance Targets +- File upload: < 2 seconds for files up to 100MB +- File download: < 1 second for metadata, streaming for content +- File listing: < 500ms for paginated results +- Search: < 1 second for complex queries + +### Scalability Targets +- Support for 10,000+ files per project/run +- Handle 100+ concurrent file operations +- Storage backend agnostic (local, S3, Azure, GCS) +- Horizontal scaling capability + +### Security Requirements +- All file operations require authentication +- File-level access control +- Audit logging for all operations +- Secure file URLs with expiration + +## Risk Mitigation + +### Technical Risks +1. **Large file handling**: Implement streaming uploads/downloads +2. **Storage costs**: Add file lifecycle management +3. **Performance**: Implement caching and CDN integration +4. **Data consistency**: Use database transactions and validation + +### Operational Risks +1. **Storage migration**: Create migration tools and procedures +2. **Backup and recovery**: Implement automated backup strategies +3. **Monitoring**: Add comprehensive logging and alerting +4. 
**Documentation**: Maintain up-to-date documentation + +## Dependencies + +### External Dependencies +- FastAPI for API framework +- SQLModel for database models +- Alembic for database migrations +- Boto3 for AWS S3 integration +- Azure SDK for Azure Blob Storage +- Google Cloud SDK for GCS + +### Internal Dependencies +- Existing authentication system +- Database infrastructure +- OpenSearch for file indexing +- Monitoring and logging infrastructure + +This implementation plan provides a structured approach to building a comprehensive Files API that integrates seamlessly with the existing NGS360 system while providing robust file management capabilities for both projects and runs. \ No newline at end of file From 999747b31303dc3c38b7a843d7e469b80cde72ff Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Sat, 20 Sep 2025 13:56:32 -0400 Subject: [PATCH 02/35] Add files test and code --- api/files/models.py | 166 +++++++++++++ api/files/services.py | 336 ++++++++++++++++++++++++++ tests/api/test_files.py | 513 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1015 insertions(+) create mode 100644 api/files/models.py create mode 100644 api/files/services.py create mode 100644 tests/api/test_files.py diff --git a/api/files/models.py b/api/files/models.py new file mode 100644 index 0000000..f78d4f1 --- /dev/null +++ b/api/files/models.py @@ -0,0 +1,166 @@ +""" +Models for the Files API +""" + +import uuid +from datetime import datetime +from enum import Enum +from sqlmodel import SQLModel, Field +from pydantic import ConfigDict +from typing import List + + +class FileType(str, Enum): + """File type categories""" + FASTQ = "fastq" + BAM = "bam" + VCF = "vcf" + SAMPLESHEET = "samplesheet" + METRICS = "metrics" + REPORT = "report" + LOG = "log" + IMAGE = "image" + DOCUMENT = "document" + OTHER = "other" + + +class EntityType(str, Enum): + """Entity types that can have files""" + PROJECT = "project" + RUN = "run" + + +class StorageBackend(str, Enum): + """Storage backend types""" + LOCAL = "local" + S3 = "s3" + AZURE = "azure" + GCS = "gcs" + + +class File(SQLModel, table=True): + """Core file entity that can be associated with runs or projects""" + + __searchable__ = ["filename", "description", "file_id"] + + id: uuid.UUID | None = Field(default_factory=uuid.uuid4, primary_key=True) + file_id: str = Field(unique=True, max_length=100) # Human-readable identifier + filename: str = Field(max_length=255) + original_filename: str = Field(max_length=255) + file_path: str = Field(max_length=1024) # Storage path/URI + file_size: int | None = None # Size in bytes + mime_type: str | None = Field(default=None, max_length=100) + checksum: str | None = Field(default=None, max_length=64) # SHA-256 hash + + # Metadata + description: str | None = Field(default=None, max_length=1024) + file_type: FileType = Field(default=FileType.OTHER) + upload_date: datetime = Field(default_factory=datetime.utcnow) + created_by: str | None = Field(default=None, max_length=100) # User identifier + + # Polymorphic associations + entity_type: EntityType # "project" or "run" + entity_id: str = Field(max_length=100) # project_id or run barcode + + # Storage metadata + storage_backend: StorageBackend = Field(default=StorageBackend.LOCAL) + is_public: bool = Field(default=False) + is_archived: bool = Field(default=False) + + model_config = ConfigDict(from_attributes=True) + + def generate_file_id(self) -> str: + """Generate a unique file ID""" + import secrets + import string + alphabet = string.ascii_letters + string.digits + 
return ''.join(secrets.choice(alphabet) for _ in range(12)) + + +class FileCreate(SQLModel): + """Request model for creating a file""" + filename: str + original_filename: str | None = None + description: str | None = None + file_type: FileType = FileType.OTHER + entity_type: EntityType + entity_id: str + is_public: bool = False + created_by: str | None = None + + model_config = ConfigDict(extra="forbid") + + +class FileUpdate(SQLModel): + """Request model for updating file metadata""" + filename: str | None = None + description: str | None = None + file_type: FileType | None = None + is_public: bool | None = None + is_archived: bool | None = None + + model_config = ConfigDict(extra="forbid") + + +class FilePublic(SQLModel): + """Public file representation""" + file_id: str + filename: str + original_filename: str + file_size: int | None + mime_type: str | None + description: str | None + file_type: FileType + upload_date: datetime + created_by: str | None + entity_type: EntityType + entity_id: str + is_public: bool + is_archived: bool + storage_backend: StorageBackend + checksum: str | None = None + + +class FilesPublic(SQLModel): + """Paginated file listing""" + data: List[FilePublic] + total_items: int + total_pages: int + current_page: int + per_page: int + has_next: bool + has_prev: bool + + +class FileUploadRequest(SQLModel): + """Request model for file upload""" + filename: str + description: str | None = None + file_type: FileType = FileType.OTHER + is_public: bool = False + + model_config = ConfigDict(extra="forbid") + + +class FileUploadResponse(SQLModel): + """Response model for file upload""" + file_id: str + filename: str + file_size: int | None = None + checksum: str | None = None + upload_date: datetime + message: str = "File uploaded successfully" + + +class FileFilters(SQLModel): + """File filtering options""" + entity_type: EntityType | None = None + entity_id: str | None = None + file_type: FileType | None = None + mime_type: str | None = None + created_by: str | None = None + is_public: bool | None = None + is_archived: bool | None = None + search_query: str | None = None # Search in filename/description + + model_config = ConfigDict(extra="forbid") \ No newline at end of file diff --git a/api/files/services.py b/api/files/services.py new file mode 100644 index 0000000..e1acf19 --- /dev/null +++ b/api/files/services.py @@ -0,0 +1,336 @@ +""" +Services for managing files. 
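+
+Covers file ID and path generation, checksum and MIME-type helpers, CRUD
+operations, and filtered/paginated listing backed by local filesystem storage.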
+""" + +import hashlib +import secrets +import string +from pathlib import Path +from sqlmodel import select, Session, func +from pydantic import PositiveInt +from fastapi import HTTPException, status + +from api.files.models import ( + File, + FileCreate, + FileUpdate, + FilePublic, + FilesPublic, + FileFilters, + FileType, + EntityType, + StorageBackend, +) + + +def generate_file_id() -> str: + """Generate a unique file ID""" + alphabet = string.ascii_letters + string.digits + return ''.join(secrets.choice(alphabet) for _ in range(12)) + + +def generate_file_path( + entity_type: EntityType, entity_id: str, file_type: FileType, filename: str +) -> str: + """Generate a structured file path""" + from datetime import datetime + now = datetime.utcnow() + year = now.strftime("%Y") + month = now.strftime("%m") + + # Create path structure: /{entity_type}/{entity_id}/{file_type}/{year}/{month}/{filename} + path_parts = [ + entity_type.value, + entity_id, + file_type.value, + year, + month, + filename + ] + return "/".join(path_parts) + + +def calculate_file_checksum(file_content: bytes) -> str: + """Calculate SHA-256 checksum of file content""" + return hashlib.sha256(file_content).hexdigest() + + +def get_mime_type(filename: str) -> str: + """Get MIME type based on file extension""" + import mimetypes + mime_type, _ = mimetypes.guess_type(filename) + return mime_type or "application/octet-stream" + + +def create_file( + session: Session, + file_create: FileCreate, + file_content: bytes | None = None, + storage_root: str = "storage" +) -> File: + """Create a new file record and optionally store content""" + + # Generate unique file ID + file_id = generate_file_id() + + # Use original_filename if provided, otherwise use filename + original_filename = file_create.original_filename or file_create.filename + + # Generate file path + file_path = generate_file_path( + file_create.entity_type, + file_create.entity_id, + file_create.file_type, + f"{file_id}_{file_create.filename}" + ) + + # Calculate file metadata if content is provided + file_size = len(file_content) if file_content else None + checksum = calculate_file_checksum(file_content) if file_content else None + mime_type = get_mime_type(file_create.filename) + + # Create file record + file_record = File( + file_id=file_id, + filename=file_create.filename, + original_filename=original_filename, + file_path=file_path, + file_size=file_size, + mime_type=mime_type, + checksum=checksum, + description=file_create.description, + file_type=file_create.file_type, + created_by=file_create.created_by, + entity_type=file_create.entity_type, + entity_id=file_create.entity_id, + is_public=file_create.is_public, + storage_backend=StorageBackend.LOCAL + ) + + # Store file content if provided + if file_content: + full_path = Path(storage_root) / file_path + full_path.parent.mkdir(parents=True, exist_ok=True) + with open(full_path, "wb") as f: + f.write(file_content) + + # Save to database + session.add(file_record) + session.commit() + session.refresh(file_record) + + return file_record + + +def get_file(session: Session, file_id: str) -> File: + """Get a file by its file_id""" + file_record = session.exec( + select(File).where(File.file_id == file_id) + ).first() + + if not file_record: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"File with id {file_id} not found" + ) + + return file_record + + +def get_file_by_id(session: Session, id: str) -> File: + """Get a file by its internal UUID""" + file_record = session.exec( + 
select(File).where(File.id == id) + ).first() + + if not file_record: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"File with internal id {id} not found" + ) + + return file_record + + +def update_file(session: Session, file_id: str, file_update: FileUpdate) -> File: + """Update file metadata""" + file_record = get_file(session, file_id) + + # Update fields that are provided + update_data = file_update.model_dump(exclude_unset=True) + for field, value in update_data.items(): + setattr(file_record, field, value) + + session.add(file_record) + session.commit() + session.refresh(file_record) + + return file_record + + +def delete_file(session: Session, file_id: str, storage_root: str = "storage") -> bool: + """Delete a file record and its content""" + file_record = get_file(session, file_id) + + # Delete physical file if it exists + full_path = Path(storage_root) / file_record.file_path + if full_path.exists(): + full_path.unlink() + + # Try to remove empty directories + try: + full_path.parent.rmdir() + except OSError: + # Directory not empty, that's fine + pass + + # Delete from database + session.delete(file_record) + session.commit() + + return True + + +def list_files( + session: Session, + filters: FileFilters | None = None, + page: PositiveInt = 1, + per_page: PositiveInt = 20, + sort_by: str = "upload_date", + sort_order: str = "desc" +) -> FilesPublic: + """List files with filtering and pagination""" + + # Build query + query = select(File) + + # Apply filters + if filters: + if filters.entity_type: + query = query.where(File.entity_type == filters.entity_type) + if filters.entity_id: + query = query.where(File.entity_id == filters.entity_id) + if filters.file_type: + query = query.where(File.file_type == filters.file_type) + if filters.mime_type: + query = query.where(File.mime_type == filters.mime_type) + if filters.created_by: + query = query.where(File.created_by == filters.created_by) + if filters.is_public is not None: + query = query.where(File.is_public == filters.is_public) + if filters.is_archived is not None: + query = query.where(File.is_archived == filters.is_archived) + if filters.search_query: + search_term = f"%{filters.search_query}%" + query = query.where( + (File.filename.ilike(search_term)) | + (File.description.ilike(search_term)) + ) + + # Get total count + total_count = session.exec( + select(func.count()).select_from(query.subquery()) + ).one() + + # Calculate pagination + total_pages = (total_count + per_page - 1) // per_page + + # Apply sorting + sort_field = getattr(File, sort_by, File.upload_date) + if sort_order == "desc": + query = query.order_by(sort_field.desc()) + else: + query = query.order_by(sort_field.asc()) + + # Apply pagination + query = query.offset((page - 1) * per_page).limit(per_page) + + # Execute query + files = session.exec(query).all() + + # Convert to public models + public_files = [ + FilePublic( + file_id=file.file_id, + filename=file.filename, + original_filename=file.original_filename, + file_size=file.file_size, + mime_type=file.mime_type, + description=file.description, + file_type=file.file_type, + upload_date=file.upload_date, + created_by=file.created_by, + entity_type=file.entity_type, + entity_id=file.entity_id, + is_public=file.is_public, + is_archived=file.is_archived, + storage_backend=file.storage_backend, + checksum=file.checksum + ) + for file in files + ] + + return FilesPublic( + data=public_files, + total_items=total_count, + total_pages=total_pages, + current_page=page, + 
per_page=per_page, + has_next=page < total_pages, + has_prev=page > 1 + ) + + +def get_file_content(session: Session, file_id: str, storage_root: str = "storage") -> bytes: + """Get file content from storage""" + file_record = get_file(session, file_id) + + full_path = Path(storage_root) / file_record.file_path + if not full_path.exists(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"File content not found at {file_record.file_path}" + ) + + with open(full_path, "rb") as f: + return f.read() + + +def list_files_for_entity( + session: Session, + entity_type: EntityType, + entity_id: str, + page: PositiveInt = 1, + per_page: PositiveInt = 20, + file_type: FileType | None = None +) -> FilesPublic: + """List files for a specific entity (project or run)""" + filters = FileFilters( + entity_type=entity_type, + entity_id=entity_id, + file_type=file_type + ) + + return list_files( + session=session, + filters=filters, + page=page, + per_page=per_page + ) + + +def get_file_count_for_entity( + session: Session, + entity_type: EntityType, + entity_id: str +) -> int: + """Get the count of files for a specific entity""" + count = session.exec( + select(func.count(File.id)).where( + File.entity_type == entity_type, + File.entity_id == entity_id, + ~File.is_archived + ) + ).one() + + return count \ No newline at end of file diff --git a/tests/api/test_files.py b/tests/api/test_files.py new file mode 100644 index 0000000..d439772 --- /dev/null +++ b/tests/api/test_files.py @@ -0,0 +1,513 @@ +""" +Test /files endpoint +""" + +import tempfile +import shutil +from pathlib import Path +from datetime import datetime + +import pytest +from fastapi.testclient import TestClient +from sqlmodel import Session + +from api.files.models import ( + FileCreate, + FileUpdate, + FileType, + EntityType, + StorageBackend, +) +from api.files.services import ( + create_file, + get_file, + update_file, + delete_file, + list_files, + get_file_content, + list_files_for_entity, + get_file_count_for_entity, + generate_file_id, + generate_file_path, + calculate_file_checksum, + get_mime_type, +) + + +class TestFileModels: + """Test file model functionality""" + + def test_file_type_enum(self): + """Test FileType enum values""" + assert FileType.FASTQ == "fastq" + assert FileType.BAM == "bam" + assert FileType.VCF == "vcf" + assert FileType.SAMPLESHEET == "samplesheet" + assert FileType.METRICS == "metrics" + assert FileType.REPORT == "report" + assert FileType.LOG == "log" + assert FileType.IMAGE == "image" + assert FileType.DOCUMENT == "document" + assert FileType.OTHER == "other" + + def test_entity_type_enum(self): + """Test EntityType enum values""" + assert EntityType.PROJECT == "project" + assert EntityType.RUN == "run" + + def test_storage_backend_enum(self): + """Test StorageBackend enum values""" + assert StorageBackend.LOCAL == "local" + assert StorageBackend.S3 == "s3" + assert StorageBackend.AZURE == "azure" + assert StorageBackend.GCS == "gcs" + + def test_file_create_model(self): + """Test FileCreate model validation""" + file_create = FileCreate( + filename="test.txt", + description="Test file", + file_type=FileType.DOCUMENT, + entity_type=EntityType.PROJECT, + entity_id="PROJ001", + is_public=True, + created_by="testuser" + ) + + assert file_create.filename == "test.txt" + assert file_create.description == "Test file" + assert file_create.file_type == FileType.DOCUMENT + assert file_create.entity_type == EntityType.PROJECT + assert file_create.entity_id == "PROJ001" + assert 
file_create.is_public is True + assert file_create.created_by == "testuser" + + def test_file_update_model(self): + """Test FileUpdate model validation""" + file_update = FileUpdate( + filename="updated.txt", + description="Updated description", + is_public=False + ) + + assert file_update.filename == "updated.txt" + assert file_update.description == "Updated description" + assert file_update.is_public is False + + +class TestFileServices: + """Test file service functions""" + + @pytest.fixture + def temp_storage(self): + """Create temporary storage directory""" + temp_dir = tempfile.mkdtemp() + yield temp_dir + shutil.rmtree(temp_dir) + + def test_generate_file_id(self): + """Test file ID generation""" + file_id = generate_file_id() + assert len(file_id) == 12 + assert file_id.isalnum() + + # Test uniqueness + file_id2 = generate_file_id() + assert file_id != file_id2 + + def test_generate_file_path(self): + """Test file path generation""" + path = generate_file_path( + EntityType.PROJECT, + "PROJ001", + FileType.FASTQ, + "sample.fastq" + ) + + # Should contain entity type, entity id, file type, year, month, filename + path_parts = path.split("/") + assert len(path_parts) == 6 + assert path_parts[0] == "project" + assert path_parts[1] == "PROJ001" + assert path_parts[2] == "fastq" + assert path_parts[5] == "sample.fastq" + + # Year and month should be current + now = datetime.utcnow() + assert path_parts[3] == now.strftime("%Y") + assert path_parts[4] == now.strftime("%m") + + def test_calculate_file_checksum(self): + """Test file checksum calculation""" + content = b"Hello, World!" + checksum = calculate_file_checksum(content) + + # Should be SHA-256 hash + assert len(checksum) == 64 + assert checksum == "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f" + + def test_get_mime_type(self): + """Test MIME type detection""" + assert get_mime_type("test.txt") == "text/plain" + assert get_mime_type("test.pdf") == "application/pdf" + assert get_mime_type("test.jpg") == "image/jpeg" + assert get_mime_type("test.fastq") == "application/octet-stream" # Unknown extension + + def test_create_file_without_content(self, session: Session, temp_storage): + """Test creating file record without content""" + file_create = FileCreate( + filename="test.txt", + description="Test file", + file_type=FileType.DOCUMENT, + entity_type=EntityType.PROJECT, + entity_id="PROJ001", + created_by="testuser" + ) + + file_record = create_file(session, file_create, storage_root=temp_storage) + + assert file_record.filename == "test.txt" + assert file_record.description == "Test file" + assert file_record.file_type == FileType.DOCUMENT + assert file_record.entity_type == EntityType.PROJECT + assert file_record.entity_id == "PROJ001" + assert file_record.created_by == "testuser" + assert file_record.file_size is None + assert file_record.checksum is None + assert file_record.mime_type == "text/plain" + assert len(file_record.file_id) == 12 + + def test_create_file_with_content(self, session: Session, temp_storage): + """Test creating file record with content""" + content = b"Hello, World!" 
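+        # file_size and checksum on the stored record should be derived from this payload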
+ file_create = FileCreate( + filename="test.txt", + description="Test file", + file_type=FileType.DOCUMENT, + entity_type=EntityType.PROJECT, + entity_id="PROJ001" + ) + + file_record = create_file(session, file_create, content, storage_root=temp_storage) + + assert file_record.file_size == len(content) + assert file_record.checksum == calculate_file_checksum(content) + + # Check that file was actually written + file_path = Path(temp_storage) / file_record.file_path + assert file_path.exists() + assert file_path.read_bytes() == content + + def test_get_file(self, session: Session, temp_storage): + """Test getting file by file_id""" + file_create = FileCreate( + filename="test.txt", + entity_type=EntityType.PROJECT, + entity_id="PROJ001" + ) + + created_file = create_file(session, file_create, storage_root=temp_storage) + retrieved_file = get_file(session, created_file.file_id) + + assert retrieved_file.id == created_file.id + assert retrieved_file.file_id == created_file.file_id + assert retrieved_file.filename == created_file.filename + + def test_get_file_not_found(self, session: Session): + """Test getting non-existent file""" + with pytest.raises(Exception) as exc_info: + get_file(session, "nonexistent") + + assert "not found" in str(exc_info.value) + + def test_update_file(self, session: Session, temp_storage): + """Test updating file metadata""" + file_create = FileCreate( + filename="test.txt", + description="Original description", + entity_type=EntityType.PROJECT, + entity_id="PROJ001" + ) + + created_file = create_file(session, file_create, storage_root=temp_storage) + + file_update = FileUpdate( + filename="updated.txt", + description="Updated description", + is_public=True + ) + + updated_file = update_file(session, created_file.file_id, file_update) + + assert updated_file.filename == "updated.txt" + assert updated_file.description == "Updated description" + assert updated_file.is_public is True + + def test_delete_file(self, session: Session, temp_storage): + """Test deleting file and content""" + content = b"Hello, World!" 
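+        # Use real content so both the database row and the on-disk file exist before deletion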
+ file_create = FileCreate( + filename="test.txt", + entity_type=EntityType.PROJECT, + entity_id="PROJ001" + ) + + created_file = create_file(session, file_create, content, storage_root=temp_storage) + file_path = Path(temp_storage) / created_file.file_path + + # Verify file exists + assert file_path.exists() + + # Delete file + result = delete_file(session, created_file.file_id, storage_root=temp_storage) + assert result is True + + # Verify file is deleted + assert not file_path.exists() + + # Verify database record is deleted + with pytest.raises(Exception): + get_file(session, created_file.file_id) + + def test_list_files_empty(self, session: Session): + """Test listing files when none exist""" + result = list_files(session) + + assert result.total_items == 0 + assert result.total_pages == 0 + assert result.current_page == 1 + assert result.per_page == 20 + assert result.has_next is False + assert result.has_prev is False + assert len(result.data) == 0 + + def test_list_files_with_data(self, session: Session, temp_storage): + """Test listing files with data""" + # Create test files + for i in range(3): + file_create = FileCreate( + filename=f"test{i}.txt", + description=f"Test file {i}", + entity_type=EntityType.PROJECT, + entity_id="PROJ001", + file_type=FileType.DOCUMENT + ) + create_file(session, file_create, storage_root=temp_storage) + + result = list_files(session) + + assert result.total_items == 3 + assert result.total_pages == 1 + assert result.current_page == 1 + assert result.per_page == 20 + assert result.has_next is False + assert result.has_prev is False + assert len(result.data) == 3 + + def test_list_files_for_entity(self, session: Session, temp_storage): + """Test listing files for specific entity""" + # Create files for different entities + for entity_id in ["PROJ001", "PROJ002"]: + for i in range(2): + file_create = FileCreate( + filename=f"test{i}.txt", + entity_type=EntityType.PROJECT, + entity_id=entity_id + ) + create_file(session, file_create, storage_root=temp_storage) + + # List files for PROJ001 + result = list_files_for_entity(session, EntityType.PROJECT, "PROJ001") + + assert result.total_items == 2 + assert all(file.entity_id == "PROJ001" for file in result.data) + + def test_get_file_count_for_entity(self, session: Session, temp_storage): + """Test getting file count for entity""" + # Create files for entity + for i in range(3): + file_create = FileCreate( + filename=f"test{i}.txt", + entity_type=EntityType.PROJECT, + entity_id="PROJ001" + ) + create_file(session, file_create, storage_root=temp_storage) + + count = get_file_count_for_entity(session, EntityType.PROJECT, "PROJ001") + assert count == 3 + + def test_get_file_content(self, session: Session, temp_storage): + """Test retrieving file content""" + content = b"Hello, World!" 
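+        # Round-trip: persist the payload via create_file, then read it back with get_file_content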
+ file_create = FileCreate( + filename="test.txt", + entity_type=EntityType.PROJECT, + entity_id="PROJ001" + ) + + created_file = create_file(session, file_create, content, storage_root=temp_storage) + retrieved_content = get_file_content( + session, created_file.file_id, storage_root=temp_storage + ) + + assert retrieved_content == content + + def test_get_file_content_not_found(self, session: Session, temp_storage): + """Test retrieving content for non-existent file""" + file_create = FileCreate( + filename="test.txt", + entity_type=EntityType.PROJECT, + entity_id="PROJ001" + ) + + # Create file record without content + created_file = create_file(session, file_create, storage_root=temp_storage) + + with pytest.raises(Exception) as exc_info: + get_file_content(session, created_file.file_id, storage_root=temp_storage) + + assert "not found" in str(exc_info.value) + + +class TestFileAPI: + """Test file API endpoints""" + + def test_create_file_endpoint(self, client: TestClient, session: Session): + """Test file creation endpoint""" + # This would test the actual API endpoint once routes are implemented + # For now, we'll skip this as routes.py needs to be updated + pass + + def test_get_files_endpoint(self, client: TestClient, session: Session): + """Test file listing endpoint""" + # This would test the actual API endpoint once routes are implemented + pass + + def test_get_file_endpoint(self, client: TestClient, session: Session): + """Test file retrieval endpoint""" + # This would test the actual API endpoint once routes are implemented + pass + + def test_update_file_endpoint(self, client: TestClient, session: Session): + """Test file update endpoint""" + # This would test the actual API endpoint once routes are implemented + pass + + def test_delete_file_endpoint(self, client: TestClient, session: Session): + """Test file deletion endpoint""" + # This would test the actual API endpoint once routes are implemented + pass + + +class TestFileIntegration: + """Integration tests for file operations""" + + @pytest.fixture + def temp_storage(self): + """Create temporary storage directory""" + temp_dir = tempfile.mkdtemp() + yield temp_dir + shutil.rmtree(temp_dir) + + def test_complete_file_lifecycle(self, session: Session, temp_storage): + """Test complete file lifecycle: create, read, update, delete""" + content = b"Initial content" + + # Create file + file_create = FileCreate( + filename="lifecycle.txt", + description="Lifecycle test file", + file_type=FileType.DOCUMENT, + entity_type=EntityType.PROJECT, + entity_id="PROJ001", + created_by="testuser" + ) + + created_file = create_file(session, file_create, content, storage_root=temp_storage) + assert created_file.filename == "lifecycle.txt" + assert created_file.file_size == len(content) + + # Read file + retrieved_file = get_file(session, created_file.file_id) + assert retrieved_file.id == created_file.id + + retrieved_content = get_file_content( + session, created_file.file_id, storage_root=temp_storage + ) + assert retrieved_content == content + + # Update file metadata + file_update = FileUpdate( + description="Updated lifecycle test file", + is_public=True + ) + + updated_file = update_file(session, created_file.file_id, file_update) + assert updated_file.description == "Updated lifecycle test file" + assert updated_file.is_public is True + + # Delete file + result = delete_file(session, created_file.file_id, storage_root=temp_storage) + assert result is True + + # Verify deletion + with pytest.raises(Exception): + get_file(session, 
created_file.file_id) + + def test_multiple_entities_file_management(self, session: Session, temp_storage): + """Test file management across multiple entities""" + entities = [ + (EntityType.PROJECT, "PROJ001"), + (EntityType.PROJECT, "PROJ002"), + (EntityType.RUN, "190110_MACHINE123_0001_FLOWCELL123") + ] + + created_files = [] + + # Create files for different entities + for entity_type, entity_id in entities: + for i in range(2): + file_create = FileCreate( + filename=f"file{i}.txt", + entity_type=entity_type, + entity_id=entity_id, + file_type=FileType.DOCUMENT + ) + + file_record = create_file(session, file_create, storage_root=temp_storage) + created_files.append(file_record) + + # Verify total count + all_files = list_files(session) + assert all_files.total_items == 6 + + # Verify entity-specific counts + for entity_type, entity_id in entities: + entity_files = list_files_for_entity(session, entity_type, entity_id) + assert entity_files.total_items == 2 + + count = get_file_count_for_entity(session, entity_type, entity_id) + assert count == 2 + + def test_file_type_filtering(self, session: Session, temp_storage): + """Test filtering files by type""" + file_types = [FileType.FASTQ, FileType.BAM, FileType.VCF, FileType.DOCUMENT] + + # Create files of different types + for file_type in file_types: + file_create = FileCreate( + filename=f"test.{file_type.value}", + entity_type=EntityType.PROJECT, + entity_id="PROJ001", + file_type=file_type + ) + create_file(session, file_create, storage_root=temp_storage) + + # Test filtering by each type + for file_type in file_types: + from api.files.models import FileFilters + filters = FileFilters(file_type=file_type) + result = list_files(session, filters=filters) + + assert result.total_items == 1 + assert result.data[0].file_type == file_type \ No newline at end of file From 7b624c4c06155a10f96d9598c2c88a7871777ab9 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Sat, 20 Sep 2025 14:01:58 -0400 Subject: [PATCH 03/35] lint --- api/files/models.py | 20 ++++---- api/files/services.py | 68 +++++++++++++------------- tests/api/test_files.py | 104 ++++++++++++++++++++-------------------- 3 files changed, 96 insertions(+), 96 deletions(-) diff --git a/api/files/models.py b/api/files/models.py index f78d4f1..0346e56 100644 --- a/api/files/models.py +++ b/api/files/models.py @@ -40,9 +40,9 @@ class StorageBackend(str, Enum): class File(SQLModel, table=True): """Core file entity that can be associated with runs or projects""" - + __searchable__ = ["filename", "description", "file_id"] - + id: uuid.UUID | None = Field(default_factory=uuid.uuid4, primary_key=True) file_id: str = Field(unique=True, max_length=100) # Human-readable identifier filename: str = Field(max_length=255) @@ -51,17 +51,17 @@ class File(SQLModel, table=True): file_size: int | None = None # Size in bytes mime_type: str | None = Field(default=None, max_length=100) checksum: str | None = Field(default=None, max_length=64) # SHA-256 hash - + # Metadata description: str | None = Field(default=None, max_length=1024) file_type: FileType = Field(default=FileType.OTHER) upload_date: datetime = Field(default_factory=datetime.utcnow) created_by: str | None = Field(default=None, max_length=100) # User identifier - + # Polymorphic associations entity_type: EntityType # "project" or "run" entity_id: str = Field(max_length=100) # project_id or run barcode - + # Storage metadata storage_backend: StorageBackend = Field(default=StorageBackend.LOCAL) is_public: bool = Field(default=False) @@ -87,7 +87,7 
@@ class FileCreate(SQLModel): entity_id: str is_public: bool = False created_by: str | None = None - + model_config = ConfigDict(extra="forbid") @@ -98,7 +98,7 @@ class FileUpdate(SQLModel): file_type: FileType | None = None is_public: bool | None = None is_archived: bool | None = None - + model_config = ConfigDict(extra="forbid") @@ -138,7 +138,7 @@ class FileUploadRequest(SQLModel): description: str | None = None file_type: FileType = FileType.OTHER is_public: bool = False - + model_config = ConfigDict(extra="forbid") @@ -162,5 +162,5 @@ class FileFilters(SQLModel): is_public: bool | None = None is_archived: bool | None = None search_query: str | None = None # Search in filename/description - - model_config = ConfigDict(extra="forbid") \ No newline at end of file + + model_config = ConfigDict(extra="forbid") diff --git a/api/files/services.py b/api/files/services.py index e1acf19..507dfa7 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -37,7 +37,7 @@ def generate_file_path( now = datetime.utcnow() year = now.strftime("%Y") month = now.strftime("%m") - + # Create path structure: /{entity_type}/{entity_id}/{file_type}/{year}/{month}/{filename} path_parts = [ entity_type.value, @@ -69,13 +69,13 @@ def create_file( storage_root: str = "storage" ) -> File: """Create a new file record and optionally store content""" - + # Generate unique file ID file_id = generate_file_id() - + # Use original_filename if provided, otherwise use filename original_filename = file_create.original_filename or file_create.filename - + # Generate file path file_path = generate_file_path( file_create.entity_type, @@ -83,12 +83,12 @@ def create_file( file_create.file_type, f"{file_id}_{file_create.filename}" ) - + # Calculate file metadata if content is provided file_size = len(file_content) if file_content else None checksum = calculate_file_checksum(file_content) if file_content else None mime_type = get_mime_type(file_create.filename) - + # Create file record file_record = File( file_id=file_id, @@ -106,19 +106,19 @@ def create_file( is_public=file_create.is_public, storage_backend=StorageBackend.LOCAL ) - + # Store file content if provided if file_content: full_path = Path(storage_root) / file_path full_path.parent.mkdir(parents=True, exist_ok=True) with open(full_path, "wb") as f: f.write(file_content) - + # Save to database session.add(file_record) session.commit() session.refresh(file_record) - + return file_record @@ -127,13 +127,13 @@ def get_file(session: Session, file_id: str) -> File: file_record = session.exec( select(File).where(File.file_id == file_id) ).first() - + if not file_record: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with id {file_id} not found" ) - + return file_record @@ -142,52 +142,52 @@ def get_file_by_id(session: Session, id: str) -> File: file_record = session.exec( select(File).where(File.id == id) ).first() - + if not file_record: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with internal id {id} not found" ) - + return file_record def update_file(session: Session, file_id: str, file_update: FileUpdate) -> File: """Update file metadata""" file_record = get_file(session, file_id) - + # Update fields that are provided update_data = file_update.model_dump(exclude_unset=True) for field, value in update_data.items(): setattr(file_record, field, value) - + session.add(file_record) session.commit() session.refresh(file_record) - + return file_record def delete_file(session: Session, file_id: str, storage_root: 
str = "storage") -> bool: """Delete a file record and its content""" file_record = get_file(session, file_id) - + # Delete physical file if it exists full_path = Path(storage_root) / file_record.file_path if full_path.exists(): full_path.unlink() - + # Try to remove empty directories try: full_path.parent.rmdir() except OSError: # Directory not empty, that's fine pass - + # Delete from database session.delete(file_record) session.commit() - + return True @@ -200,10 +200,10 @@ def list_files( sort_order: str = "desc" ) -> FilesPublic: """List files with filtering and pagination""" - + # Build query query = select(File) - + # Apply filters if filters: if filters.entity_type: @@ -226,28 +226,28 @@ def list_files( (File.filename.ilike(search_term)) | (File.description.ilike(search_term)) ) - + # Get total count total_count = session.exec( select(func.count()).select_from(query.subquery()) ).one() - + # Calculate pagination total_pages = (total_count + per_page - 1) // per_page - + # Apply sorting sort_field = getattr(File, sort_by, File.upload_date) if sort_order == "desc": query = query.order_by(sort_field.desc()) else: query = query.order_by(sort_field.asc()) - + # Apply pagination query = query.offset((page - 1) * per_page).limit(per_page) - + # Execute query files = session.exec(query).all() - + # Convert to public models public_files = [ FilePublic( @@ -269,7 +269,7 @@ def list_files( ) for file in files ] - + return FilesPublic( data=public_files, total_items=total_count, @@ -284,14 +284,14 @@ def list_files( def get_file_content(session: Session, file_id: str, storage_root: str = "storage") -> bytes: """Get file content from storage""" file_record = get_file(session, file_id) - + full_path = Path(storage_root) / file_record.file_path if not full_path.exists(): raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File content not found at {file_record.file_path}" ) - + with open(full_path, "rb") as f: return f.read() @@ -310,7 +310,7 @@ def list_files_for_entity( entity_id=entity_id, file_type=file_type ) - + return list_files( session=session, filters=filters, @@ -332,5 +332,5 @@ def get_file_count_for_entity( ~File.is_archived ) ).one() - - return count \ No newline at end of file + + return count diff --git a/tests/api/test_files.py b/tests/api/test_files.py index d439772..4ad7256 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -73,7 +73,7 @@ def test_file_create_model(self): is_public=True, created_by="testuser" ) - + assert file_create.filename == "test.txt" assert file_create.description == "Test file" assert file_create.file_type == FileType.DOCUMENT @@ -89,7 +89,7 @@ def test_file_update_model(self): description="Updated description", is_public=False ) - + assert file_update.filename == "updated.txt" assert file_update.description == "Updated description" assert file_update.is_public is False @@ -110,7 +110,7 @@ def test_generate_file_id(self): file_id = generate_file_id() assert len(file_id) == 12 assert file_id.isalnum() - + # Test uniqueness file_id2 = generate_file_id() assert file_id != file_id2 @@ -123,7 +123,7 @@ def test_generate_file_path(self): FileType.FASTQ, "sample.fastq" ) - + # Should contain entity type, entity id, file type, year, month, filename path_parts = path.split("/") assert len(path_parts) == 6 @@ -131,7 +131,7 @@ def test_generate_file_path(self): assert path_parts[1] == "PROJ001" assert path_parts[2] == "fastq" assert path_parts[5] == "sample.fastq" - + # Year and month should be current now = datetime.utcnow() 
assert path_parts[3] == now.strftime("%Y") @@ -141,7 +141,7 @@ def test_calculate_file_checksum(self): """Test file checksum calculation""" content = b"Hello, World!" checksum = calculate_file_checksum(content) - + # Should be SHA-256 hash assert len(checksum) == 64 assert checksum == "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f" @@ -163,9 +163,9 @@ def test_create_file_without_content(self, session: Session, temp_storage): entity_id="PROJ001", created_by="testuser" ) - + file_record = create_file(session, file_create, storage_root=temp_storage) - + assert file_record.filename == "test.txt" assert file_record.description == "Test file" assert file_record.file_type == FileType.DOCUMENT @@ -187,12 +187,12 @@ def test_create_file_with_content(self, session: Session, temp_storage): entity_type=EntityType.PROJECT, entity_id="PROJ001" ) - + file_record = create_file(session, file_create, content, storage_root=temp_storage) - + assert file_record.file_size == len(content) assert file_record.checksum == calculate_file_checksum(content) - + # Check that file was actually written file_path = Path(temp_storage) / file_record.file_path assert file_path.exists() @@ -205,10 +205,10 @@ def test_get_file(self, session: Session, temp_storage): entity_type=EntityType.PROJECT, entity_id="PROJ001" ) - + created_file = create_file(session, file_create, storage_root=temp_storage) retrieved_file = get_file(session, created_file.file_id) - + assert retrieved_file.id == created_file.id assert retrieved_file.file_id == created_file.file_id assert retrieved_file.filename == created_file.filename @@ -217,7 +217,7 @@ def test_get_file_not_found(self, session: Session): """Test getting non-existent file""" with pytest.raises(Exception) as exc_info: get_file(session, "nonexistent") - + assert "not found" in str(exc_info.value) def test_update_file(self, session: Session, temp_storage): @@ -228,17 +228,17 @@ def test_update_file(self, session: Session, temp_storage): entity_type=EntityType.PROJECT, entity_id="PROJ001" ) - + created_file = create_file(session, file_create, storage_root=temp_storage) - + file_update = FileUpdate( filename="updated.txt", description="Updated description", is_public=True ) - + updated_file = update_file(session, created_file.file_id, file_update) - + assert updated_file.filename == "updated.txt" assert updated_file.description == "Updated description" assert updated_file.is_public is True @@ -251,20 +251,20 @@ def test_delete_file(self, session: Session, temp_storage): entity_type=EntityType.PROJECT, entity_id="PROJ001" ) - + created_file = create_file(session, file_create, content, storage_root=temp_storage) file_path = Path(temp_storage) / created_file.file_path - + # Verify file exists assert file_path.exists() - + # Delete file result = delete_file(session, created_file.file_id, storage_root=temp_storage) assert result is True - + # Verify file is deleted assert not file_path.exists() - + # Verify database record is deleted with pytest.raises(Exception): get_file(session, created_file.file_id) @@ -272,7 +272,7 @@ def test_delete_file(self, session: Session, temp_storage): def test_list_files_empty(self, session: Session): """Test listing files when none exist""" result = list_files(session) - + assert result.total_items == 0 assert result.total_pages == 0 assert result.current_page == 1 @@ -293,9 +293,9 @@ def test_list_files_with_data(self, session: Session, temp_storage): file_type=FileType.DOCUMENT ) create_file(session, file_create, storage_root=temp_storage) - + 
result = list_files(session) - + assert result.total_items == 3 assert result.total_pages == 1 assert result.current_page == 1 @@ -315,10 +315,10 @@ def test_list_files_for_entity(self, session: Session, temp_storage): entity_id=entity_id ) create_file(session, file_create, storage_root=temp_storage) - + # List files for PROJ001 result = list_files_for_entity(session, EntityType.PROJECT, "PROJ001") - + assert result.total_items == 2 assert all(file.entity_id == "PROJ001" for file in result.data) @@ -332,7 +332,7 @@ def test_get_file_count_for_entity(self, session: Session, temp_storage): entity_id="PROJ001" ) create_file(session, file_create, storage_root=temp_storage) - + count = get_file_count_for_entity(session, EntityType.PROJECT, "PROJ001") assert count == 3 @@ -344,12 +344,12 @@ def test_get_file_content(self, session: Session, temp_storage): entity_type=EntityType.PROJECT, entity_id="PROJ001" ) - + created_file = create_file(session, file_create, content, storage_root=temp_storage) retrieved_content = get_file_content( session, created_file.file_id, storage_root=temp_storage ) - + assert retrieved_content == content def test_get_file_content_not_found(self, session: Session, temp_storage): @@ -359,13 +359,13 @@ def test_get_file_content_not_found(self, session: Session, temp_storage): entity_type=EntityType.PROJECT, entity_id="PROJ001" ) - + # Create file record without content created_file = create_file(session, file_create, storage_root=temp_storage) - + with pytest.raises(Exception) as exc_info: get_file_content(session, created_file.file_id, storage_root=temp_storage) - + assert "not found" in str(exc_info.value) @@ -412,7 +412,7 @@ def temp_storage(self): def test_complete_file_lifecycle(self, session: Session, temp_storage): """Test complete file lifecycle: create, read, update, delete""" content = b"Initial content" - + # Create file file_create = FileCreate( filename="lifecycle.txt", @@ -422,34 +422,34 @@ def test_complete_file_lifecycle(self, session: Session, temp_storage): entity_id="PROJ001", created_by="testuser" ) - + created_file = create_file(session, file_create, content, storage_root=temp_storage) assert created_file.filename == "lifecycle.txt" assert created_file.file_size == len(content) - + # Read file retrieved_file = get_file(session, created_file.file_id) assert retrieved_file.id == created_file.id - + retrieved_content = get_file_content( session, created_file.file_id, storage_root=temp_storage ) assert retrieved_content == content - + # Update file metadata file_update = FileUpdate( description="Updated lifecycle test file", is_public=True ) - + updated_file = update_file(session, created_file.file_id, file_update) assert updated_file.description == "Updated lifecycle test file" assert updated_file.is_public is True - + # Delete file result = delete_file(session, created_file.file_id, storage_root=temp_storage) assert result is True - + # Verify deletion with pytest.raises(Exception): get_file(session, created_file.file_id) @@ -461,9 +461,9 @@ def test_multiple_entities_file_management(self, session: Session, temp_storage) (EntityType.PROJECT, "PROJ002"), (EntityType.RUN, "190110_MACHINE123_0001_FLOWCELL123") ] - + created_files = [] - + # Create files for different entities for entity_type, entity_id in entities: for i in range(2): @@ -473,26 +473,26 @@ def test_multiple_entities_file_management(self, session: Session, temp_storage) entity_id=entity_id, file_type=FileType.DOCUMENT ) - + file_record = create_file(session, file_create, 
storage_root=temp_storage) created_files.append(file_record) - + # Verify total count all_files = list_files(session) assert all_files.total_items == 6 - + # Verify entity-specific counts for entity_type, entity_id in entities: entity_files = list_files_for_entity(session, entity_type, entity_id) assert entity_files.total_items == 2 - + count = get_file_count_for_entity(session, entity_type, entity_id) assert count == 2 def test_file_type_filtering(self, session: Session, temp_storage): """Test filtering files by type""" file_types = [FileType.FASTQ, FileType.BAM, FileType.VCF, FileType.DOCUMENT] - + # Create files of different types for file_type in file_types: file_create = FileCreate( @@ -502,12 +502,12 @@ def test_file_type_filtering(self, session: Session, temp_storage): file_type=file_type ) create_file(session, file_create, storage_root=temp_storage) - + # Test filtering by each type for file_type in file_types: from api.files.models import FileFilters filters = FileFilters(file_type=file_type) result = list_files(session, filters=filters) - + assert result.total_items == 1 - assert result.data[0].file_type == file_type \ No newline at end of file + assert result.data[0].file_type == file_type From 6d59ec46aa983d2806fe24fd85b4d3cc50a421a9 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Sat, 20 Sep 2025 14:09:27 -0400 Subject: [PATCH 04/35] Add file db support --- .../versions/f0aae41a019b_add_file_table.py | 33 +++ api/files/models.py | 13 + api/files/routes.py | 277 ++++++++++++++++-- api/files/services.py | 27 ++ 4 files changed, 321 insertions(+), 29 deletions(-) create mode 100644 alembic/versions/f0aae41a019b_add_file_table.py diff --git a/alembic/versions/f0aae41a019b_add_file_table.py b/alembic/versions/f0aae41a019b_add_file_table.py new file mode 100644 index 0000000..1b97162 --- /dev/null +++ b/alembic/versions/f0aae41a019b_add_file_table.py @@ -0,0 +1,33 @@ +"""add_file_table + +Revision ID: f0aae41a019b +Revises: 43c1f122cf7f +Create Date: 2025-09-20 14:07:19.102747 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +import sqlmodel + + +# revision identifiers, used by Alembic. +revision: str = 'f0aae41a019b' +down_revision: Union[str, Sequence[str], None] = '43c1f122cf7f' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + pass + # ### end Alembic commands ### diff --git a/api/files/models.py b/api/files/models.py index 0346e56..468901e 100644 --- a/api/files/models.py +++ b/api/files/models.py @@ -164,3 +164,16 @@ class FileFilters(SQLModel): search_query: str | None = None # Search in filename/description model_config = ConfigDict(extra="forbid") + + +class PaginatedFileResponse(SQLModel): + """Paginated response for file listings""" + data: list[FilePublic] + total_items: int + total_pages: int + current_page: int + per_page: int + has_next: bool + has_prev: bool + + model_config = ConfigDict(from_attributes=True) diff --git a/api/files/routes.py b/api/files/routes.py index 3e88e67..65bf706 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -2,54 +2,273 @@ Routes/endpoints for the Files API """ -from typing import Literal -from fastapi import APIRouter, Query, status +from typing import Optional +from fastapi import APIRouter, Query, HTTPException, status, UploadFile, File as FastAPIFile +from fastapi.responses import StreamingResponse from core.deps import SessionDep -from api.files.models import File, FileCreate, FilePublic, FilesPublic +from api.files.models import ( + FileCreate, + FileUpdate, + FilePublic, + PaginatedFileResponse, + FileFilters, + FileType, + EntityType, +) import api.files.services as services -router = APIRouter(prefix="/files", tags=["File Endpoints"]) +router = APIRouter(prefix="/files", tags=["Files"]) @router.post( "", response_model=FilePublic, - tags=["File Endpoints"], status_code=status.HTTP_201_CREATED, + summary="Create a new file record" ) -def create_file(session: SessionDep, file_in: FileCreate) -> File: +def create_file( + session: SessionDep, + file_in: FileCreate, + content: Optional[UploadFile] = FastAPIFile(None) +) -> FilePublic: """ - Create a new file with optional attributes. + Create a new file record with optional file content upload. + + - **filename**: Name of the file + - **description**: Optional description of the file + - **file_type**: Type of file (fastq, bam, vcf, etc.) 
+ - **entity_type**: Whether this file belongs to a project or run + - **entity_id**: ID of the project or run this file belongs to + - **is_public**: Whether the file is publicly accessible + - **created_by**: User who created the file """ - return services.create_file(session=session, file_in=file_in) + file_content = None + if content and content.filename: + file_content = content.file.read() + + return services.create_file(session, file_in, file_content) -@router.get("", response_model=FilesPublic, tags=["File Endpoints"]) -def get_files( +@router.get( + "", + response_model=PaginatedFileResponse, + summary="List files with filtering and pagination" +) +def list_files( session: SessionDep, - page: int = Query(1, description="Page number (1-indexed)"), - per_page: int = Query(20, description="Number of items per page"), - sort_by: str = Query("file_id", description="Field to sort by"), - sort_order: Literal["asc", "desc"] = Query( - "asc", description="Sort order (asc or desc)" - ), -) -> FilesPublic: + page: int = Query(1, ge=1, description="Page number (1-indexed)"), + per_page: int = Query(20, ge=1, le=100, description="Number of items per page"), + entity_type: Optional[EntityType] = Query(None, description="Filter by entity type"), + entity_id: Optional[str] = Query(None, description="Filter by entity ID"), + file_type: Optional[FileType] = Query(None, description="Filter by file type"), + search: Optional[str] = Query(None, description="Search in filename and description"), + is_public: Optional[bool] = Query(None, description="Filter by public/private status"), + created_by: Optional[str] = Query(None, description="Filter by creator"), +) -> PaginatedFileResponse: """ - Returns a paginated list of files. + Get a paginated list of files with optional filtering. + + Supports filtering by: + - Entity type and ID (project or run) + - File type (fastq, bam, vcf, etc.) + - Public/private status + - Creator + - Text search in filename and description """ - return services.get_files( - session=session, - page=page, - per_page=per_page, - sort_by=sort_by, - sort_order=sort_order, + filters = FileFilters( + entity_type=entity_type, + entity_id=entity_id, + file_type=file_type, + search_query=search, + is_public=is_public, + created_by=created_by, ) + + return services.list_files(session, filters, page, per_page) + + +@router.get( + "/{file_id}", + response_model=FilePublic, + summary="Get file by ID" +) +def get_file( + session: SessionDep, + file_id: str +) -> FilePublic: + """ + Get a single file by its file_id. + + - **file_id**: The unique file identifier + """ + try: + return services.get_file(session, file_id) + except Exception: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"File with id {file_id} not found" + ) + + +@router.put( + "/{file_id}", + response_model=FilePublic, + summary="Update file metadata" +) +def update_file( + session: SessionDep, + file_id: str, + file_update: FileUpdate +) -> FilePublic: + """ + Update file metadata. + + - **file_id**: The unique file identifier + - **file_update**: Fields to update + """ + try: + return services.update_file(session, file_id, file_update) + except Exception: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"File with id {file_id} not found" + ) + + +@router.delete( + "/{file_id}", + status_code=status.HTTP_204_NO_CONTENT, + summary="Delete file" +) +def delete_file( + session: SessionDep, + file_id: str +) -> None: + """ + Delete a file and its content. 
+ + - **file_id**: The unique file identifier + """ + try: + success = services.delete_file(session, file_id) + if not success: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"File with id {file_id} not found" + ) + except Exception: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"File with id {file_id} not found" + ) -@router.get("/{file_id}", response_model=FilePublic, tags=["File Endpoints"]) -def get_file_by_file_id(session: SessionDep, file_id: str) -> File: +@router.get( + "/{file_id}/content", + summary="Download file content" +) +def download_file( + session: SessionDep, + file_id: str +): + """ + Download the content of a file. + + - **file_id**: The unique file identifier + """ + try: + # Get file metadata + file_record = services.get_file(session, file_id) + + # Get file content + content = services.get_file_content(session, file_id) + + # Create streaming response + def generate(): + yield content + + return StreamingResponse( + generate(), + media_type=file_record.mime_type or "application/octet-stream", + headers={ + "Content-Disposition": f"attachment; filename={file_record.filename}" + } + ) + except Exception: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"File with id {file_id} not found or content not available" + ) + + +@router.post( + "/{file_id}/content", + response_model=FilePublic, + summary="Upload file content" +) +def upload_file_content( + session: SessionDep, + file_id: str, + content: UploadFile = FastAPIFile(...) +) -> FilePublic: + """ + Upload content for an existing file record. + + - **file_id**: The unique file identifier + - **content**: The file content to upload + """ + try: + file_content = content.file.read() + return services.update_file_content(session, file_id, file_content) + except Exception: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"File with id {file_id} not found" + ) + + +@router.get( + "/entity/{entity_type}/{entity_id}", + response_model=PaginatedFileResponse, + summary="List files for a specific entity" +) +def list_files_for_entity( + session: SessionDep, + entity_type: EntityType, + entity_id: str, + page: int = Query(1, ge=1, description="Page number (1-indexed)"), + per_page: int = Query(20, ge=1, le=100, description="Number of items per page"), + file_type: Optional[FileType] = Query(None, description="Filter by file type"), +) -> PaginatedFileResponse: + """ + Get all files associated with a specific project or run. + + - **entity_type**: Either "project" or "run" + - **entity_id**: The project ID or run barcode + """ + filters = FileFilters( + entity_type=entity_type, + entity_id=entity_id, + file_type=file_type, + ) + + return services.list_files_for_entity(session, entity_type, entity_id, page, per_page, filters) + + +@router.get( + "/entity/{entity_type}/{entity_id}/count", + summary="Get file count for entity" +) +def get_file_count_for_entity( + session: SessionDep, + entity_type: EntityType, + entity_id: str +) -> dict: """ - Returns a single file by its file_id. - Note: This is different from its internal "id". + Get the total number of files for a specific project or run. 
+ + - **entity_type**: Either "project" or "run" + - **entity_id**: The project ID or run barcode """ - return services.get_file_by_file_id(session=session, file_id=file_id) + count = services.get_file_count_for_entity(session, entity_type, entity_id) + return {"entity_type": entity_type, "entity_id": entity_id, "file_count": count} diff --git a/api/files/services.py b/api/files/services.py index 507dfa7..8232577 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -334,3 +334,30 @@ def get_file_count_for_entity( ).one() return count + + +def update_file_content( + session: Session, file_id: str, content: bytes, storage_root: str = "storage" +) -> File: + """Update file content""" + # Get the file record + file_record = get_file(session, file_id) + + # Calculate new file metadata + file_size = len(content) + checksum = calculate_file_checksum(content) + + # Write content to storage + storage_path = Path(storage_root) / file_record.file_path + storage_path.parent.mkdir(parents=True, exist_ok=True) + storage_path.write_bytes(content) + + # Update file record + file_record.file_size = file_size + file_record.checksum = checksum + + session.add(file_record) + session.commit() + session.refresh(file_record) + + return file_record From 477dd3d291b31d8540a21a50f3edf0ffe6f2382c Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Sat, 20 Sep 2025 14:09:39 -0400 Subject: [PATCH 05/35] Add files rout --- main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.py b/main.py index cf1f56b..08c38e0 100644 --- a/main.py +++ b/main.py @@ -12,6 +12,7 @@ from api.runs.routes import router as runs_router from api.samples.routes import router as samples_router from api.search.routes import router as search_router +from api.files.routes import router as files_router # Customize route id's @@ -50,6 +51,7 @@ def root(): app.include_router(runs_router, prefix=API_PREFIX) app.include_router(samples_router, prefix=API_PREFIX) app.include_router(search_router, prefix=API_PREFIX) +app.include_router(files_router, prefix=API_PREFIX) if __name__ == "__main__": # For debugging purposes From 1998cb8c7004b77e278f74456f315e1690624cf7 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Sat, 20 Sep 2025 14:10:47 -0400 Subject: [PATCH 06/35] Remove empty alembic revision --- .../versions/f0aae41a019b_add_file_table.py | 33 ------------------- 1 file changed, 33 deletions(-) delete mode 100644 alembic/versions/f0aae41a019b_add_file_table.py diff --git a/alembic/versions/f0aae41a019b_add_file_table.py b/alembic/versions/f0aae41a019b_add_file_table.py deleted file mode 100644 index 1b97162..0000000 --- a/alembic/versions/f0aae41a019b_add_file_table.py +++ /dev/null @@ -1,33 +0,0 @@ -"""add_file_table - -Revision ID: f0aae41a019b -Revises: 43c1f122cf7f -Create Date: 2025-09-20 14:07:19.102747 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa -import sqlmodel - - -# revision identifiers, used by Alembic. -revision: str = 'f0aae41a019b' -down_revision: Union[str, Sequence[str], None] = '43c1f122cf7f' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Upgrade schema.""" - # ### commands auto generated by Alembic - please adjust! ### - pass - # ### end Alembic commands ### - - -def downgrade() -> None: - """Downgrade schema.""" - # ### commands auto generated by Alembic - please adjust! 
### - pass - # ### end Alembic commands ### From 0e54889947ce0f230150467b2e9b258b0327a407 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Sat, 20 Sep 2025 14:29:46 -0400 Subject: [PATCH 07/35] Properly generate the files table --- alembic/env.py | 3 ++ .../versions/95817dee746c_add_files_table.py | 53 +++++++++++++++++++ api/files/models.py | 2 +- 3 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 alembic/versions/95817dee746c_add_files_table.py diff --git a/alembic/env.py b/alembic/env.py index 0694a5b..f8206c9 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -11,6 +11,8 @@ from api.samples.models import Sample, SampleAttribute from api.project.models import Project from api.runs.models import SequencingRun +from api.files.models import File + # Import Base from your SQLModel setup #from core.db import Base @@ -31,6 +33,7 @@ # target_metadata = None target_metadata = SQLModel.metadata + # other values from the config, defined by the needs of env.py, # can be acquired: # my_important_option = config.get_main_option("my_important_option") diff --git a/alembic/versions/95817dee746c_add_files_table.py b/alembic/versions/95817dee746c_add_files_table.py new file mode 100644 index 0000000..a267f84 --- /dev/null +++ b/alembic/versions/95817dee746c_add_files_table.py @@ -0,0 +1,53 @@ +"""add_files_table + +Revision ID: 95817dee746c +Revises: c955364e5391 +Create Date: 2025-09-20 14:28:45.987919 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +import sqlmodel + + +# revision identifiers, used by Alembic. +revision: str = '95817dee746c' +down_revision: Union[str, Sequence[str], None] = 'c955364e5391' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('file', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('file_id', sqlmodel.sql.sqltypes.AutoString(length=100), nullable=False), + sa.Column('filename', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False), + sa.Column('original_filename', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False), + sa.Column('file_path', sqlmodel.sql.sqltypes.AutoString(length=1024), nullable=False), + sa.Column('file_size', sa.Integer(), nullable=True), + sa.Column('mime_type', sqlmodel.sql.sqltypes.AutoString(length=100), nullable=True), + sa.Column('checksum', sqlmodel.sql.sqltypes.AutoString(length=64), nullable=True), + sa.Column('description', sqlmodel.sql.sqltypes.AutoString(length=1024), nullable=True), + sa.Column('file_type', sa.Enum('FASTQ', 'BAM', 'VCF', 'SAMPLESHEET', 'METRICS', 'REPORT', 'LOG', 'IMAGE', 'DOCUMENT', 'OTHER', name='filetype'), nullable=False), + sa.Column('upload_date', sa.DateTime(), nullable=False), + sa.Column('created_by', sqlmodel.sql.sqltypes.AutoString(length=100), nullable=True), + sa.Column('entity_type', sa.Enum('PROJECT', 'RUN', name='entitytype'), nullable=False), + sa.Column('entity_id', sqlmodel.sql.sqltypes.AutoString(length=100), nullable=False), + sa.Column('storage_backend', sa.Enum('LOCAL', 'S3', 'AZURE', 'GCS', name='storagebackend'), nullable=False), + sa.Column('is_public', sa.Boolean(), nullable=False), + sa.Column('is_archived', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('file_id') + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('file') + # ### end Alembic commands ### diff --git a/api/files/models.py b/api/files/models.py index 468901e..b2f9646 100644 --- a/api/files/models.py +++ b/api/files/models.py @@ -2,12 +2,12 @@ Models for the Files API """ +from typing import List import uuid from datetime import datetime from enum import Enum from sqlmodel import SQLModel, Field from pydantic import ConfigDict -from typing import List class FileType(str, Enum): From f1c67f43e125f26598942de3255987d62f27238d Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Sat, 20 Sep 2025 14:32:12 -0400 Subject: [PATCH 08/35] Fix revision history --- alembic/versions/95817dee746c_add_files_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alembic/versions/95817dee746c_add_files_table.py b/alembic/versions/95817dee746c_add_files_table.py index a267f84..1c8fa83 100644 --- a/alembic/versions/95817dee746c_add_files_table.py +++ b/alembic/versions/95817dee746c_add_files_table.py @@ -14,7 +14,7 @@ # revision identifiers, used by Alembic. 
revision: str = '95817dee746c' -down_revision: Union[str, Sequence[str], None] = 'c955364e5391' +down_revision: Union[str, Sequence[str], None] = '43c1f122cf7f' branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None From 40311a60a9a02176a8f0b09936150cef653bd0ca Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Sun, 21 Sep 2025 12:35:02 -0400 Subject: [PATCH 09/35] Fix deprecation warning --- api/files/services.py | 4 ++-- tests/api/test_files.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/files/services.py b/api/files/services.py index 8232577..c7b2e19 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -33,8 +33,8 @@ def generate_file_path( entity_type: EntityType, entity_id: str, file_type: FileType, filename: str ) -> str: """Generate a structured file path""" - from datetime import datetime - now = datetime.utcnow() + from datetime import datetime, timezone + now = datetime.now(timezone.utc) year = now.strftime("%Y") month = now.strftime("%m") diff --git a/tests/api/test_files.py b/tests/api/test_files.py index 4ad7256..ab642a9 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -5,7 +5,7 @@ import tempfile import shutil from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone import pytest from fastapi.testclient import TestClient @@ -133,7 +133,7 @@ def test_generate_file_path(self): assert path_parts[5] == "sample.fastq" # Year and month should be current - now = datetime.utcnow() + now = datetime.now(timezone.utc) assert path_parts[3] == now.strftime("%Y") assert path_parts[4] == now.strftime("%m") From 41d871351ec779b72906e2735e724284094d6369 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Sun, 21 Sep 2025 12:39:32 -0400 Subject: [PATCH 10/35] Lint --- api/files/routes.py | 30 +++++++++++++++--------------- api/files/services.py | 10 +++++----- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/api/files/routes.py b/api/files/routes.py index 65bf706..b77fab2 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -33,7 +33,7 @@ def create_file( ) -> FilePublic: """ Create a new file record with optional file content upload. - + - **filename**: Name of the file - **description**: Optional description of the file - **file_type**: Type of file (fastq, bam, vcf, etc.) @@ -45,7 +45,7 @@ def create_file( file_content = None if content and content.filename: file_content = content.file.read() - + return services.create_file(session, file_in, file_content) @@ -67,7 +67,7 @@ def list_files( ) -> PaginatedFileResponse: """ Get a paginated list of files with optional filtering. - + Supports filtering by: - Entity type and ID (project or run) - File type (fastq, bam, vcf, etc.) @@ -83,7 +83,7 @@ def list_files( is_public=is_public, created_by=created_by, ) - + return services.list_files(session, filters, page, per_page) @@ -98,7 +98,7 @@ def get_file( ) -> FilePublic: """ Get a single file by its file_id. - + - **file_id**: The unique file identifier """ try: @@ -122,7 +122,7 @@ def update_file( ) -> FilePublic: """ Update file metadata. - + - **file_id**: The unique file identifier - **file_update**: Fields to update """ @@ -146,7 +146,7 @@ def delete_file( ) -> None: """ Delete a file and its content. - + - **file_id**: The unique file identifier """ try: @@ -173,20 +173,20 @@ def download_file( ): """ Download the content of a file. 
- + - **file_id**: The unique file identifier """ try: # Get file metadata file_record = services.get_file(session, file_id) - + # Get file content content = services.get_file_content(session, file_id) - + # Create streaming response def generate(): yield content - + return StreamingResponse( generate(), media_type=file_record.mime_type or "application/octet-stream", @@ -213,7 +213,7 @@ def upload_file_content( ) -> FilePublic: """ Upload content for an existing file record. - + - **file_id**: The unique file identifier - **content**: The file content to upload """ @@ -242,7 +242,7 @@ def list_files_for_entity( ) -> PaginatedFileResponse: """ Get all files associated with a specific project or run. - + - **entity_type**: Either "project" or "run" - **entity_id**: The project ID or run barcode """ @@ -251,7 +251,7 @@ def list_files_for_entity( entity_id=entity_id, file_type=file_type, ) - + return services.list_files_for_entity(session, entity_type, entity_id, page, per_page, filters) @@ -266,7 +266,7 @@ def get_file_count_for_entity( ) -> dict: """ Get the total number of files for a specific project or run. - + - **entity_type**: Either "project" or "run" - **entity_id**: The project ID or run barcode """ diff --git a/api/files/services.py b/api/files/services.py index c7b2e19..9a4ccf6 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -342,22 +342,22 @@ def update_file_content( """Update file content""" # Get the file record file_record = get_file(session, file_id) - + # Calculate new file metadata file_size = len(content) checksum = calculate_file_checksum(content) - + # Write content to storage storage_path = Path(storage_root) / file_record.file_path storage_path.parent.mkdir(parents=True, exist_ok=True) storage_path.write_bytes(content) - + # Update file record file_record.file_size = file_size file_record.checksum = checksum - + session.add(file_record) session.commit() session.refresh(file_record) - + return file_record From 36de0e6cc389eb1448d6e581e539c58c7661347c Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 10:16:39 -0400 Subject: [PATCH 11/35] Implement file api tests --- api/files/routes.py | 29 ++-- tests/api/test_files.py | 307 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 316 insertions(+), 20 deletions(-) diff --git a/api/files/routes.py b/api/files/routes.py index b77fab2..918a6cc 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -3,7 +3,7 @@ """ from typing import Optional -from fastapi import APIRouter, Query, HTTPException, status, UploadFile, File as FastAPIFile +from fastapi import APIRouter, Query, HTTPException, status, UploadFile, File as FastAPIFile, Form from fastapi.responses import StreamingResponse from core.deps import SessionDep from api.files.models import ( @@ -28,7 +28,13 @@ ) def create_file( session: SessionDep, - file_in: FileCreate, + filename: str = Form(...), + entity_type: EntityType = Form(...), + entity_id: str = Form(...), + description: Optional[str] = Form(None), + file_type: FileType = Form(FileType.OTHER), + is_public: bool = Form(False), + created_by: Optional[str] = Form(None), content: Optional[UploadFile] = FastAPIFile(None) ) -> FilePublic: """ @@ -42,6 +48,17 @@ def create_file( - **is_public**: Whether the file is publicly accessible - **created_by**: User who created the file """ + # Create FileCreate object from form data + file_in = FileCreate( + filename=filename, + description=description, + file_type=file_type, + entity_type=entity_type, + entity_id=entity_id, + 
is_public=is_public, + created_by=created_by + ) + file_content = None if content and content.filename: file_content = content.file.read() @@ -246,13 +263,7 @@ def list_files_for_entity( - **entity_type**: Either "project" or "run" - **entity_id**: The project ID or run barcode """ - filters = FileFilters( - entity_type=entity_type, - entity_id=entity_id, - file_type=file_type, - ) - - return services.list_files_for_entity(session, entity_type, entity_id, page, per_page, filters) + return services.list_files_for_entity(session, entity_type, entity_id, page, per_page, file_type) @router.get( diff --git a/tests/api/test_files.py b/tests/api/test_files.py index ab642a9..d2e6116 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -374,29 +374,314 @@ class TestFileAPI: def test_create_file_endpoint(self, client: TestClient, session: Session): """Test file creation endpoint""" - # This would test the actual API endpoint once routes are implemented - # For now, we'll skip this as routes.py needs to be updated - pass + file_data = { + "filename": "test_api.txt", + "description": "Test file via API", + "file_type": "document", + "entity_type": "project", + "entity_id": "PROJ001", + "is_public": "true", # Form data sends as string + "created_by": "api_test_user" + } + + response = client.post("/api/v1/files", data=file_data) + assert response.status_code == 201 + + data = response.json() + assert data["filename"] == "test_api.txt" + assert data["description"] == "Test file via API" + assert data["file_type"] == "document" + assert data["entity_type"] == "project" + assert data["entity_id"] == "PROJ001" + assert data["is_public"] is True + assert data["created_by"] == "api_test_user" + assert "file_id" in data + assert "upload_date" in data def test_get_files_endpoint(self, client: TestClient, session: Session): """Test file listing endpoint""" - # This would test the actual API endpoint once routes are implemented - pass + # First create some test files + for i in range(3): + file_data = { + "filename": f"test_list_{i}.txt", + "description": f"Test file {i}", + "file_type": "document", + "entity_type": "project", + "entity_id": "PROJ001", + "created_by": "list_test_user" + } + client.post("/api/v1/files", data=file_data) + + # Test basic listing + response = client.get("/api/v1/files") + assert response.status_code == 200 + + data = response.json() + assert "data" in data + assert "total_items" in data + assert "current_page" in data + assert "per_page" in data + assert data["total_items"] >= 3 + assert len(data["data"]) >= 3 + + # Test pagination + response = client.get("/api/v1/files?page=1&per_page=2") + assert response.status_code == 200 + data = response.json() + assert data["current_page"] == 1 + assert data["per_page"] == 2 + assert len(data["data"]) <= 2 + + # Test filtering by entity + response = client.get("/api/v1/files?entity_type=project&entity_id=PROJ001") + assert response.status_code == 200 + data = response.json() + for item in data["data"]: + assert item["entity_type"] == "project" + assert item["entity_id"] == "PROJ001" + + # Test filtering by file type + response = client.get("/api/v1/files?file_type=document") + assert response.status_code == 200 + data = response.json() + for item in data["data"]: + assert item["file_type"] == "document" + + # Test search functionality + response = client.get("/api/v1/files?search=test_list_1") + assert response.status_code == 200 + data = response.json() + assert data["total_items"] >= 1 def test_get_file_endpoint(self, 
client: TestClient, session: Session): """Test file retrieval endpoint""" - # This would test the actual API endpoint once routes are implemented - pass + # Create a test file + file_data = { + "filename": "test_get.txt", + "description": "Test file for GET", + "file_type": "document", + "entity_type": "project", + "entity_id": "PROJ001", + "created_by": "get_test_user" + } + + create_response = client.post("/api/v1/files", data=file_data) + assert create_response.status_code == 201 + created_file = create_response.json() + file_id = created_file["file_id"] + + # Test successful retrieval + response = client.get(f"/api/v1/files/{file_id}") + assert response.status_code == 200 + + data = response.json() + assert data["file_id"] == file_id + assert data["filename"] == "test_get.txt" + assert data["description"] == "Test file for GET" + + # Test non-existent file + response = client.get("/api/v1/files/non-existent-id") + assert response.status_code == 404 def test_update_file_endpoint(self, client: TestClient, session: Session): """Test file update endpoint""" - # This would test the actual API endpoint once routes are implemented - pass + # Create a test file + file_data = { + "filename": "test_update.txt", + "description": "Original description", + "file_type": "document", + "entity_type": "project", + "entity_id": "PROJ001", + "created_by": "update_test_user" + } + + create_response = client.post("/api/v1/files", data=file_data) + assert create_response.status_code == 201 + created_file = create_response.json() + file_id = created_file["file_id"] + + # Test successful update + update_data = { + "description": "Updated description", + "is_public": True + } + + response = client.put(f"/api/v1/files/{file_id}", json=update_data) + assert response.status_code == 200 + + data = response.json() + assert data["file_id"] == file_id + assert data["description"] == "Updated description" + assert data["is_public"] is True + assert data["filename"] == "test_update.txt" # Should remain unchanged + + # Test non-existent file update + response = client.put("/api/v1/files/non-existent-id", json=update_data) + assert response.status_code == 404 def test_delete_file_endpoint(self, client: TestClient, session: Session): """Test file deletion endpoint""" - # This would test the actual API endpoint once routes are implemented - pass + # Create a test file + file_data = { + "filename": "test_delete.txt", + "description": "Test file for deletion", + "file_type": "document", + "entity_type": "project", + "entity_id": "PROJ001", + "created_by": "delete_test_user" + } + + create_response = client.post("/api/v1/files", data=file_data) + assert create_response.status_code == 201 + created_file = create_response.json() + file_id = created_file["file_id"] + + # Test successful deletion + response = client.delete(f"/api/v1/files/{file_id}") + assert response.status_code == 204 + + # Verify file is deleted by trying to get it + get_response = client.get(f"/api/v1/files/{file_id}") + assert get_response.status_code == 404 + + # Test non-existent file deletion + response = client.delete("/api/v1/files/non-existent-id") + assert response.status_code == 404 + + def test_list_files_for_entity_endpoint(self, client: TestClient, session: Session): + """Test listing files for a specific entity""" + # Create files for different entities + entities = [ + ("project", "PROJ001"), + ("project", "PROJ002"), + ("run", "190110_MACHINE123_0001_FLOWCELL123") + ] + + for entity_type, entity_id in entities: + for i in range(2): + file_data 
= { + "filename": f"entity_test_{i}.txt", + "description": f"Test file {i} for {entity_type} {entity_id}", + "file_type": "document", + "entity_type": entity_type, + "entity_id": entity_id, + "created_by": "entity_test_user" + } + client.post("/api/v1/files", data=file_data) + + # Test listing files for specific project + response = client.get("/api/v1/files/entity/project/PROJ001") + assert response.status_code == 200 + + data = response.json() + assert data["total_items"] == 2 + for item in data["data"]: + assert item["entity_type"] == "project" + assert item["entity_id"] == "PROJ001" + + # Test listing files for specific run + response = client.get("/api/v1/files/entity/run/190110_MACHINE123_0001_FLOWCELL123") + assert response.status_code == 200 + + data = response.json() + assert data["total_items"] == 2 + for item in data["data"]: + assert item["entity_type"] == "run" + assert item["entity_id"] == "190110_MACHINE123_0001_FLOWCELL123" + + # Test pagination for entity files + response = client.get("/api/v1/files/entity/project/PROJ001?page=1&per_page=1") + assert response.status_code == 200 + data = response.json() + assert data["current_page"] == 1 + assert data["per_page"] == 1 + assert len(data["data"]) == 1 + + def test_get_file_count_for_entity_endpoint(self, client: TestClient, session: Session): + """Test getting file count for a specific entity""" + # Create files for a specific entity + entity_type = "project" + entity_id = "PROJ_COUNT_TEST" + + for i in range(5): + file_data = { + "filename": f"count_test_{i}.txt", + "description": f"Count test file {i}", + "file_type": "document", + "entity_type": entity_type, + "entity_id": entity_id, + "created_by": "count_test_user" + } + client.post("/api/v1/files", data=file_data) + + # Test file count endpoint + response = client.get(f"/api/v1/files/entity/{entity_type}/{entity_id}/count") + assert response.status_code == 200 + + data = response.json() + assert data["entity_type"] == entity_type + assert data["entity_id"] == entity_id + assert data["file_count"] == 5 + + # Test count for entity with no files + response = client.get("/api/v1/files/entity/project/EMPTY_PROJECT/count") + assert response.status_code == 200 + + data = response.json() + assert data["entity_type"] == "project" + assert data["entity_id"] == "EMPTY_PROJECT" + assert data["file_count"] == 0 + + def test_create_file_with_content_endpoint(self, client: TestClient, session: Session): + """Test file creation with content upload""" + import io + + # Create file data + file_data = { + "filename": "test_with_content.txt", + "description": "Test file with content", + "file_type": "document", + "entity_type": "project", + "entity_id": "PROJ001", + "created_by": "content_test_user" + } + + # Create file content + file_content = b"Hello, this is test content!" 
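+        # Package the bytes as a multipart file field named "content" so it maps onto
+        # the endpoint's UploadFile parameter alongside the regular form fields.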
+ files = {"content": ("test_content.txt", io.BytesIO(file_content), "text/plain")} + + # Send multipart form data + response = client.post("/api/v1/files", data=file_data, files=files) + assert response.status_code == 201 + + data = response.json() + assert data["filename"] == "test_with_content.txt" + assert data["file_size"] == len(file_content) + assert data["mime_type"] == "text/plain" + + def test_error_handling(self, client: TestClient, session: Session): + """Test API error handling""" + # Test invalid file type + invalid_file_data = { + "filename": "test.txt", + "file_type": "invalid_type", + "entity_type": "project", + "entity_id": "PROJ001" + } + + response = client.post("/api/v1/files", data=invalid_file_data) + assert response.status_code == 422 # Validation error + + # Test invalid entity type + invalid_entity_data = { + "filename": "test.txt", + "file_type": "document", + "entity_type": "invalid_entity", + "entity_id": "PROJ001" + } + + response = client.post("/api/v1/files", data=invalid_entity_data) + assert response.status_code == 422 # Validation error class TestFileIntegration: From c632dbc9b24381631f15f48e082a543e825e01d5 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 10:36:26 -0400 Subject: [PATCH 12/35] Lint --- api/files/routes.py | 2 +- tests/api/test_files.py | 40 ++++++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/api/files/routes.py b/api/files/routes.py index 918a6cc..50c575f 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -58,7 +58,7 @@ def create_file( is_public=is_public, created_by=created_by ) - + file_content = None if content and content.filename: file_content = content.file.read() diff --git a/tests/api/test_files.py b/tests/api/test_files.py index d2e6116..321cb78 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -386,7 +386,7 @@ def test_create_file_endpoint(self, client: TestClient, session: Session): response = client.post("/api/v1/files", data=file_data) assert response.status_code == 201 - + data = response.json() assert data["filename"] == "test_api.txt" assert data["description"] == "Test file via API" @@ -415,7 +415,7 @@ def test_get_files_endpoint(self, client: TestClient, session: Session): # Test basic listing response = client.get("/api/v1/files") assert response.status_code == 200 - + data = response.json() assert "data" in data assert "total_items" in data @@ -464,7 +464,7 @@ def test_get_file_endpoint(self, client: TestClient, session: Session): "entity_id": "PROJ001", "created_by": "get_test_user" } - + create_response = client.post("/api/v1/files", data=file_data) assert create_response.status_code == 201 created_file = create_response.json() @@ -473,7 +473,7 @@ def test_get_file_endpoint(self, client: TestClient, session: Session): # Test successful retrieval response = client.get(f"/api/v1/files/{file_id}") assert response.status_code == 200 - + data = response.json() assert data["file_id"] == file_id assert data["filename"] == "test_get.txt" @@ -494,7 +494,7 @@ def test_update_file_endpoint(self, client: TestClient, session: Session): "entity_id": "PROJ001", "created_by": "update_test_user" } - + create_response = client.post("/api/v1/files", data=file_data) assert create_response.status_code == 201 created_file = create_response.json() @@ -505,10 +505,10 @@ def test_update_file_endpoint(self, client: TestClient, session: Session): "description": "Updated description", "is_public": True } - + response = 
client.put(f"/api/v1/files/{file_id}", json=update_data) assert response.status_code == 200 - + data = response.json() assert data["file_id"] == file_id assert data["description"] == "Updated description" @@ -530,7 +530,7 @@ def test_delete_file_endpoint(self, client: TestClient, session: Session): "entity_id": "PROJ001", "created_by": "delete_test_user" } - + create_response = client.post("/api/v1/files", data=file_data) assert create_response.status_code == 201 created_file = create_response.json() @@ -556,7 +556,7 @@ def test_list_files_for_entity_endpoint(self, client: TestClient, session: Sessi ("project", "PROJ002"), ("run", "190110_MACHINE123_0001_FLOWCELL123") ] - + for entity_type, entity_id in entities: for i in range(2): file_data = { @@ -572,7 +572,7 @@ def test_list_files_for_entity_endpoint(self, client: TestClient, session: Sessi # Test listing files for specific project response = client.get("/api/v1/files/entity/project/PROJ001") assert response.status_code == 200 - + data = response.json() assert data["total_items"] == 2 for item in data["data"]: @@ -582,7 +582,7 @@ def test_list_files_for_entity_endpoint(self, client: TestClient, session: Sessi # Test listing files for specific run response = client.get("/api/v1/files/entity/run/190110_MACHINE123_0001_FLOWCELL123") assert response.status_code == 200 - + data = response.json() assert data["total_items"] == 2 for item in data["data"]: @@ -602,7 +602,7 @@ def test_get_file_count_for_entity_endpoint(self, client: TestClient, session: S # Create files for a specific entity entity_type = "project" entity_id = "PROJ_COUNT_TEST" - + for i in range(5): file_data = { "filename": f"count_test_{i}.txt", @@ -617,7 +617,7 @@ def test_get_file_count_for_entity_endpoint(self, client: TestClient, session: S # Test file count endpoint response = client.get(f"/api/v1/files/entity/{entity_type}/{entity_id}/count") assert response.status_code == 200 - + data = response.json() assert data["entity_type"] == entity_type assert data["entity_id"] == entity_id @@ -626,7 +626,7 @@ def test_get_file_count_for_entity_endpoint(self, client: TestClient, session: S # Test count for entity with no files response = client.get("/api/v1/files/entity/project/EMPTY_PROJECT/count") assert response.status_code == 200 - + data = response.json() assert data["entity_type"] == "project" assert data["entity_id"] == "EMPTY_PROJECT" @@ -635,7 +635,7 @@ def test_get_file_count_for_entity_endpoint(self, client: TestClient, session: S def test_create_file_with_content_endpoint(self, client: TestClient, session: Session): """Test file creation with content upload""" import io - + # Create file data file_data = { "filename": "test_with_content.txt", @@ -645,15 +645,15 @@ def test_create_file_with_content_endpoint(self, client: TestClient, session: Se "entity_id": "PROJ001", "created_by": "content_test_user" } - + # Create file content file_content = b"Hello, this is test content!" 
files = {"content": ("test_content.txt", io.BytesIO(file_content), "text/plain")} - + # Send multipart form data response = client.post("/api/v1/files", data=file_data, files=files) assert response.status_code == 201 - + data = response.json() assert data["filename"] == "test_with_content.txt" assert data["file_size"] == len(file_content) @@ -668,7 +668,7 @@ def test_error_handling(self, client: TestClient, session: Session): "entity_type": "project", "entity_id": "PROJ001" } - + response = client.post("/api/v1/files", data=invalid_file_data) assert response.status_code == 422 # Validation error @@ -679,7 +679,7 @@ def test_error_handling(self, client: TestClient, session: Session): "entity_type": "invalid_entity", "entity_id": "PROJ001" } - + response = client.post("/api/v1/files", data=invalid_entity_data) assert response.status_code == 422 # Validation error From f9995a0564dbb5e2c9858c98621cc6c7bca3dcd5 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 10:38:09 -0400 Subject: [PATCH 13/35] Add file registration scripts --- scripts/README_S3_Registration.md | 193 ++++++++++ scripts/register_s3_files.py | 402 +++++++++++++++++++++ scripts/s3_registration_config.sample.json | 20 + 3 files changed, 615 insertions(+) create mode 100644 scripts/README_S3_Registration.md create mode 100644 scripts/register_s3_files.py create mode 100644 scripts/s3_registration_config.sample.json diff --git a/scripts/README_S3_Registration.md b/scripts/README_S3_Registration.md new file mode 100644 index 0000000..0c25204 --- /dev/null +++ b/scripts/README_S3_Registration.md @@ -0,0 +1,193 @@ +# S3 Bulk File Registration + +This script allows you to register existing S3 files in your database without moving them. Files remain in S3 but become discoverable through the Files API. + +## Prerequisites + +1. **AWS Credentials**: Configure AWS credentials using one of these methods: + - AWS CLI: `aws configure` + - Environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` + - IAM roles (if running on EC2) + - AWS credentials file + +2. **Required Permissions**: Your AWS credentials need the following S3 permissions: + - `s3:ListBucket` - to list objects in buckets + - `s3:GetObject` - to read object metadata + - `s3:HeadBucket` - to verify bucket access + +3. **Python Dependencies**: Install boto3 if not already installed: + ```bash + pip install boto3 + ``` + +## Usage Examples + +### 1. Single Bucket Registration + +Register all files from a specific bucket: +```bash +python scripts/register_s3_files.py --bucket my-ngs-data-bucket +``` + +Register files with a specific prefix: +```bash +python scripts/register_s3_files.py --bucket my-bucket --prefix projects/ +``` + +### 2. Dry Run Mode + +See what would be registered without making changes: +```bash +python scripts/register_s3_files.py --bucket my-bucket --dry-run +``` + +### 3. Configuration File Mode + +For multiple buckets or complex setups, use a configuration file: + +1. Create a configuration file (copy from `s3_registration_config.sample.json`): + ```bash + cp scripts/s3_registration_config.sample.json my_s3_config.json + ``` + +2. Edit the configuration file with your bucket details: + ```json + { + "buckets": [ + { + "name": "my-ngs-data-bucket", + "prefix": "projects/", + "entity_patterns": { + "project": "projects/([^/]+)", + "run": "runs/([^/]+)" + } + } + ], + "dry_run": false + } + ``` + +3. Run with the configuration: + ```bash + python scripts/register_s3_files.py --config my_s3_config.json + ``` + +### 4. 
Generate Sample Configuration + +Create a sample configuration file: +```bash +python scripts/register_s3_files.py --create-config +``` + +## Configuration File Format + +```json +{ + "buckets": [ + { + "name": "bucket-name", + "prefix": "optional/prefix/", + "entity_patterns": { + "project": "regex_pattern_to_extract_project_id", + "run": "regex_pattern_to_extract_run_id" + } + } + ], + "dry_run": true +} +``` + +### Entity Patterns + +Entity patterns are regex patterns used to extract project or run IDs from S3 object keys: + +- `"project": "projects/([^/]+)"` - Extracts project ID from paths like `projects/PROJ001/file.fastq` +- `"run": "runs/([^/]+)"` - Extracts run ID from paths like `runs/RUN123/data.bam` + +The captured group `([^/]+)` becomes the entity ID. + +## File Type Detection + +The script automatically detects file types based on extensions: + +| Extension | File Type | +|-----------|-----------| +| `.fastq`, `.fastq.gz`, `.fq`, `.fq.gz` | FASTQ | +| `.bam`, `.sam` | BAM | +| `.vcf`, `.vcf.gz` | VCF | +| `.csv`, `.xlsx` | SAMPLESHEET | +| `.json` | METRICS | +| `.html`, `.pdf` | REPORT | +| `.log`, `.txt` | LOG | +| `.png`, `.jpg`, `.jpeg`, `.svg` | IMAGE | +| `.doc`, `.docx`, `.md` | DOCUMENT | +| Others | OTHER | + +## What Gets Registered + +For each S3 object, the script creates a database record with: + +- **File metadata**: filename, size, MIME type, upload date +- **S3 location**: `file_path` set to `s3://bucket/key` +- **Storage backend**: Set to `StorageBackend.S3` +- **Entity association**: Project or run ID extracted from path +- **File type**: Auto-detected from extension +- **Default settings**: `is_public=False`, `created_by="s3_bulk_import"` + +## Error Handling + +The script handles common issues: + +- **Duplicate files**: Skips files already registered (based on S3 URI) +- **Access errors**: Reports permission or bucket access issues +- **Invalid patterns**: Falls back to default entity extraction +- **Database errors**: Rolls back failed registrations + +## Monitoring Progress + +The script provides progress updates every 100 files and a final summary: + +``` +Progress: 500 scanned, 450 registered, 30 skipped, 20 errors +========================================== +REGISTRATION SUMMARY +========================================== +Files scanned: 1000 +Files registered: 850 +Files skipped: 100 (already registered) +Errors: 50 (permission/validation issues) +``` + +## Best Practices + +1. **Start with dry run**: Always test with `--dry-run` first +2. **Use specific prefixes**: Limit scope with `--prefix` to avoid scanning entire buckets +3. **Monitor logs**: Check for permission or pattern matching issues +4. **Backup database**: Consider backing up before large imports +5. 
**Run incrementally**: Process buckets/prefixes separately for better control + +## Troubleshooting + +### Common Issues + +**"AWS credentials not found"** +- Configure AWS credentials using `aws configure` or environment variables + +**"Access denied to bucket"** +- Verify your AWS credentials have the required S3 permissions +- Check bucket policies and IAM roles + +**"No files registered"** +- Verify bucket name and prefix are correct +- Check entity patterns match your S3 structure +- Use `--dry-run` to see what would be processed + +**"Database connection failed"** +- Ensure your database is running and accessible +- Check `SQLALCHEMY_DATABASE_URI` environment variable + +### Getting Help + +Run the script with `--help` for detailed usage information: +```bash +python scripts/register_s3_files.py --help \ No newline at end of file diff --git a/scripts/register_s3_files.py b/scripts/register_s3_files.py new file mode 100644 index 0000000..d07db6f --- /dev/null +++ b/scripts/register_s3_files.py @@ -0,0 +1,402 @@ +#!/usr/bin/env python +""" +Bulk registration script for existing S3 files. + +This script scans S3 buckets and registers existing files in the database +without moving them. Files remain in S3 but become discoverable through the API. + +Usage: + python scripts/register_s3_files.py --bucket my-bucket --prefix data/ + python scripts/register_s3_files.py --config s3_config.json + python scripts/register_s3_files.py --bucket my-bucket --dry-run +""" + +import argparse +import json +import mimetypes +import re +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, Tuple + +import boto3 +from botocore.exceptions import ClientError, NoCredentialsError +from sqlmodel import Session, select + +# Add project root to path for imports +sys.path.append(str(Path(__file__).parent.parent)) + +from api.files.models import FileCreate, FileType, EntityType, StorageBackend +from api.files.services import create_file +from core.db import get_session +from core.logger import logger + + +class S3FileRegistrar: + """Handles bulk registration of S3 files to the database.""" + + def __init__(self, session: Session, dry_run: bool = False): + self.session = session + self.dry_run = dry_run + self.stats = { + 'scanned': 0, + 'registered': 0, + 'skipped': 0, + 'errors': 0 + } + + # File type mapping based on extensions + self.file_type_mapping = { + '.fastq': FileType.FASTQ, + '.fastq.gz': FileType.FASTQ, + '.fq': FileType.FASTQ, + '.fq.gz': FileType.FASTQ, + '.bam': FileType.BAM, + '.sam': FileType.BAM, + '.vcf': FileType.VCF, + '.vcf.gz': FileType.VCF, + '.csv': FileType.SAMPLESHEET, + '.xlsx': FileType.SAMPLESHEET, + '.json': FileType.METRICS, + '.html': FileType.REPORT, + '.pdf': FileType.REPORT, + '.log': FileType.LOG, + '.txt': FileType.LOG, + '.png': FileType.IMAGE, + '.jpg': FileType.IMAGE, + '.jpeg': FileType.IMAGE, + '.svg': FileType.IMAGE, + '.doc': FileType.DOCUMENT, + '.docx': FileType.DOCUMENT, + '.md': FileType.DOCUMENT, + } + + def get_file_type(self, filename: str) -> FileType: + """Determine file type based on filename extension.""" + filename_lower = filename.lower() + + # Check for compound extensions first (e.g., .fastq.gz) + for ext, file_type in self.file_type_mapping.items(): + if filename_lower.endswith(ext): + return file_type + + return FileType.OTHER + + def extract_entity_info(self, s3_key: str, patterns: Dict[str, str]) -> Tuple[EntityType, str]: + """ + Extract entity type and ID from S3 key using regex patterns. 
+ + Args: + s3_key: S3 object key + patterns: Dict of entity_type -> regex pattern + + Returns: + Tuple of (EntityType, entity_id) + """ + for entity_type_str, pattern in patterns.items(): + match = re.search(pattern, s3_key) + if match: + entity_id = match.group(1) + entity_type = EntityType.PROJECT if entity_type_str == 'project' else EntityType.RUN + return entity_type, entity_id + + # Default fallback - extract from path structure + path_parts = s3_key.split('/') + if len(path_parts) >= 2: + # Assume format like: projects/PROJ001/... or runs/RUN001/... + if path_parts[0].lower() in ['projects', 'project']: + return EntityType.PROJECT, path_parts[1] + elif path_parts[0].lower() in ['runs', 'run', 'sequencing_runs']: + return EntityType.RUN, path_parts[1] + + # Final fallback - use first directory as project + return EntityType.PROJECT, path_parts[0] if path_parts else 'unknown' + + def file_already_registered(self, s3_uri: str) -> bool: + """Check if file is already registered in database.""" + from api.files.models import File + + existing = self.session.exec( + select(File).where(File.file_path == s3_uri) + ).first() + + return existing is not None + + def register_s3_object(self, bucket: str, obj: dict, entity_patterns: Dict[str, str]) -> bool: + """ + Register a single S3 object in the database. + + Args: + bucket: S3 bucket name + obj: S3 object metadata from boto3 + entity_patterns: Regex patterns for extracting entity info + + Returns: + True if registered successfully, False otherwise + """ + s3_key = obj['Key'] + filename = Path(s3_key).name + s3_uri = f"s3://{bucket}/{s3_key}" + + # Skip directories + if s3_key.endswith('/'): + return False + + # Skip if already registered + if self.file_already_registered(s3_uri): + logger.debug(f"File already registered: {s3_uri}") + self.stats['skipped'] += 1 + return False + + # Extract entity information + entity_type, entity_id = self.extract_entity_info(s3_key, entity_patterns) + + # Determine file type + file_type = self.get_file_type(filename) + + # Get MIME type + mime_type, _ = mimetypes.guess_type(filename) + + # Create file record + file_create = FileCreate( + filename=filename, + original_filename=filename, + description=f"Imported from S3: {s3_key}", + file_type=file_type, + entity_type=entity_type, + entity_id=entity_id, + is_public=False, # Default to private + created_by="s3_bulk_import" + ) + + if self.dry_run: + logger.info(f"[DRY RUN] Would register: {s3_uri} -> {entity_type.value}/{entity_id}") + self.stats['registered'] += 1 + return True + + try: + # Register file in database + file_record = create_file( + session=self.session, + file_create=file_create, + file_content=None, # No content upload, file stays in S3 + storage_root="storage" # Use default storage settings + ) + + # Update the file record with S3-specific information + file_record.file_path = s3_uri + file_record.storage_backend = StorageBackend.S3 + file_record.file_size = obj.get('Size', 0) + file_record.mime_type = mime_type + file_record.upload_date = obj.get('LastModified', datetime.now(timezone.utc)) + + self.session.add(file_record) + self.session.commit() + + logger.info(f"Registered: {s3_uri} -> {file_record.file_id}") + self.stats['registered'] += 1 + return True + + except Exception as e: + logger.error(f"Failed to register {s3_uri}: {e}") + self.stats['errors'] += 1 + self.session.rollback() + return False + + def scan_bucket(self, bucket: str, prefix: str = "", + entity_patterns: Dict[str, str] = None) -> None: + """ + Scan S3 bucket and register 
files. + + Args: + bucket: S3 bucket name + prefix: S3 key prefix to filter objects + entity_patterns: Regex patterns for extracting entity info + """ + if entity_patterns is None: + entity_patterns = { + 'project': r'(?:projects?|proj)/([^/]+)', + 'run': r'(?:runs?|sequencing_runs?)/([^/]+)' + } + + try: + s3_client = boto3.client('s3') + + # Test bucket access + try: + s3_client.head_bucket(Bucket=bucket) + except ClientError as e: + error_code = e.response['Error']['Code'] + if error_code == '404': + logger.error(f"Bucket '{bucket}' does not exist") + elif error_code == '403': + logger.error(f"Access denied to bucket '{bucket}'") + else: + logger.error(f"Error accessing bucket '{bucket}': {e}") + return + + logger.info(f"Scanning S3 bucket: s3://{bucket}/{prefix}") + + # List objects with pagination + paginator = s3_client.get_paginator('list_objects_v2') + page_iterator = paginator.paginate( + Bucket=bucket, + Prefix=prefix + ) + + for page in page_iterator: + if 'Contents' not in page: + continue + + for obj in page['Contents']: + self.stats['scanned'] += 1 + self.register_s3_object(bucket, obj, entity_patterns) + + # Progress logging + if self.stats['scanned'] % 100 == 0: + logger.info( + f"Progress: {self.stats['scanned']} scanned, " + f"{self.stats['registered']} registered, " + f"{self.stats['skipped']} skipped, " + f"{self.stats['errors']} errors" + ) + + except NoCredentialsError: + logger.error("AWS credentials not found. Please configure AWS credentials.") + except Exception as e: + logger.error(f"Error scanning bucket: {e}") + + def print_summary(self): + """Print registration summary.""" + logger.info("=" * 50) + logger.info("REGISTRATION SUMMARY") + logger.info("=" * 50) + logger.info(f"Files scanned: {self.stats['scanned']}") + logger.info(f"Files registered: {self.stats['registered']}") + logger.info(f"Files skipped: {self.stats['skipped']}") + logger.info(f"Errors: {self.stats['errors']}") + + if self.dry_run: + logger.info("\n*** DRY RUN MODE - No files were actually registered ***") + + +def load_config(config_file: str) -> dict: + """Load configuration from JSON file.""" + try: + with open(config_file, 'r') as f: + return json.load(f) + except FileNotFoundError: + logger.error(f"Configuration file not found: {config_file}") + sys.exit(1) + except json.JSONDecodeError as e: + logger.error(f"Invalid JSON in configuration file: {e}") + sys.exit(1) + + +def create_sample_config(): + """Create a sample configuration file.""" + config = { + "buckets": [ + { + "name": "my-ngs-data-bucket", + "prefix": "projects/", + "entity_patterns": { + "project": r"projects/([^/]+)", + "run": r"runs/([^/]+)" + } + }, + { + "name": "my-runs-bucket", + "prefix": "sequencing_runs/", + "entity_patterns": { + "run": r"sequencing_runs/([^/]+)" + } + } + ], + "dry_run": True + } + + with open('s3_registration_config.json', 'w') as f: + json.dump(config, f, indent=2) + + logger.info("Sample configuration created: s3_registration_config.json") + logger.info("Edit this file with your S3 bucket details and run:") + logger.info("python scripts/register_s3_files.py --config s3_registration_config.json") + + +def main(): + parser = argparse.ArgumentParser( + description="Bulk register existing S3 files in the database", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Register all files from a bucket + python scripts/register_s3_files.py --bucket my-bucket + + # Register files with specific prefix + python scripts/register_s3_files.py --bucket my-bucket --prefix 
projects/ + + # Dry run to see what would be registered + python scripts/register_s3_files.py --bucket my-bucket --dry-run + + # Use configuration file for multiple buckets + python scripts/register_s3_files.py --config s3_config.json + + # Create sample configuration file + python scripts/register_s3_files.py --create-config + """ + ) + + parser.add_argument('--bucket', help='S3 bucket name') + parser.add_argument('--prefix', default='', help='S3 key prefix to filter objects') + parser.add_argument('--config', help='JSON configuration file for multiple buckets') + parser.add_argument('--dry-run', action='store_true', + help='Show what would be registered without making changes') + parser.add_argument('--create-config', action='store_true', + help='Create a sample configuration file') + + args = parser.parse_args() + + if args.create_config: + create_sample_config() + return + + if not args.bucket and not args.config: + parser.error("Either --bucket or --config must be specified") + + # Get database session + try: + session = next(get_session()) + except Exception as e: + logger.error(f"Failed to create database session: {e}") + sys.exit(1) + + registrar = S3FileRegistrar(session, dry_run=args.dry_run) + + try: + if args.config: + # Load configuration and process multiple buckets + config = load_config(args.config) + dry_run = config.get('dry_run', args.dry_run) + registrar.dry_run = dry_run + + for bucket_config in config['buckets']: + bucket = bucket_config['name'] + prefix = bucket_config.get('prefix', '') + entity_patterns = bucket_config.get('entity_patterns') + + logger.info(f"Processing bucket: {bucket}") + registrar.scan_bucket(bucket, prefix, entity_patterns) + + else: + # Single bucket mode + registrar.scan_bucket(args.bucket, args.prefix) + + finally: + registrar.print_summary() + session.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/s3_registration_config.sample.json b/scripts/s3_registration_config.sample.json new file mode 100644 index 0000000..afbab7f --- /dev/null +++ b/scripts/s3_registration_config.sample.json @@ -0,0 +1,20 @@ +{ + "buckets": [ + { + "name": "my-ngs-data-bucket", + "prefix": "projects/", + "entity_patterns": { + "project": "projects/([^/]+)", + "run": "runs/([^/]+)" + } + }, + { + "name": "my-sequencing-runs-bucket", + "prefix": "sequencing_runs/", + "entity_patterns": { + "run": "sequencing_runs/([^/]+)" + } + } + ], + "dry_run": true +} \ No newline at end of file From c4d4c7b6678606869bd982913c3822a805e0bdd9 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 10:39:23 -0400 Subject: [PATCH 14/35] Format with black --- scripts/register_s3_files.py | 293 ++++++++++++++++++----------------- 1 file changed, 152 insertions(+), 141 deletions(-) diff --git a/scripts/register_s3_files.py b/scripts/register_s3_files.py index d07db6f..8134e2c 100644 --- a/scripts/register_s3_files.py +++ b/scripts/register_s3_files.py @@ -35,62 +35,59 @@ class S3FileRegistrar: """Handles bulk registration of S3 files to the database.""" - + def __init__(self, session: Session, dry_run: bool = False): self.session = session self.dry_run = dry_run - self.stats = { - 'scanned': 0, - 'registered': 0, - 'skipped': 0, - 'errors': 0 - } - + self.stats = {"scanned": 0, "registered": 0, "skipped": 0, "errors": 0} + # File type mapping based on extensions self.file_type_mapping = { - '.fastq': FileType.FASTQ, - '.fastq.gz': FileType.FASTQ, - '.fq': FileType.FASTQ, - '.fq.gz': FileType.FASTQ, - '.bam': FileType.BAM, - 
'.sam': FileType.BAM, - '.vcf': FileType.VCF, - '.vcf.gz': FileType.VCF, - '.csv': FileType.SAMPLESHEET, - '.xlsx': FileType.SAMPLESHEET, - '.json': FileType.METRICS, - '.html': FileType.REPORT, - '.pdf': FileType.REPORT, - '.log': FileType.LOG, - '.txt': FileType.LOG, - '.png': FileType.IMAGE, - '.jpg': FileType.IMAGE, - '.jpeg': FileType.IMAGE, - '.svg': FileType.IMAGE, - '.doc': FileType.DOCUMENT, - '.docx': FileType.DOCUMENT, - '.md': FileType.DOCUMENT, + ".fastq": FileType.FASTQ, + ".fastq.gz": FileType.FASTQ, + ".fq": FileType.FASTQ, + ".fq.gz": FileType.FASTQ, + ".bam": FileType.BAM, + ".sam": FileType.BAM, + ".vcf": FileType.VCF, + ".vcf.gz": FileType.VCF, + ".csv": FileType.SAMPLESHEET, + ".xlsx": FileType.SAMPLESHEET, + ".json": FileType.METRICS, + ".html": FileType.REPORT, + ".pdf": FileType.REPORT, + ".log": FileType.LOG, + ".txt": FileType.LOG, + ".png": FileType.IMAGE, + ".jpg": FileType.IMAGE, + ".jpeg": FileType.IMAGE, + ".svg": FileType.IMAGE, + ".doc": FileType.DOCUMENT, + ".docx": FileType.DOCUMENT, + ".md": FileType.DOCUMENT, } - + def get_file_type(self, filename: str) -> FileType: """Determine file type based on filename extension.""" filename_lower = filename.lower() - + # Check for compound extensions first (e.g., .fastq.gz) for ext, file_type in self.file_type_mapping.items(): if filename_lower.endswith(ext): return file_type - + return FileType.OTHER - - def extract_entity_info(self, s3_key: str, patterns: Dict[str, str]) -> Tuple[EntityType, str]: + + def extract_entity_info( + self, s3_key: str, patterns: Dict[str, str] + ) -> Tuple[EntityType, str]: """ Extract entity type and ID from S3 key using regex patterns. - + Args: s3_key: S3 object key patterns: Dict of entity_type -> regex pattern - + Returns: Tuple of (EntityType, entity_id) """ @@ -98,66 +95,72 @@ def extract_entity_info(self, s3_key: str, patterns: Dict[str, str]) -> Tuple[En match = re.search(pattern, s3_key) if match: entity_id = match.group(1) - entity_type = EntityType.PROJECT if entity_type_str == 'project' else EntityType.RUN + entity_type = ( + EntityType.PROJECT + if entity_type_str == "project" + else EntityType.RUN + ) return entity_type, entity_id - + # Default fallback - extract from path structure - path_parts = s3_key.split('/') + path_parts = s3_key.split("/") if len(path_parts) >= 2: # Assume format like: projects/PROJ001/... or runs/RUN001/... - if path_parts[0].lower() in ['projects', 'project']: + if path_parts[0].lower() in ["projects", "project"]: return EntityType.PROJECT, path_parts[1] - elif path_parts[0].lower() in ['runs', 'run', 'sequencing_runs']: + elif path_parts[0].lower() in ["runs", "run", "sequencing_runs"]: return EntityType.RUN, path_parts[1] - + # Final fallback - use first directory as project - return EntityType.PROJECT, path_parts[0] if path_parts else 'unknown' - + return EntityType.PROJECT, path_parts[0] if path_parts else "unknown" + def file_already_registered(self, s3_uri: str) -> bool: """Check if file is already registered in database.""" from api.files.models import File - + existing = self.session.exec( select(File).where(File.file_path == s3_uri) ).first() - + return existing is not None - - def register_s3_object(self, bucket: str, obj: dict, entity_patterns: Dict[str, str]) -> bool: + + def register_s3_object( + self, bucket: str, obj: dict, entity_patterns: Dict[str, str] + ) -> bool: """ Register a single S3 object in the database. 
- + Args: bucket: S3 bucket name obj: S3 object metadata from boto3 entity_patterns: Regex patterns for extracting entity info - + Returns: True if registered successfully, False otherwise """ - s3_key = obj['Key'] + s3_key = obj["Key"] filename = Path(s3_key).name s3_uri = f"s3://{bucket}/{s3_key}" - + # Skip directories - if s3_key.endswith('/'): + if s3_key.endswith("/"): return False - + # Skip if already registered if self.file_already_registered(s3_uri): logger.debug(f"File already registered: {s3_uri}") - self.stats['skipped'] += 1 + self.stats["skipped"] += 1 return False - + # Extract entity information entity_type, entity_id = self.extract_entity_info(s3_key, entity_patterns) - + # Determine file type file_type = self.get_file_type(filename) - + # Get MIME type mime_type, _ = mimetypes.guess_type(filename) - + # Create file record file_create = FileCreate( filename=filename, @@ -167,48 +170,53 @@ def register_s3_object(self, bucket: str, obj: dict, entity_patterns: Dict[str, entity_type=entity_type, entity_id=entity_id, is_public=False, # Default to private - created_by="s3_bulk_import" + created_by="s3_bulk_import", ) - + if self.dry_run: - logger.info(f"[DRY RUN] Would register: {s3_uri} -> {entity_type.value}/{entity_id}") - self.stats['registered'] += 1 + logger.info( + f"[DRY RUN] Would register: {s3_uri} -> {entity_type.value}/{entity_id}" + ) + self.stats["registered"] += 1 return True - + try: # Register file in database file_record = create_file( session=self.session, file_create=file_create, file_content=None, # No content upload, file stays in S3 - storage_root="storage" # Use default storage settings + storage_root="storage", # Use default storage settings ) - + # Update the file record with S3-specific information file_record.file_path = s3_uri file_record.storage_backend = StorageBackend.S3 - file_record.file_size = obj.get('Size', 0) + file_record.file_size = obj.get("Size", 0) file_record.mime_type = mime_type - file_record.upload_date = obj.get('LastModified', datetime.now(timezone.utc)) - + file_record.upload_date = obj.get( + "LastModified", datetime.now(timezone.utc) + ) + self.session.add(file_record) self.session.commit() - + logger.info(f"Registered: {s3_uri} -> {file_record.file_id}") - self.stats['registered'] += 1 + self.stats["registered"] += 1 return True - + except Exception as e: logger.error(f"Failed to register {s3_uri}: {e}") - self.stats['errors'] += 1 + self.stats["errors"] += 1 self.session.rollback() return False - - def scan_bucket(self, bucket: str, prefix: str = "", - entity_patterns: Dict[str, str] = None) -> None: + + def scan_bucket( + self, bucket: str, prefix: str = "", entity_patterns: Dict[str, str] = None + ) -> None: """ Scan S3 bucket and register files. 
- + Args: bucket: S3 bucket name prefix: S3 key prefix to filter objects @@ -216,57 +224,54 @@ def scan_bucket(self, bucket: str, prefix: str = "", """ if entity_patterns is None: entity_patterns = { - 'project': r'(?:projects?|proj)/([^/]+)', - 'run': r'(?:runs?|sequencing_runs?)/([^/]+)' + "project": r"(?:projects?|proj)/([^/]+)", + "run": r"(?:runs?|sequencing_runs?)/([^/]+)", } - + try: - s3_client = boto3.client('s3') - + s3_client = boto3.client("s3") + # Test bucket access try: s3_client.head_bucket(Bucket=bucket) except ClientError as e: - error_code = e.response['Error']['Code'] - if error_code == '404': + error_code = e.response["Error"]["Code"] + if error_code == "404": logger.error(f"Bucket '{bucket}' does not exist") - elif error_code == '403': + elif error_code == "403": logger.error(f"Access denied to bucket '{bucket}'") else: logger.error(f"Error accessing bucket '{bucket}': {e}") return - + logger.info(f"Scanning S3 bucket: s3://{bucket}/{prefix}") - + # List objects with pagination - paginator = s3_client.get_paginator('list_objects_v2') - page_iterator = paginator.paginate( - Bucket=bucket, - Prefix=prefix - ) - + paginator = s3_client.get_paginator("list_objects_v2") + page_iterator = paginator.paginate(Bucket=bucket, Prefix=prefix) + for page in page_iterator: - if 'Contents' not in page: + if "Contents" not in page: continue - - for obj in page['Contents']: - self.stats['scanned'] += 1 + + for obj in page["Contents"]: + self.stats["scanned"] += 1 self.register_s3_object(bucket, obj, entity_patterns) - + # Progress logging - if self.stats['scanned'] % 100 == 0: + if self.stats["scanned"] % 100 == 0: logger.info( f"Progress: {self.stats['scanned']} scanned, " f"{self.stats['registered']} registered, " f"{self.stats['skipped']} skipped, " f"{self.stats['errors']} errors" ) - + except NoCredentialsError: logger.error("AWS credentials not found. 
Please configure AWS credentials.") except Exception as e: logger.error(f"Error scanning bucket: {e}") - + def print_summary(self): """Print registration summary.""" logger.info("=" * 50) @@ -276,7 +281,7 @@ def print_summary(self): logger.info(f"Files registered: {self.stats['registered']}") logger.info(f"Files skipped: {self.stats['skipped']}") logger.info(f"Errors: {self.stats['errors']}") - + if self.dry_run: logger.info("\n*** DRY RUN MODE - No files were actually registered ***") @@ -284,7 +289,7 @@ def print_summary(self): def load_config(config_file: str) -> dict: """Load configuration from JSON file.""" try: - with open(config_file, 'r') as f: + with open(config_file, "r") as f: return json.load(f) except FileNotFoundError: logger.error(f"Configuration file not found: {config_file}") @@ -303,26 +308,26 @@ def create_sample_config(): "prefix": "projects/", "entity_patterns": { "project": r"projects/([^/]+)", - "run": r"runs/([^/]+)" - } + "run": r"runs/([^/]+)", + }, }, { - "name": "my-runs-bucket", + "name": "my-runs-bucket", "prefix": "sequencing_runs/", - "entity_patterns": { - "run": r"sequencing_runs/([^/]+)" - } - } + "entity_patterns": {"run": r"sequencing_runs/([^/]+)"}, + }, ], - "dry_run": True + "dry_run": True, } - - with open('s3_registration_config.json', 'w') as f: + + with open("s3_registration_config.json", "w") as f: json.dump(config, f, indent=2) - + logger.info("Sample configuration created: s3_registration_config.json") logger.info("Edit this file with your S3 bucket details and run:") - logger.info("python scripts/register_s3_files.py --config s3_registration_config.json") + logger.info( + "python scripts/register_s3_files.py --config s3_registration_config.json" + ) def main(): @@ -345,58 +350,64 @@ def main(): # Create sample configuration file python scripts/register_s3_files.py --create-config - """ + """, + ) + + parser.add_argument("--bucket", help="S3 bucket name") + parser.add_argument("--prefix", default="", help="S3 key prefix to filter objects") + parser.add_argument("--config", help="JSON configuration file for multiple buckets") + parser.add_argument( + "--dry-run", + action="store_true", + help="Show what would be registered without making changes", + ) + parser.add_argument( + "--create-config", + action="store_true", + help="Create a sample configuration file", ) - - parser.add_argument('--bucket', help='S3 bucket name') - parser.add_argument('--prefix', default='', help='S3 key prefix to filter objects') - parser.add_argument('--config', help='JSON configuration file for multiple buckets') - parser.add_argument('--dry-run', action='store_true', - help='Show what would be registered without making changes') - parser.add_argument('--create-config', action='store_true', - help='Create a sample configuration file') - + args = parser.parse_args() - + if args.create_config: create_sample_config() return - + if not args.bucket and not args.config: parser.error("Either --bucket or --config must be specified") - + # Get database session try: session = next(get_session()) except Exception as e: logger.error(f"Failed to create database session: {e}") sys.exit(1) - + registrar = S3FileRegistrar(session, dry_run=args.dry_run) - + try: if args.config: # Load configuration and process multiple buckets config = load_config(args.config) - dry_run = config.get('dry_run', args.dry_run) + dry_run = config.get("dry_run", args.dry_run) registrar.dry_run = dry_run - - for bucket_config in config['buckets']: - bucket = bucket_config['name'] - prefix = 
bucket_config.get('prefix', '') - entity_patterns = bucket_config.get('entity_patterns') - + + for bucket_config in config["buckets"]: + bucket = bucket_config["name"] + prefix = bucket_config.get("prefix", "") + entity_patterns = bucket_config.get("entity_patterns") + logger.info(f"Processing bucket: {bucket}") registrar.scan_bucket(bucket, prefix, entity_patterns) - + else: # Single bucket mode registrar.scan_bucket(args.bucket, args.prefix) - + finally: registrar.print_summary() session.close() if __name__ == "__main__": - main() \ No newline at end of file + main() From cd9208856aa1577eb7b5d5ca8761fda0b3d03146 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 10:42:30 -0400 Subject: [PATCH 15/35] Lint --- scripts/register_s3_files.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/register_s3_files.py b/scripts/register_s3_files.py index 8134e2c..070579e 100644 --- a/scripts/register_s3_files.py +++ b/scripts/register_s3_files.py @@ -6,6 +6,7 @@ without moving them. Files remain in S3 but become discoverable through the API. Usage: + PYTHONPATH=. python scripts/register_s3_files.py --bucket my-bucket --prefix data/ python scripts/register_s3_files.py --config s3_config.json python scripts/register_s3_files.py --bucket my-bucket --dry-run @@ -25,7 +26,7 @@ from sqlmodel import Session, select # Add project root to path for imports -sys.path.append(str(Path(__file__).parent.parent)) +# sys.path.append(str(Path(__file__).parent.parent)) from api.files.models import FileCreate, FileType, EntityType, StorageBackend from api.files.services import create_file @@ -338,16 +339,16 @@ def main(): Examples: # Register all files from a bucket python scripts/register_s3_files.py --bucket my-bucket - + # Register files with specific prefix python scripts/register_s3_files.py --bucket my-bucket --prefix projects/ - + # Dry run to see what would be registered python scripts/register_s3_files.py --bucket my-bucket --dry-run - + # Use configuration file for multiple buckets python scripts/register_s3_files.py --config s3_config.json - + # Create sample configuration file python scripts/register_s3_files.py --create-config """, From f929f89eac865607b14264557572b48657e29567 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 10:54:23 -0400 Subject: [PATCH 16/35] Update method/swagger doc --- api/files/routes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/files/routes.py b/api/files/routes.py index 50c575f..2f92b84 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -247,7 +247,7 @@ def upload_file_content( @router.get( "/entity/{entity_type}/{entity_id}", response_model=PaginatedFileResponse, - summary="List files for a specific entity" + summary="List files for a specific entity." ) def list_files_for_entity( session: SessionDep, @@ -259,6 +259,7 @@ def list_files_for_entity( ) -> PaginatedFileResponse: """ Get all files associated with a specific project or run. + This is the same as /api/v1/files, but scoped to a specific entity. 
- **entity_type**: Either "project" or "run" - **entity_id**: The project ID or run barcode From 17754f3dc2365dbf5ddf6e62459dc485f796fa8e Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 16:21:16 -0400 Subject: [PATCH 17/35] Fix sort_by field on runs --- api/runs/routes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/runs/routes.py b/api/runs/routes.py index fa11ee8..a5726c4 100644 --- a/api/runs/routes.py +++ b/api/runs/routes.py @@ -75,7 +75,7 @@ def get_runs( session: SessionDep, page: int = Query(1, description="Page number (1-indexed)"), per_page: int = Query(20, description="Number of items per page"), - sort_by: str = Query("project_id", description="Field to sort by"), + sort_by: str = Query("barcode", description="Field to sort by"), sort_order: Literal["asc", "desc"] = Query( "asc", description="Sort order (asc or desc)" ), From 27374136173d3dd8e7f4fde8d53e9ae466e45e49 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 16:23:51 -0400 Subject: [PATCH 18/35] Fix sort_by on barcode to support asc/desc --- api/runs/services.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/api/runs/services.py b/api/runs/services.py index 089344f..fc46c99 100644 --- a/api/runs/services.py +++ b/api/runs/services.py @@ -95,7 +95,17 @@ def get_runs( total_pages = (total_count + per_page - 1) // per_page # Ceiling division # Determine sort field and direction - sort_field = getattr(SequencingRun, sort_by, SequencingRun.id) + # Handle computed fields that can't be sorted directly + if sort_by == "barcode": + # For barcode sorting, use run_date as primary sort field since barcode starts with date + sort_field = SequencingRun.run_date + else: + # Get the actual database column, fallback to id if field doesn't exist + sort_field = getattr(SequencingRun, sort_by, SequencingRun.id) + # Ensure we got a column, not a property + if not hasattr(sort_field, 'asc'): + sort_field = SequencingRun.id + sort_direction = sort_field.asc() if sort_order == "asc" else sort_field.desc() # Get run selection From 73c4207eb3e9a5886146da259d427f30ead0faea Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 16:24:16 -0400 Subject: [PATCH 19/35] Lint --- api/runs/services.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/runs/services.py b/api/runs/services.py index fc46c99..957208d 100644 --- a/api/runs/services.py +++ b/api/runs/services.py @@ -105,7 +105,7 @@ def get_runs( # Ensure we got a column, not a property if not hasattr(sort_field, 'asc'): sort_field = SequencingRun.id - + sort_direction = sort_field.asc() if sort_order == "asc" else sort_field.desc() # Get run selection From 7d62f10037e92594d151c7925ab0b7c9bb00a21c Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 19:31:03 -0400 Subject: [PATCH 20/35] Ensure run_time is 4 digits when provided --- api/runs/routes.py | 7 ++++++- tests/api/test_runs.py | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/api/runs/routes.py b/api/runs/routes.py index a5726c4..3cef2b1 100644 --- a/api/runs/routes.py +++ b/api/runs/routes.py @@ -14,7 +14,7 @@ """ from typing import Literal -from fastapi import APIRouter, Query, status +from fastapi import APIRouter, Query, status, HTTPException from core.deps import SessionDep, OpenSearchDep from api.runs.models import ( IlluminaMetricsResponseModel, @@ -46,6 +46,11 @@ def add_run( """ if sequencingrun_in.run_time == "": sequencingrun_in.run_time = None + elif 
sequencingrun_in.run_time and len(sequencingrun_in.run_time) != 4: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"run_time must be in HHMM format" + ) run = services.add_run( session=session, diff --git a/tests/api/test_runs.py b/tests/api/test_runs.py index a818be6..6ee5244 100644 --- a/tests/api/test_runs.py +++ b/tests/api/test_runs.py @@ -68,6 +68,22 @@ def test_add_run(client: TestClient): assert data["run_time"] is None assert data["barcode"] == "190110_MACHINE123_0002_FLOWCELL123" + # Try to add a run with an invalid run_time field + new_run = { + "run_date": "2019-01-10", + "machine_id": "MACHINE123", + "run_number": 3, + "flowcell_id": "FLOWCELL123", + "experiment_name": "Test Experiment", + "run_folder_uri": "s3://bucket/path/to/run", + "status": RunStatus.READY, + "run_time": "invalid_time_format", + } + response = client.post("/api/v1/runs", json=new_run) + assert response.status_code == 404 + data = response.json() + assert data["detail"] == "run_time must be in HHMM format" + def test_get_runs(client: TestClient, session: Session): """Test that we can get all runs""" From c2c30a8e7498033d0f2f41e880e0eb78e6756d43 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 23 Sep 2025 19:39:45 -0400 Subject: [PATCH 21/35] Ensure run_time is valid before adding to db and add test cases --- api/runs/models.py | 25 +++++++++++++++++++++++-- api/runs/routes.py | 8 -------- tests/api/test_runs.py | 32 +++++++++++++++++++++++++++++--- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/api/runs/models.py b/api/runs/models.py index 54ac84d..ccaad75 100644 --- a/api/runs/models.py +++ b/api/runs/models.py @@ -1,12 +1,12 @@ """ Models for the Runs API """ - +from typing import Optional import uuid from datetime import datetime, date from enum import Enum from sqlmodel import SQLModel, Field -from pydantic import ConfigDict, computed_field +from pydantic import ConfigDict, computed_field, field_validator class RunStatus(str, Enum): @@ -137,6 +137,27 @@ class SequencingRunCreate(SQLModel): model_config = ConfigDict(extra="forbid") + @field_validator('run_time', mode='before') + @classmethod + def preprocess_run_time(cls, v): + """Convert empty string to None before validation""" + return None if v == "" else v + + @field_validator('run_time') + @classmethod + def validate_run_time_format(cls, v: Optional[str]) -> Optional[str]: + if v is None: + return None + + if not isinstance(v, str) or len(v) != 4 or not v.isdigit(): + raise ValueError('run_time must be exactly 4 digits (HHMM format)') + + hours, minutes = int(v[:2]), int(v[2:]) + if not (0 <= hours <= 23) or not (0 <= minutes <= 59): + raise ValueError('Hours must be 00-23, minutes must be 00-59') + + return v + class SequencingRunUpdateRequest(SQLModel): run_status: RunStatus diff --git a/api/runs/routes.py b/api/runs/routes.py index 3cef2b1..01e5978 100644 --- a/api/runs/routes.py +++ b/api/runs/routes.py @@ -44,14 +44,6 @@ def add_run( """ Create a new project with optional attributes. 
""" - if sequencingrun_in.run_time == "": - sequencingrun_in.run_time = None - elif sequencingrun_in.run_time and len(sequencingrun_in.run_time) != 4: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"run_time must be in HHMM format" - ) - run = services.add_run( session=session, sequencingrun_in=sequencingrun_in, diff --git a/tests/api/test_runs.py b/tests/api/test_runs.py index 6ee5244..a3227a1 100644 --- a/tests/api/test_runs.py +++ b/tests/api/test_runs.py @@ -80,9 +80,35 @@ def test_add_run(client: TestClient): "run_time": "invalid_time_format", } response = client.post("/api/v1/runs", json=new_run) - assert response.status_code == 404 - data = response.json() - assert data["detail"] == "run_time must be in HHMM format" + assert response.status_code == 422 + + # Add a run with valid run_time + new_run = { + "run_date": "2019-01-10", + "machine_id": "MACHINE123", + "run_number": 4, + "flowcell_id": "FLOWCELL123", + "experiment_name": "Test Experiment", + "run_folder_uri": "s3://bucket/path/to/run", + "status": RunStatus.READY, + "run_time": "1230", + } + response = client.post("/api/v1/runs", json=new_run) + assert response.status_code == 201 + + # Add a run with an invalid run_time + new_run = { + "run_date": "2019-01-10", + "machine_id": "MACHINE123", + "run_number": 4, + "flowcell_id": "FLOWCELL123", + "experiment_name": "Test Experiment", + "run_folder_uri": "s3://bucket/path/to/run", + "status": RunStatus.READY, + "run_time": "5678", + } + response = client.post("/api/v1/runs", json=new_run) + assert response.status_code == 422 def test_get_runs(client: TestClient, session: Session): From ba55626df720998298394de634e14643233d2f41 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Fri, 26 Sep 2025 19:58:53 -0400 Subject: [PATCH 22/35] Add file browse endpoints --- api/files/models.py | 27 ++++++ api/files/routes.py | 58 ++++++++++++ api/files/services.py | 93 ++++++++++++++++++++ tests/api/test_files.py | 190 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 368 insertions(+) diff --git a/api/files/models.py b/api/files/models.py index b2f9646..03b084d 100644 --- a/api/files/models.py +++ b/api/files/models.py @@ -177,3 +177,30 @@ class PaginatedFileResponse(SQLModel): has_prev: bool model_config = ConfigDict(from_attributes=True) + + +class FileBrowserColumns(SQLModel): + """Individual file/folder item for file browser""" + name: str + date: str + size: int | None = None # None for directories + dir: bool # True for directories, False for files + + +class FileBrowserFolder(SQLModel): + """Folder item for file browser""" + name: str + date: str + + +class FileBrowserFile(SQLModel): + """File item for file browser""" + name: str + date: str + size: int + + +class FileBrowserData(SQLModel): + """File browser data structure with separate folders and files""" + folders: list[FileBrowserFolder] + files: list[FileBrowserFile] diff --git a/api/files/routes.py b/api/files/routes.py index 2f92b84..5b07596 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -14,6 +14,7 @@ FileFilters, FileType, EntityType, + FileBrowserData, ) import api.files.services as services @@ -104,6 +105,61 @@ def list_files( return services.list_files(session, filters, page, per_page) +@router.get( + "/browse", + response_model=FileBrowserData, + summary="Browse filesystem directory" +) +def browse_filesystem( + directory_path: str = Query("", description="Directory path to browse"), + storage_root: str = Query("storage", description="Storage root directory") +) -> 
FileBrowserData: + """ + Browse a filesystem directory and return folders and files in structured format. + + - **directory_path**: Path relative to storage root (empty for root) + - **storage_root**: Base storage directory (defaults to 'storage') + + Returns separate arrays for folders and files with name, date, and size information. + """ + return services.browse_filesystem(directory_path, storage_root) + + +@router.get( + "/browse-db", + response_model=FileBrowserData, + summary="List database files in browser format" +) +def list_files_browser_format( + session: SessionDep, + page: int = Query(1, ge=1, description="Page number (1-indexed)"), + per_page: int = Query(20, ge=1, le=100, description="Number of items per page"), + entity_type: Optional[EntityType] = Query(None, description="Filter by entity type"), + entity_id: Optional[str] = Query(None, description="Filter by entity ID"), + file_type: Optional[FileType] = Query(None, description="Filter by file type"), + search: Optional[str] = Query(None, description="Search in filename and description"), + is_public: Optional[bool] = Query(None, description="Filter by public/private status"), + created_by: Optional[str] = Query(None, description="Filter by creator"), +) -> FileBrowserData: + """ + Get database files in FileBrowserData format (files only, no folders). + + This endpoint returns the same file data as the regular list_files endpoint, + but formatted to match the FileBrowserData structure with separate folders and files arrays. + Since database files don't have folder structure, the folders array will be empty. + """ + filters = FileFilters( + entity_type=entity_type, + entity_id=entity_id, + file_type=file_type, + search_query=search, + is_public=is_public, + created_by=created_by, + ) + + return services.list_files_as_browser_data(session, filters, page, per_page) + + @router.get( "/{file_id}", response_model=FilePublic, @@ -284,3 +340,5 @@ def get_file_count_for_entity( """ count = services.get_file_count_for_entity(session, entity_type, entity_id) return {"entity_type": entity_type, "entity_id": entity_id, "file_count": count} + + diff --git a/api/files/services.py b/api/files/services.py index 9a4ccf6..7ef7049 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -5,7 +5,9 @@ import hashlib import secrets import string +import os from pathlib import Path +from datetime import datetime from sqlmodel import select, Session, func from pydantic import PositiveInt from fastapi import HTTPException, status @@ -20,6 +22,10 @@ FileType, EntityType, StorageBackend, + FileBrowserData, + FileBrowserFolder, + FileBrowserFile, + FileBrowserColumns, ) @@ -361,3 +367,90 @@ def update_file_content( session.refresh(file_record) return file_record + + +def browse_filesystem(directory_path: str, storage_root: str = "storage") -> FileBrowserData: + """Browse filesystem directory and return structured data""" + + # Construct full path + if os.path.isabs(directory_path): + full_path = Path(directory_path) + else: + full_path = Path(storage_root) / directory_path + + # Check if directory exists and is accessible + if not full_path.exists(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Directory not found: {directory_path}" + ) + + if not full_path.is_dir(): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Path is not a directory: {directory_path}" + ) + + folders = [] + files = [] + + try: + # List directory contents + for item in full_path.iterdir(): + # Get modification 
time + stat = item.stat() + mod_time = datetime.fromtimestamp(stat.st_mtime) + date_str = mod_time.strftime("%Y-%m-%d %H:%M:%S") + + if item.is_dir(): + folders.append(FileBrowserFolder( + name=item.name, + date=date_str + )) + else: + files.append(FileBrowserFile( + name=item.name, + date=date_str, + size=stat.st_size + )) + + except PermissionError: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=f"Permission denied accessing directory: {directory_path}" + ) + + # Sort folders and files by name + folders.sort(key=lambda x: x.name.lower()) + files.sort(key=lambda x: x.name.lower()) + + return FileBrowserData(folders=folders, files=files) + + +def list_files_as_browser_data( + session: Session, + filters: FileFilters | None = None, + page: PositiveInt = 1, + per_page: PositiveInt = 20, + sort_by: str = "upload_date", + sort_order: str = "desc" +) -> FileBrowserData: + """List database files in FileBrowserData format (files only, no folders)""" + + # Get files using existing list_files function + files_result = list_files(session, filters, page, per_page, sort_by, sort_order) + + # Convert to FileBrowserFile format + browser_files = [] + for file_record in files_result.data: + # Format date + date_str = file_record.upload_date.strftime("%Y-%m-%d %H:%M:%S") + + browser_files.append(FileBrowserFile( + name=file_record.filename, + date=date_str, + size=file_record.file_size or 0 + )) + + # No folders for database files + return FileBrowserData(folders=[], files=browser_files) diff --git a/tests/api/test_files.py b/tests/api/test_files.py index 321cb78..aa259c0 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -8,6 +8,7 @@ from datetime import datetime, timezone import pytest +from fastapi import HTTPException from fastapi.testclient import TestClient from sqlmodel import Session @@ -17,6 +18,9 @@ FileType, EntityType, StorageBackend, + FileBrowserData, + FileBrowserFolder, + FileBrowserFile, ) from api.files.services import ( create_file, @@ -31,6 +35,8 @@ generate_file_path, calculate_file_checksum, get_mime_type, + browse_filesystem, + list_files_as_browser_data, ) @@ -796,3 +802,187 @@ def test_file_type_filtering(self, session: Session, temp_storage): assert result.total_items == 1 assert result.data[0].file_type == file_type + + + +class TestFileBrowserModels: + """Test file browser model functionality""" + + def test_file_browser_folder_model(self): + """Test FileBrowserFolder model""" + folder = FileBrowserFolder( + name="test_folder", + date="2023-01-01 12:00:00" + ) + + assert folder.name == "test_folder" + assert folder.date == "2023-01-01 12:00:00" + + def test_file_browser_file_model(self): + """Test FileBrowserFile model""" + file = FileBrowserFile( + name="test_file.txt", + date="2023-01-01 12:00:00", + size=1024 + ) + + assert file.name == "test_file.txt" + assert file.date == "2023-01-01 12:00:00" + assert file.size == 1024 + + def test_file_browser_data_model(self): + """Test FileBrowserData model""" + folder = FileBrowserFolder(name="folder1", date="2023-01-01 12:00:00") + file = FileBrowserFile(name="file1.txt", date="2023-01-01 12:00:00", size=100) + + browser_data = FileBrowserData( + folders=[folder], + files=[file] + ) + + assert len(browser_data.folders) == 1 + assert len(browser_data.files) == 1 + assert browser_data.folders[0].name == "folder1" + assert browser_data.files[0].name == "file1.txt" + + +class TestFileBrowserServices: + """Test file browser service functions""" + + @pytest.fixture + def temp_storage(self): + 
"""Create temporary storage with folder/file structure""" + temp_dir = tempfile.mkdtemp() + + # Create some folders + (Path(temp_dir) / "folder1").mkdir() + (Path(temp_dir) / "folder2").mkdir() + + # Create some files + (Path(temp_dir) / "file1.txt").write_text("content1") + (Path(temp_dir) / "file2.txt").write_text("content2") + + yield temp_dir + shutil.rmtree(temp_dir) + + def test_browse_filesystem(self, temp_storage): + """Test filesystem browsing""" + result = browse_filesystem("", temp_storage) + + assert isinstance(result, FileBrowserData) + assert len(result.folders) == 2 + assert len(result.files) == 2 + + # Check folder names + folder_names = [f.name for f in result.folders] + assert "folder1" in folder_names + assert "folder2" in folder_names + + # Check file names + file_names = [f.name for f in result.files] + assert "file1.txt" in file_names + assert "file2.txt" in file_names + + # Check file sizes + for file in result.files: + assert file.size > 0 + assert file.date is not None + + def test_browse_filesystem_nonexistent_directory(self): + """Test browsing non-existent directory""" + with pytest.raises(HTTPException) as exc_info: + browse_filesystem("nonexistent", "nonexistent_root") + + assert exc_info.value.status_code == 404 + assert "not found" in str(exc_info.value.detail) + + def test_list_files_as_browser_data(self, session: Session): + """Test converting database files to browser data format""" + # Create test files in database + for i in range(3): + file_create = FileCreate( + filename=f"db_file_{i}.txt", + description=f"Database file {i}", + entity_type=EntityType.PROJECT, + entity_id="PROJ001", + file_type=FileType.DOCUMENT + ) + create_file(session, file_create) + + # Get files in browser format + result = list_files_as_browser_data(session) + + assert isinstance(result, FileBrowserData) + assert len(result.folders) == 0 # No folders for database files + assert len(result.files) == 3 + + # Check file properties + for file in result.files: + assert file.name.startswith("db_file_") + assert file.date is not None + assert file.size >= 0 + + +class TestFileBrowserAPI: + """Test file browser API endpoints""" + + def test_browse_filesystem_endpoint(self, client: TestClient, temp_storage): + """Test filesystem browsing endpoint""" + # Create a temporary directory structure for testing + temp_dir = tempfile.mkdtemp() + try: + # Create test structure + (Path(temp_dir) / "test_folder").mkdir() + (Path(temp_dir) / "test_file.txt").write_text("test content") + + # Test the endpoint + response = client.get(f"/api/v1/files/browse?storage_root={temp_dir}") + assert response.status_code == 200 + + data = response.json() + assert "folders" in data + assert "files" in data + assert isinstance(data["folders"], list) + assert isinstance(data["files"], list) + + finally: + shutil.rmtree(temp_dir) + + def test_browse_db_endpoint(self, client: TestClient, session: Session): + """Test database files browser endpoint""" + # Create test files + for i in range(2): + file_data = { + "filename": f"browser_test_{i}.txt", + "description": f"Browser test file {i}", + "file_type": "document", + "entity_type": "project", + "entity_id": "PROJ001", + "created_by": "browser_test_user" + } + client.post("/api/v1/files", data=file_data) + + # Test the browser endpoint + response = client.get("/api/v1/files/browse-db") + assert response.status_code == 200 + + data = response.json() + assert "folders" in data + assert "files" in data + assert len(data["folders"]) == 0 # No folders for database files + 
assert len(data["files"]) >= 2 + + # Check file structure + for file in data["files"]: + assert "name" in file + assert "date" in file + assert "size" in file + + def test_browse_filesystem_error_handling(self, client: TestClient): + """Test error handling for filesystem browsing""" + # Test non-existent directory + response = client.get( + "/api/v1/files/browse?directory_path=nonexistent&storage_root=nonexistent" + ) + assert response.status_code == 404 + From 7e647d02274f5278cac2586b5ad7472f9c66e287 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Fri, 26 Sep 2025 20:09:37 -0400 Subject: [PATCH 23/35] Fix test --- tests/api/test_files.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/api/test_files.py b/tests/api/test_files.py index aa259c0..971b20e 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -804,7 +804,6 @@ def test_file_type_filtering(self, session: Session, temp_storage): assert result.data[0].file_type == file_type - class TestFileBrowserModels: """Test file browser model functionality""" @@ -926,7 +925,7 @@ def test_list_files_as_browser_data(self, session: Session): class TestFileBrowserAPI: """Test file browser API endpoints""" - def test_browse_filesystem_endpoint(self, client: TestClient, temp_storage): + def test_browse_filesystem_endpoint(self, client: TestClient): """Test filesystem browsing endpoint""" # Create a temporary directory structure for testing temp_dir = tempfile.mkdtemp() From 708c59e500415599c8a073f051eec8246f26c8fc Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Fri, 26 Sep 2025 20:10:57 -0400 Subject: [PATCH 24/35] Format with black --- api/files/models.py | 18 +++++- api/files/routes.py | 133 +++++++++++++++++++----------------------- api/files/services.py | 133 ++++++++++++++++++------------------------ 3 files changed, 134 insertions(+), 150 deletions(-) diff --git a/api/files/models.py b/api/files/models.py index 03b084d..51763a2 100644 --- a/api/files/models.py +++ b/api/files/models.py @@ -12,6 +12,7 @@ class FileType(str, Enum): """File type categories""" + FASTQ = "fastq" BAM = "bam" VCF = "vcf" @@ -26,12 +27,14 @@ class FileType(str, Enum): class EntityType(str, Enum): """Entity types that can have files""" + PROJECT = "project" RUN = "run" class StorageBackend(str, Enum): """Storage backend types""" + LOCAL = "local" S3 = "s3" AZURE = "azure" @@ -73,12 +76,14 @@ def generate_file_id(self) -> str: """Generate a unique file ID""" import secrets import string + alphabet = string.ascii_letters + string.digits - return ''.join(secrets.choice(alphabet) for _ in range(12)) + return "".join(secrets.choice(alphabet) for _ in range(12)) class FileCreate(SQLModel): """Request model for creating a file""" + filename: str original_filename: str | None = None description: str | None = None @@ -93,6 +98,7 @@ class FileCreate(SQLModel): class FileUpdate(SQLModel): """Request model for updating file metadata""" + filename: str | None = None description: str | None = None file_type: FileType | None = None @@ -104,6 +110,7 @@ class FileUpdate(SQLModel): class FilePublic(SQLModel): """Public file representation""" + file_id: str filename: str original_filename: str @@ -123,6 +130,7 @@ class FilePublic(SQLModel): class FilesPublic(SQLModel): """Paginated file listing""" + data: List[FilePublic] total_items: int total_pages: int @@ -134,6 +142,7 @@ class FilesPublic(SQLModel): class FileUploadRequest(SQLModel): """Request model for file upload""" + filename: str description: str | None = None file_type: FileType = 
FileType.OTHER @@ -144,6 +153,7 @@ class FileUploadRequest(SQLModel): class FileUploadResponse(SQLModel): """Response model for file upload""" + file_id: str filename: str file_size: int | None = None @@ -154,6 +164,7 @@ class FileUploadResponse(SQLModel): class FileFilters(SQLModel): """File filtering options""" + entity_type: EntityType | None = None entity_id: str | None = None file_type: FileType | None = None @@ -168,6 +179,7 @@ class FileFilters(SQLModel): class PaginatedFileResponse(SQLModel): """Paginated response for file listings""" + data: list[FilePublic] total_items: int total_pages: int @@ -181,6 +193,7 @@ class PaginatedFileResponse(SQLModel): class FileBrowserColumns(SQLModel): """Individual file/folder item for file browser""" + name: str date: str size: int | None = None # None for directories @@ -189,12 +202,14 @@ class FileBrowserColumns(SQLModel): class FileBrowserFolder(SQLModel): """Folder item for file browser""" + name: str date: str class FileBrowserFile(SQLModel): """File item for file browser""" + name: str date: str size: int @@ -202,5 +217,6 @@ class FileBrowserFile(SQLModel): class FileBrowserData(SQLModel): """File browser data structure with separate folders and files""" + folders: list[FileBrowserFolder] files: list[FileBrowserFile] diff --git a/api/files/routes.py b/api/files/routes.py index 5b07596..8d06ff9 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -3,7 +3,15 @@ """ from typing import Optional -from fastapi import APIRouter, Query, HTTPException, status, UploadFile, File as FastAPIFile, Form +from fastapi import ( + APIRouter, + Query, + HTTPException, + status, + UploadFile, + File as FastAPIFile, + Form, +) from fastapi.responses import StreamingResponse from core.deps import SessionDep from api.files.models import ( @@ -25,7 +33,7 @@ "", response_model=FilePublic, status_code=status.HTTP_201_CREATED, - summary="Create a new file record" + summary="Create a new file record", ) def create_file( session: SessionDep, @@ -36,7 +44,7 @@ def create_file( file_type: FileType = Form(FileType.OTHER), is_public: bool = Form(False), created_by: Optional[str] = Form(None), - content: Optional[UploadFile] = FastAPIFile(None) + content: Optional[UploadFile] = FastAPIFile(None), ) -> FilePublic: """ Create a new file record with optional file content upload. 
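A minimal client-side sketch of the create-file call above; the host/port and the sample field values are assumptions, only the `/api/v1/files` route and the form-field names come from the code in this patch.

```python
# Hedged sketch: base URL and field values are illustrative only.
import io
import requests

resp = requests.post(
    "http://localhost:8000/api/v1/files",  # assumed host/port
    data={
        "filename": "sample.fastq",
        "description": "Raw reads for PROJ001",
        "file_type": "fastq",
        "entity_type": "project",
        "entity_id": "PROJ001",
        "is_public": "false",
        "created_by": "demo_user",
    },
    files={"content": ("sample.fastq", io.BytesIO(b"@r1\nACGT\n+\n!!!!\n"), "text/plain")},
)
resp.raise_for_status()
print(resp.json()["file_id"])
```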
@@ -57,7 +65,7 @@ def create_file( entity_type=entity_type, entity_id=entity_id, is_public=is_public, - created_by=created_by + created_by=created_by, ) file_content = None @@ -70,17 +78,23 @@ def create_file( @router.get( "", response_model=PaginatedFileResponse, - summary="List files with filtering and pagination" + summary="List files with filtering and pagination", ) def list_files( session: SessionDep, page: int = Query(1, ge=1, description="Page number (1-indexed)"), per_page: int = Query(20, ge=1, le=100, description="Number of items per page"), - entity_type: Optional[EntityType] = Query(None, description="Filter by entity type"), + entity_type: Optional[EntityType] = Query( + None, description="Filter by entity type" + ), entity_id: Optional[str] = Query(None, description="Filter by entity ID"), file_type: Optional[FileType] = Query(None, description="Filter by file type"), - search: Optional[str] = Query(None, description="Search in filename and description"), - is_public: Optional[bool] = Query(None, description="Filter by public/private status"), + search: Optional[str] = Query( + None, description="Search in filename and description" + ), + is_public: Optional[bool] = Query( + None, description="Filter by public/private status" + ), created_by: Optional[str] = Query(None, description="Filter by creator"), ) -> PaginatedFileResponse: """ @@ -106,20 +120,18 @@ def list_files( @router.get( - "/browse", - response_model=FileBrowserData, - summary="Browse filesystem directory" + "/browse", response_model=FileBrowserData, summary="Browse filesystem directory" ) def browse_filesystem( directory_path: str = Query("", description="Directory path to browse"), - storage_root: str = Query("storage", description="Storage root directory") + storage_root: str = Query("storage", description="Storage root directory"), ) -> FileBrowserData: """ Browse a filesystem directory and return folders and files in structured format. - + - **directory_path**: Path relative to storage root (empty for root) - **storage_root**: Base storage directory (defaults to 'storage') - + Returns separate arrays for folders and files with name, date, and size information. """ return services.browse_filesystem(directory_path, storage_root) @@ -128,22 +140,28 @@ def browse_filesystem( @router.get( "/browse-db", response_model=FileBrowserData, - summary="List database files in browser format" + summary="List database files in browser format", ) def list_files_browser_format( session: SessionDep, page: int = Query(1, ge=1, description="Page number (1-indexed)"), per_page: int = Query(20, ge=1, le=100, description="Number of items per page"), - entity_type: Optional[EntityType] = Query(None, description="Filter by entity type"), + entity_type: Optional[EntityType] = Query( + None, description="Filter by entity type" + ), entity_id: Optional[str] = Query(None, description="Filter by entity ID"), file_type: Optional[FileType] = Query(None, description="Filter by file type"), - search: Optional[str] = Query(None, description="Search in filename and description"), - is_public: Optional[bool] = Query(None, description="Filter by public/private status"), + search: Optional[str] = Query( + None, description="Search in filename and description" + ), + is_public: Optional[bool] = Query( + None, description="Filter by public/private status" + ), created_by: Optional[str] = Query(None, description="Filter by creator"), ) -> FileBrowserData: """ Get database files in FileBrowserData format (files only, no folders). 
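To make the response shape concrete, a minimal consumer sketch of the browse-db endpoint; the server address is an assumption, while the route, query parameters, and response keys come from the endpoint definition above.

```python
# Hedged sketch: the server address is an assumption.
import requests

resp = requests.get(
    "http://localhost:8000/api/v1/files/browse-db",
    params={"entity_type": "project", "entity_id": "PROJ001", "per_page": 50},
)
resp.raise_for_status()
payload = resp.json()
assert payload["folders"] == []  # database files carry no folder structure
for item in payload["files"]:
    print(item["name"], item["date"], item["size"])
```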
- + This endpoint returns the same file data as the regular list_files endpoint, but formatted to match the FileBrowserData structure with separate folders and files arrays. Since database files don't have folder structure, the folders array will be empty. @@ -160,15 +178,8 @@ def list_files_browser_format( return services.list_files_as_browser_data(session, filters, page, per_page) -@router.get( - "/{file_id}", - response_model=FilePublic, - summary="Get file by ID" -) -def get_file( - session: SessionDep, - file_id: str -) -> FilePublic: +@router.get("/{file_id}", response_model=FilePublic, summary="Get file by ID") +def get_file(session: SessionDep, file_id: str) -> FilePublic: """ Get a single file by its file_id. @@ -179,19 +190,13 @@ def get_file( except Exception: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"File with id {file_id} not found" + detail=f"File with id {file_id} not found", ) -@router.put( - "/{file_id}", - response_model=FilePublic, - summary="Update file metadata" -) +@router.put("/{file_id}", response_model=FilePublic, summary="Update file metadata") def update_file( - session: SessionDep, - file_id: str, - file_update: FileUpdate + session: SessionDep, file_id: str, file_update: FileUpdate ) -> FilePublic: """ Update file metadata. @@ -204,19 +209,14 @@ def update_file( except Exception: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"File with id {file_id} not found" + detail=f"File with id {file_id} not found", ) @router.delete( - "/{file_id}", - status_code=status.HTTP_204_NO_CONTENT, - summary="Delete file" + "/{file_id}", status_code=status.HTTP_204_NO_CONTENT, summary="Delete file" ) -def delete_file( - session: SessionDep, - file_id: str -) -> None: +def delete_file(session: SessionDep, file_id: str) -> None: """ Delete a file and its content. @@ -227,23 +227,17 @@ def delete_file( if not success: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"File with id {file_id} not found" + detail=f"File with id {file_id} not found", ) except Exception: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"File with id {file_id} not found" + detail=f"File with id {file_id} not found", ) -@router.get( - "/{file_id}/content", - summary="Download file content" -) -def download_file( - session: SessionDep, - file_id: str -): +@router.get("/{file_id}/content", summary="Download file content") +def download_file(session: SessionDep, file_id: str): """ Download the content of a file. @@ -265,24 +259,20 @@ def generate(): media_type=file_record.mime_type or "application/octet-stream", headers={ "Content-Disposition": f"attachment; filename={file_record.filename}" - } + }, ) except Exception: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"File with id {file_id} not found or content not available" + detail=f"File with id {file_id} not found or content not available", ) @router.post( - "/{file_id}/content", - response_model=FilePublic, - summary="Upload file content" + "/{file_id}/content", response_model=FilePublic, summary="Upload file content" ) def upload_file_content( - session: SessionDep, - file_id: str, - content: UploadFile = FastAPIFile(...) + session: SessionDep, file_id: str, content: UploadFile = FastAPIFile(...) ) -> FilePublic: """ Upload content for an existing file record. 
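A rough end-to-end sketch against the two content routes above; the base URL and `file_id` are placeholders, not values from the patch.

```python
# Hedged sketch: file_id and host are placeholders.
import requests

base = "http://localhost:8000/api/v1/files"
file_id = "abc123XYZ456"  # an existing record's file_id

# Upload content for the record
with open("report.pdf", "rb") as fh:
    r = requests.post(f"{base}/{file_id}/content", files={"content": ("report.pdf", fh)})
    r.raise_for_status()

# Stream the content back down
with requests.get(f"{base}/{file_id}/content", stream=True) as resp:
    resp.raise_for_status()
    with open("report_copy.pdf", "wb") as out:
        for chunk in resp.iter_content(chunk_size=8192):
            out.write(chunk)
```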
@@ -296,14 +286,14 @@ def upload_file_content( except Exception: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"File with id {file_id} not found" + detail=f"File with id {file_id} not found", ) @router.get( "/entity/{entity_type}/{entity_id}", response_model=PaginatedFileResponse, - summary="List files for a specific entity." + summary="List files for a specific entity.", ) def list_files_for_entity( session: SessionDep, @@ -320,17 +310,16 @@ def list_files_for_entity( - **entity_type**: Either "project" or "run" - **entity_id**: The project ID or run barcode """ - return services.list_files_for_entity(session, entity_type, entity_id, page, per_page, file_type) + return services.list_files_for_entity( + session, entity_type, entity_id, page, per_page, file_type + ) @router.get( - "/entity/{entity_type}/{entity_id}/count", - summary="Get file count for entity" + "/entity/{entity_type}/{entity_id}/count", summary="Get file count for entity" ) def get_file_count_for_entity( - session: SessionDep, - entity_type: EntityType, - entity_id: str + session: SessionDep, entity_type: EntityType, entity_id: str ) -> dict: """ Get the total number of files for a specific project or run. @@ -340,5 +329,3 @@ def get_file_count_for_entity( """ count = services.get_file_count_for_entity(session, entity_type, entity_id) return {"entity_type": entity_type, "entity_id": entity_id, "file_count": count} - - diff --git a/api/files/services.py b/api/files/services.py index 7ef7049..dbbd83e 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -32,7 +32,7 @@ def generate_file_id() -> str: """Generate a unique file ID""" alphabet = string.ascii_letters + string.digits - return ''.join(secrets.choice(alphabet) for _ in range(12)) + return "".join(secrets.choice(alphabet) for _ in range(12)) def generate_file_path( @@ -40,19 +40,13 @@ def generate_file_path( ) -> str: """Generate a structured file path""" from datetime import datetime, timezone + now = datetime.now(timezone.utc) year = now.strftime("%Y") month = now.strftime("%m") # Create path structure: /{entity_type}/{entity_id}/{file_type}/{year}/{month}/{filename} - path_parts = [ - entity_type.value, - entity_id, - file_type.value, - year, - month, - filename - ] + path_parts = [entity_type.value, entity_id, file_type.value, year, month, filename] return "/".join(path_parts) @@ -64,6 +58,7 @@ def calculate_file_checksum(file_content: bytes) -> str: def get_mime_type(filename: str) -> str: """Get MIME type based on file extension""" import mimetypes + mime_type, _ = mimetypes.guess_type(filename) return mime_type or "application/octet-stream" @@ -72,7 +67,7 @@ def create_file( session: Session, file_create: FileCreate, file_content: bytes | None = None, - storage_root: str = "storage" + storage_root: str = "storage", ) -> File: """Create a new file record and optionally store content""" @@ -87,7 +82,7 @@ def create_file( file_create.entity_type, file_create.entity_id, file_create.file_type, - f"{file_id}_{file_create.filename}" + f"{file_id}_{file_create.filename}", ) # Calculate file metadata if content is provided @@ -110,7 +105,7 @@ def create_file( entity_type=file_create.entity_type, entity_id=file_create.entity_id, is_public=file_create.is_public, - storage_backend=StorageBackend.LOCAL + storage_backend=StorageBackend.LOCAL, ) # Store file content if provided @@ -130,14 +125,12 @@ def create_file( def get_file(session: Session, file_id: str) -> File: """Get a file by its file_id""" - file_record = session.exec( - 
select(File).where(File.file_id == file_id) - ).first() + file_record = session.exec(select(File).where(File.file_id == file_id)).first() if not file_record: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"File with id {file_id} not found" + detail=f"File with id {file_id} not found", ) return file_record @@ -145,14 +138,12 @@ def get_file(session: Session, file_id: str) -> File: def get_file_by_id(session: Session, id: str) -> File: """Get a file by its internal UUID""" - file_record = session.exec( - select(File).where(File.id == id) - ).first() + file_record = session.exec(select(File).where(File.id == id)).first() if not file_record: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"File with internal id {id} not found" + detail=f"File with internal id {id} not found", ) return file_record @@ -203,7 +194,7 @@ def list_files( page: PositiveInt = 1, per_page: PositiveInt = 20, sort_by: str = "upload_date", - sort_order: str = "desc" + sort_order: str = "desc", ) -> FilesPublic: """List files with filtering and pagination""" @@ -229,14 +220,12 @@ def list_files( if filters.search_query: search_term = f"%{filters.search_query}%" query = query.where( - (File.filename.ilike(search_term)) | - (File.description.ilike(search_term)) + (File.filename.ilike(search_term)) + | (File.description.ilike(search_term)) ) # Get total count - total_count = session.exec( - select(func.count()).select_from(query.subquery()) - ).one() + total_count = session.exec(select(func.count()).select_from(query.subquery())).one() # Calculate pagination total_pages = (total_count + per_page - 1) // per_page @@ -271,7 +260,7 @@ def list_files( is_public=file.is_public, is_archived=file.is_archived, storage_backend=file.storage_backend, - checksum=file.checksum + checksum=file.checksum, ) for file in files ] @@ -283,11 +272,13 @@ def list_files( current_page=page, per_page=per_page, has_next=page < total_pages, - has_prev=page > 1 + has_prev=page > 1, ) -def get_file_content(session: Session, file_id: str, storage_root: str = "storage") -> bytes: +def get_file_content( + session: Session, file_id: str, storage_root: str = "storage" +) -> bytes: """Get file content from storage""" file_record = get_file(session, file_id) @@ -295,7 +286,7 @@ def get_file_content(session: Session, file_id: str, storage_root: str = "storag if not full_path.exists(): raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"File content not found at {file_record.file_path}" + detail=f"File content not found at {file_record.file_path}", ) with open(full_path, "rb") as f: @@ -308,34 +299,25 @@ def list_files_for_entity( entity_id: str, page: PositiveInt = 1, per_page: PositiveInt = 20, - file_type: FileType | None = None + file_type: FileType | None = None, ) -> FilesPublic: """List files for a specific entity (project or run)""" filters = FileFilters( - entity_type=entity_type, - entity_id=entity_id, - file_type=file_type + entity_type=entity_type, entity_id=entity_id, file_type=file_type ) - return list_files( - session=session, - filters=filters, - page=page, - per_page=per_page - ) + return list_files(session=session, filters=filters, page=page, per_page=per_page) def get_file_count_for_entity( - session: Session, - entity_type: EntityType, - entity_id: str + session: Session, entity_type: EntityType, entity_id: str ) -> int: """Get the count of files for a specific entity""" count = session.exec( select(func.count(File.id)).where( File.entity_type == entity_type, File.entity_id == 
entity_id, - ~File.is_archived + ~File.is_archived, ) ).one() @@ -369,31 +351,33 @@ def update_file_content( return file_record -def browse_filesystem(directory_path: str, storage_root: str = "storage") -> FileBrowserData: +def browse_filesystem( + directory_path: str, storage_root: str = "storage" +) -> FileBrowserData: """Browse filesystem directory and return structured data""" - + # Construct full path if os.path.isabs(directory_path): full_path = Path(directory_path) else: full_path = Path(storage_root) / directory_path - + # Check if directory exists and is accessible if not full_path.exists(): raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"Directory not found: {directory_path}" + detail=f"Directory not found: {directory_path}", ) - + if not full_path.is_dir(): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, - detail=f"Path is not a directory: {directory_path}" + detail=f"Path is not a directory: {directory_path}", ) - + folders = [] files = [] - + try: # List directory contents for item in full_path.iterdir(): @@ -401,29 +385,24 @@ def browse_filesystem(directory_path: str, storage_root: str = "storage") -> Fil stat = item.stat() mod_time = datetime.fromtimestamp(stat.st_mtime) date_str = mod_time.strftime("%Y-%m-%d %H:%M:%S") - + if item.is_dir(): - folders.append(FileBrowserFolder( - name=item.name, - date=date_str - )) + folders.append(FileBrowserFolder(name=item.name, date=date_str)) else: - files.append(FileBrowserFile( - name=item.name, - date=date_str, - size=stat.st_size - )) - + files.append( + FileBrowserFile(name=item.name, date=date_str, size=stat.st_size) + ) + except PermissionError: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, - detail=f"Permission denied accessing directory: {directory_path}" + detail=f"Permission denied accessing directory: {directory_path}", ) - + # Sort folders and files by name folders.sort(key=lambda x: x.name.lower()) files.sort(key=lambda x: x.name.lower()) - + return FileBrowserData(folders=folders, files=files) @@ -433,24 +412,26 @@ def list_files_as_browser_data( page: PositiveInt = 1, per_page: PositiveInt = 20, sort_by: str = "upload_date", - sort_order: str = "desc" + sort_order: str = "desc", ) -> FileBrowserData: """List database files in FileBrowserData format (files only, no folders)""" - + # Get files using existing list_files function files_result = list_files(session, filters, page, per_page, sort_by, sort_order) - + # Convert to FileBrowserFile format browser_files = [] for file_record in files_result.data: # Format date date_str = file_record.upload_date.strftime("%Y-%m-%d %H:%M:%S") - - browser_files.append(FileBrowserFile( - name=file_record.filename, - date=date_str, - size=file_record.file_size or 0 - )) - + + browser_files.append( + FileBrowserFile( + name=file_record.filename, + date=date_str, + size=file_record.file_size or 0, + ) + ) + # No folders for database files return FileBrowserData(folders=[], files=browser_files) From 0197383421fbf2c78e3fd9b7c383575eb740bc25 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Fri, 26 Sep 2025 20:11:15 -0400 Subject: [PATCH 25/35] Format with black --- tests/api/test_files.py | 184 ++++++++++++++++++++-------------------- 1 file changed, 90 insertions(+), 94 deletions(-) diff --git a/tests/api/test_files.py b/tests/api/test_files.py index 971b20e..05398e6 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -77,7 +77,7 @@ def test_file_create_model(self): entity_type=EntityType.PROJECT, entity_id="PROJ001", 
is_public=True, - created_by="testuser" + created_by="testuser", ) assert file_create.filename == "test.txt" @@ -91,9 +91,7 @@ def test_file_create_model(self): def test_file_update_model(self): """Test FileUpdate model validation""" file_update = FileUpdate( - filename="updated.txt", - description="Updated description", - is_public=False + filename="updated.txt", description="Updated description", is_public=False ) assert file_update.filename == "updated.txt" @@ -124,10 +122,7 @@ def test_generate_file_id(self): def test_generate_file_path(self): """Test file path generation""" path = generate_file_path( - EntityType.PROJECT, - "PROJ001", - FileType.FASTQ, - "sample.fastq" + EntityType.PROJECT, "PROJ001", FileType.FASTQ, "sample.fastq" ) # Should contain entity type, entity id, file type, year, month, filename @@ -150,14 +145,19 @@ def test_calculate_file_checksum(self): # Should be SHA-256 hash assert len(checksum) == 64 - assert checksum == "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f" + assert ( + checksum + == "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f" + ) def test_get_mime_type(self): """Test MIME type detection""" assert get_mime_type("test.txt") == "text/plain" assert get_mime_type("test.pdf") == "application/pdf" assert get_mime_type("test.jpg") == "image/jpeg" - assert get_mime_type("test.fastq") == "application/octet-stream" # Unknown extension + assert ( + get_mime_type("test.fastq") == "application/octet-stream" + ) # Unknown extension def test_create_file_without_content(self, session: Session, temp_storage): """Test creating file record without content""" @@ -167,7 +167,7 @@ def test_create_file_without_content(self, session: Session, temp_storage): file_type=FileType.DOCUMENT, entity_type=EntityType.PROJECT, entity_id="PROJ001", - created_by="testuser" + created_by="testuser", ) file_record = create_file(session, file_create, storage_root=temp_storage) @@ -191,10 +191,12 @@ def test_create_file_with_content(self, session: Session, temp_storage): description="Test file", file_type=FileType.DOCUMENT, entity_type=EntityType.PROJECT, - entity_id="PROJ001" + entity_id="PROJ001", ) - file_record = create_file(session, file_create, content, storage_root=temp_storage) + file_record = create_file( + session, file_create, content, storage_root=temp_storage + ) assert file_record.file_size == len(content) assert file_record.checksum == calculate_file_checksum(content) @@ -207,9 +209,7 @@ def test_create_file_with_content(self, session: Session, temp_storage): def test_get_file(self, session: Session, temp_storage): """Test getting file by file_id""" file_create = FileCreate( - filename="test.txt", - entity_type=EntityType.PROJECT, - entity_id="PROJ001" + filename="test.txt", entity_type=EntityType.PROJECT, entity_id="PROJ001" ) created_file = create_file(session, file_create, storage_root=temp_storage) @@ -232,15 +232,13 @@ def test_update_file(self, session: Session, temp_storage): filename="test.txt", description="Original description", entity_type=EntityType.PROJECT, - entity_id="PROJ001" + entity_id="PROJ001", ) created_file = create_file(session, file_create, storage_root=temp_storage) file_update = FileUpdate( - filename="updated.txt", - description="Updated description", - is_public=True + filename="updated.txt", description="Updated description", is_public=True ) updated_file = update_file(session, created_file.file_id, file_update) @@ -253,12 +251,12 @@ def test_delete_file(self, session: Session, temp_storage): """Test deleting 
file and content""" content = b"Hello, World!" file_create = FileCreate( - filename="test.txt", - entity_type=EntityType.PROJECT, - entity_id="PROJ001" + filename="test.txt", entity_type=EntityType.PROJECT, entity_id="PROJ001" ) - created_file = create_file(session, file_create, content, storage_root=temp_storage) + created_file = create_file( + session, file_create, content, storage_root=temp_storage + ) file_path = Path(temp_storage) / created_file.file_path # Verify file exists @@ -296,7 +294,7 @@ def test_list_files_with_data(self, session: Session, temp_storage): description=f"Test file {i}", entity_type=EntityType.PROJECT, entity_id="PROJ001", - file_type=FileType.DOCUMENT + file_type=FileType.DOCUMENT, ) create_file(session, file_create, storage_root=temp_storage) @@ -318,7 +316,7 @@ def test_list_files_for_entity(self, session: Session, temp_storage): file_create = FileCreate( filename=f"test{i}.txt", entity_type=EntityType.PROJECT, - entity_id=entity_id + entity_id=entity_id, ) create_file(session, file_create, storage_root=temp_storage) @@ -335,7 +333,7 @@ def test_get_file_count_for_entity(self, session: Session, temp_storage): file_create = FileCreate( filename=f"test{i}.txt", entity_type=EntityType.PROJECT, - entity_id="PROJ001" + entity_id="PROJ001", ) create_file(session, file_create, storage_root=temp_storage) @@ -346,12 +344,12 @@ def test_get_file_content(self, session: Session, temp_storage): """Test retrieving file content""" content = b"Hello, World!" file_create = FileCreate( - filename="test.txt", - entity_type=EntityType.PROJECT, - entity_id="PROJ001" + filename="test.txt", entity_type=EntityType.PROJECT, entity_id="PROJ001" ) - created_file = create_file(session, file_create, content, storage_root=temp_storage) + created_file = create_file( + session, file_create, content, storage_root=temp_storage + ) retrieved_content = get_file_content( session, created_file.file_id, storage_root=temp_storage ) @@ -361,9 +359,7 @@ def test_get_file_content(self, session: Session, temp_storage): def test_get_file_content_not_found(self, session: Session, temp_storage): """Test retrieving content for non-existent file""" file_create = FileCreate( - filename="test.txt", - entity_type=EntityType.PROJECT, - entity_id="PROJ001" + filename="test.txt", entity_type=EntityType.PROJECT, entity_id="PROJ001" ) # Create file record without content @@ -387,7 +383,7 @@ def test_create_file_endpoint(self, client: TestClient, session: Session): "entity_type": "project", "entity_id": "PROJ001", "is_public": "true", # Form data sends as string - "created_by": "api_test_user" + "created_by": "api_test_user", } response = client.post("/api/v1/files", data=file_data) @@ -414,7 +410,7 @@ def test_get_files_endpoint(self, client: TestClient, session: Session): "file_type": "document", "entity_type": "project", "entity_id": "PROJ001", - "created_by": "list_test_user" + "created_by": "list_test_user", } client.post("/api/v1/files", data=file_data) @@ -468,7 +464,7 @@ def test_get_file_endpoint(self, client: TestClient, session: Session): "file_type": "document", "entity_type": "project", "entity_id": "PROJ001", - "created_by": "get_test_user" + "created_by": "get_test_user", } create_response = client.post("/api/v1/files", data=file_data) @@ -498,7 +494,7 @@ def test_update_file_endpoint(self, client: TestClient, session: Session): "file_type": "document", "entity_type": "project", "entity_id": "PROJ001", - "created_by": "update_test_user" + "created_by": "update_test_user", } create_response = 
client.post("/api/v1/files", data=file_data) @@ -507,10 +503,7 @@ def test_update_file_endpoint(self, client: TestClient, session: Session): file_id = created_file["file_id"] # Test successful update - update_data = { - "description": "Updated description", - "is_public": True - } + update_data = {"description": "Updated description", "is_public": True} response = client.put(f"/api/v1/files/{file_id}", json=update_data) assert response.status_code == 200 @@ -534,7 +527,7 @@ def test_delete_file_endpoint(self, client: TestClient, session: Session): "file_type": "document", "entity_type": "project", "entity_id": "PROJ001", - "created_by": "delete_test_user" + "created_by": "delete_test_user", } create_response = client.post("/api/v1/files", data=file_data) @@ -560,7 +553,7 @@ def test_list_files_for_entity_endpoint(self, client: TestClient, session: Sessi entities = [ ("project", "PROJ001"), ("project", "PROJ002"), - ("run", "190110_MACHINE123_0001_FLOWCELL123") + ("run", "190110_MACHINE123_0001_FLOWCELL123"), ] for entity_type, entity_id in entities: @@ -571,7 +564,7 @@ def test_list_files_for_entity_endpoint(self, client: TestClient, session: Sessi "file_type": "document", "entity_type": entity_type, "entity_id": entity_id, - "created_by": "entity_test_user" + "created_by": "entity_test_user", } client.post("/api/v1/files", data=file_data) @@ -586,7 +579,9 @@ def test_list_files_for_entity_endpoint(self, client: TestClient, session: Sessi assert item["entity_id"] == "PROJ001" # Test listing files for specific run - response = client.get("/api/v1/files/entity/run/190110_MACHINE123_0001_FLOWCELL123") + response = client.get( + "/api/v1/files/entity/run/190110_MACHINE123_0001_FLOWCELL123" + ) assert response.status_code == 200 data = response.json() @@ -603,7 +598,9 @@ def test_list_files_for_entity_endpoint(self, client: TestClient, session: Sessi assert data["per_page"] == 1 assert len(data["data"]) == 1 - def test_get_file_count_for_entity_endpoint(self, client: TestClient, session: Session): + def test_get_file_count_for_entity_endpoint( + self, client: TestClient, session: Session + ): """Test getting file count for a specific entity""" # Create files for a specific entity entity_type = "project" @@ -616,7 +613,7 @@ def test_get_file_count_for_entity_endpoint(self, client: TestClient, session: S "file_type": "document", "entity_type": entity_type, "entity_id": entity_id, - "created_by": "count_test_user" + "created_by": "count_test_user", } client.post("/api/v1/files", data=file_data) @@ -638,7 +635,9 @@ def test_get_file_count_for_entity_endpoint(self, client: TestClient, session: S assert data["entity_id"] == "EMPTY_PROJECT" assert data["file_count"] == 0 - def test_create_file_with_content_endpoint(self, client: TestClient, session: Session): + def test_create_file_with_content_endpoint( + self, client: TestClient, session: Session + ): """Test file creation with content upload""" import io @@ -649,12 +648,14 @@ def test_create_file_with_content_endpoint(self, client: TestClient, session: Se "file_type": "document", "entity_type": "project", "entity_id": "PROJ001", - "created_by": "content_test_user" + "created_by": "content_test_user", } # Create file content file_content = b"Hello, this is test content!" 
- files = {"content": ("test_content.txt", io.BytesIO(file_content), "text/plain")} + files = { + "content": ("test_content.txt", io.BytesIO(file_content), "text/plain") + } # Send multipart form data response = client.post("/api/v1/files", data=file_data, files=files) @@ -672,7 +673,7 @@ def test_error_handling(self, client: TestClient, session: Session): "filename": "test.txt", "file_type": "invalid_type", "entity_type": "project", - "entity_id": "PROJ001" + "entity_id": "PROJ001", } response = client.post("/api/v1/files", data=invalid_file_data) @@ -683,7 +684,7 @@ def test_error_handling(self, client: TestClient, session: Session): "filename": "test.txt", "file_type": "document", "entity_type": "invalid_entity", - "entity_id": "PROJ001" + "entity_id": "PROJ001", } response = client.post("/api/v1/files", data=invalid_entity_data) @@ -711,10 +712,12 @@ def test_complete_file_lifecycle(self, session: Session, temp_storage): file_type=FileType.DOCUMENT, entity_type=EntityType.PROJECT, entity_id="PROJ001", - created_by="testuser" + created_by="testuser", ) - created_file = create_file(session, file_create, content, storage_root=temp_storage) + created_file = create_file( + session, file_create, content, storage_root=temp_storage + ) assert created_file.filename == "lifecycle.txt" assert created_file.file_size == len(content) @@ -729,8 +732,7 @@ def test_complete_file_lifecycle(self, session: Session, temp_storage): # Update file metadata file_update = FileUpdate( - description="Updated lifecycle test file", - is_public=True + description="Updated lifecycle test file", is_public=True ) updated_file = update_file(session, created_file.file_id, file_update) @@ -750,7 +752,7 @@ def test_multiple_entities_file_management(self, session: Session, temp_storage) entities = [ (EntityType.PROJECT, "PROJ001"), (EntityType.PROJECT, "PROJ002"), - (EntityType.RUN, "190110_MACHINE123_0001_FLOWCELL123") + (EntityType.RUN, "190110_MACHINE123_0001_FLOWCELL123"), ] created_files = [] @@ -762,10 +764,12 @@ def test_multiple_entities_file_management(self, session: Session, temp_storage) filename=f"file{i}.txt", entity_type=entity_type, entity_id=entity_id, - file_type=FileType.DOCUMENT + file_type=FileType.DOCUMENT, ) - file_record = create_file(session, file_create, storage_root=temp_storage) + file_record = create_file( + session, file_create, storage_root=temp_storage + ) created_files.append(file_record) # Verify total count @@ -790,13 +794,14 @@ def test_file_type_filtering(self, session: Session, temp_storage): filename=f"test.{file_type.value}", entity_type=EntityType.PROJECT, entity_id="PROJ001", - file_type=file_type + file_type=file_type, ) create_file(session, file_create, storage_root=temp_storage) # Test filtering by each type for file_type in file_types: from api.files.models import FileFilters + filters = FileFilters(file_type=file_type) result = list_files(session, filters=filters) @@ -809,22 +814,17 @@ class TestFileBrowserModels: def test_file_browser_folder_model(self): """Test FileBrowserFolder model""" - folder = FileBrowserFolder( - name="test_folder", - date="2023-01-01 12:00:00" - ) - + folder = FileBrowserFolder(name="test_folder", date="2023-01-01 12:00:00") + assert folder.name == "test_folder" assert folder.date == "2023-01-01 12:00:00" def test_file_browser_file_model(self): """Test FileBrowserFile model""" file = FileBrowserFile( - name="test_file.txt", - date="2023-01-01 12:00:00", - size=1024 + name="test_file.txt", date="2023-01-01 12:00:00", size=1024 ) - + assert file.name == 
"test_file.txt" assert file.date == "2023-01-01 12:00:00" assert file.size == 1024 @@ -833,12 +833,9 @@ def test_file_browser_data_model(self): """Test FileBrowserData model""" folder = FileBrowserFolder(name="folder1", date="2023-01-01 12:00:00") file = FileBrowserFile(name="file1.txt", date="2023-01-01 12:00:00", size=100) - - browser_data = FileBrowserData( - folders=[folder], - files=[file] - ) - + + browser_data = FileBrowserData(folders=[folder], files=[file]) + assert len(browser_data.folders) == 1 assert len(browser_data.files) == 1 assert browser_data.folders[0].name == "folder1" @@ -852,36 +849,36 @@ class TestFileBrowserServices: def temp_storage(self): """Create temporary storage with folder/file structure""" temp_dir = tempfile.mkdtemp() - + # Create some folders (Path(temp_dir) / "folder1").mkdir() (Path(temp_dir) / "folder2").mkdir() - + # Create some files (Path(temp_dir) / "file1.txt").write_text("content1") (Path(temp_dir) / "file2.txt").write_text("content2") - + yield temp_dir shutil.rmtree(temp_dir) def test_browse_filesystem(self, temp_storage): """Test filesystem browsing""" result = browse_filesystem("", temp_storage) - + assert isinstance(result, FileBrowserData) assert len(result.folders) == 2 assert len(result.files) == 2 - + # Check folder names folder_names = [f.name for f in result.folders] assert "folder1" in folder_names assert "folder2" in folder_names - + # Check file names file_names = [f.name for f in result.files] assert "file1.txt" in file_names assert "file2.txt" in file_names - + # Check file sizes for file in result.files: assert file.size > 0 @@ -891,7 +888,7 @@ def test_browse_filesystem_nonexistent_directory(self): """Test browsing non-existent directory""" with pytest.raises(HTTPException) as exc_info: browse_filesystem("nonexistent", "nonexistent_root") - + assert exc_info.value.status_code == 404 assert "not found" in str(exc_info.value.detail) @@ -904,17 +901,17 @@ def test_list_files_as_browser_data(self, session: Session): description=f"Database file {i}", entity_type=EntityType.PROJECT, entity_id="PROJ001", - file_type=FileType.DOCUMENT + file_type=FileType.DOCUMENT, ) create_file(session, file_create) # Get files in browser format result = list_files_as_browser_data(session) - + assert isinstance(result, FileBrowserData) assert len(result.folders) == 0 # No folders for database files assert len(result.files) == 3 - + # Check file properties for file in result.files: assert file.name.startswith("db_file_") @@ -933,17 +930,17 @@ def test_browse_filesystem_endpoint(self, client: TestClient): # Create test structure (Path(temp_dir) / "test_folder").mkdir() (Path(temp_dir) / "test_file.txt").write_text("test content") - + # Test the endpoint response = client.get(f"/api/v1/files/browse?storage_root={temp_dir}") assert response.status_code == 200 - + data = response.json() assert "folders" in data assert "files" in data assert isinstance(data["folders"], list) assert isinstance(data["files"], list) - + finally: shutil.rmtree(temp_dir) @@ -957,20 +954,20 @@ def test_browse_db_endpoint(self, client: TestClient, session: Session): "file_type": "document", "entity_type": "project", "entity_id": "PROJ001", - "created_by": "browser_test_user" + "created_by": "browser_test_user", } client.post("/api/v1/files", data=file_data) # Test the browser endpoint response = client.get("/api/v1/files/browse-db") assert response.status_code == 200 - + data = response.json() assert "folders" in data assert "files" in data assert len(data["folders"]) == 0 # No 
folders for database files assert len(data["files"]) >= 2 - + # Check file structure for file in data["files"]: assert "name" in file @@ -984,4 +981,3 @@ def test_browse_filesystem_error_handling(self, client: TestClient): "/api/v1/files/browse?directory_path=nonexistent&storage_root=nonexistent" ) assert response.status_code == 404 - From 15ad92c2f3b627cebbfed7e36c9b0f6777e92c02 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Fri, 26 Sep 2025 21:39:16 -0400 Subject: [PATCH 26/35] Add s3 support --- api/files/routes.py | 25 +++++-- api/files/services.py | 160 +++++++++++++++++++++++++++++++++++++++- tests/api/test_files.py | 45 +++++++++++ 3 files changed, 224 insertions(+), 6 deletions(-) diff --git a/api/files/routes.py b/api/files/routes.py index 8d06ff9..ea886fc 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -123,16 +123,31 @@ def list_files( "/browse", response_model=FileBrowserData, summary="Browse filesystem directory" ) def browse_filesystem( - directory_path: str = Query("", description="Directory path to browse"), - storage_root: str = Query("storage", description="Storage root directory"), + directory_path: str = Query( + "", description="Directory path to browse (local path or s3://bucket/key)" + ), + storage_root: str = Query( + "storage", description="Storage root directory (ignored for S3 paths)" + ), ) -> FileBrowserData: """ - Browse a filesystem directory and return folders and files in structured format. + Browse a filesystem directory or S3 bucket and return folders and files in structured format. - - **directory_path**: Path relative to storage root (empty for root) - - **storage_root**: Base storage directory (defaults to 'storage') + Supports both local filesystem and AWS S3: + - **Local paths**: Relative to storage_root (empty for root) or absolute paths + - **S3 paths**: Use s3://bucket/key format (e.g., s3://my-bucket/path/to/folder/) + - **storage_root**: Base storage directory for local paths (ignored for S3) Returns separate arrays for folders and files with name, date, and size information. + + For S3 paths: + - Requires AWS credentials to be configured + - Folders represent S3 prefixes (common prefixes) + - Files show S3 object metadata (size, last modified) + + Examples: + - Local: `/browse?directory_path=project1/data` + - S3: `/browse?directory_path=s3://my-bucket/project1/data/` """ return services.browse_filesystem(directory_path, storage_root) diff --git a/api/files/services.py b/api/files/services.py index dbbd83e..a81969e 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -12,6 +12,13 @@ from pydantic import PositiveInt from fastapi import HTTPException, status +try: + import boto3 + from botocore.exceptions import ClientError, NoCredentialsError + BOTO3_AVAILABLE = True +except ImportError: + BOTO3_AVAILABLE = False + from api.files.models import ( File, FileCreate, @@ -354,7 +361,24 @@ def update_file_content( def browse_filesystem( directory_path: str, storage_root: str = "storage" ) -> FileBrowserData: - """Browse filesystem directory and return structured data""" + """ + Browse filesystem directory and return structured data. + + Automatically detects if the path is S3 (s3://bucket/key) or local filesystem + and routes to the appropriate handler. 
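As a quick usage sketch of this dispatch (bucket and directory names below are made up; the function and helper names are the ones added in this patch):

```python
# Hedged sketch: bucket/prefix values are illustrative.
from api.files.services import browse_filesystem, _is_s3_path, _parse_s3_path

assert _is_s3_path("project1/data") is False
assert _parse_s3_path("s3://my-bucket/runs/") == ("my-bucket", "runs/")

local_listing = browse_filesystem("project1/data", storage_root="storage")
s3_listing = browse_filesystem("s3://my-bucket/runs/")  # needs boto3 + AWS credentials
```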
+ """ + # Check if this is an S3 path + if _is_s3_path(directory_path): + return browse_s3(directory_path) + + # Handle local filesystem + return _browse_local_filesystem(directory_path, storage_root) + + +def _browse_local_filesystem( + directory_path: str, storage_root: str = "storage" +) -> FileBrowserData: + """Browse local filesystem directory and return structured data""" # Construct full path if os.path.isabs(directory_path): @@ -406,6 +430,140 @@ def browse_filesystem( return FileBrowserData(folders=folders, files=files) +def _is_s3_path(path: str) -> bool: + """Check if a path is an S3 URI (s3://bucket/key)""" + return path.startswith("s3://") + + +def _parse_s3_path(s3_path: str) -> tuple[str, str]: + """Parse S3 path into bucket and prefix""" + if not s3_path.startswith("s3://"): + raise ValueError("Invalid S3 path format. Must start with s3://") + + # Remove s3:// prefix + path_without_scheme = s3_path[5:] + + # Check for empty path after s3:// + if not path_without_scheme: + raise ValueError("Invalid S3 path format. Bucket name is required") + + # Split into bucket and key + if "/" in path_without_scheme: + bucket, key = path_without_scheme.split("/", 1) + else: + bucket = path_without_scheme + key = "" + + # Validate bucket name is not empty + if not bucket: + raise ValueError("Invalid S3 path format. Bucket name cannot be empty") + + return bucket, key + + +def browse_s3(s3_path: str) -> FileBrowserData: + """Browse S3 bucket/prefix and return structured data""" + if not BOTO3_AVAILABLE: + raise HTTPException( + status_code=status.HTTP_501_NOT_IMPLEMENTED, + detail="S3 support not available. Install boto3 to enable S3 browsing.", + ) + + try: + bucket, prefix = _parse_s3_path(s3_path) + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + try: + # Initialize S3 client + s3_client = boto3.client('s3') + + # List objects with the given prefix + paginator = s3_client.get_paginator('list_objects_v2') + page_iterator = paginator.paginate( + Bucket=bucket, + Prefix=prefix, + Delimiter='/' # This helps us get "folders" + ) + + folders = [] + files = [] + + for page in page_iterator: + # Handle "folders" (common prefixes) + for common_prefix in page.get('CommonPrefixes', []): + folder_prefix = common_prefix['Prefix'] + # Remove the current prefix to get just the folder name + folder_name = folder_prefix[len(prefix):].rstrip('/') + if folder_name: # Skip empty names + folders.append(FileBrowserFolder( + name=folder_name, + date="" # S3 prefixes don't have modification dates + )) + + # Handle actual files + for obj in page.get('Contents', []): + key = obj['Key'] + # Skip if this is just the prefix itself (directory marker) + if key == prefix: + continue + + # Remove the current prefix to get just the file name + file_name = key[len(prefix):] + + # Skip files that are in subdirectories (contain '/') + if '/' in file_name: + continue + + if file_name: # Skip empty names + # Format the date + mod_time = obj['LastModified'] + date_str = mod_time.strftime("%Y-%m-%d %H:%M:%S") + + files.append(FileBrowserFile( + name=file_name, + date=date_str, + size=obj['Size'] + )) + + # Sort folders and files by name + folders.sort(key=lambda x: x.name.lower()) + files.sort(key=lambda x: x.name.lower()) + + return FileBrowserData(folders=folders, files=files) + + except NoCredentialsError: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="AWS credentials not found. 
Please configure AWS credentials.", + ) + except ClientError as e: + error_code = e.response['Error']['Code'] + if error_code == 'NoSuchBucket': + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"S3 bucket not found: {bucket}", + ) + elif error_code == 'AccessDenied': + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=f"Access denied to S3 bucket: {bucket}", + ) + else: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"S3 error: {e.response['Error']['Message']}", + ) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Unexpected error browsing S3: {str(e)}", + ) + + def list_files_as_browser_data( session: Session, filters: FileFilters | None = None, diff --git a/tests/api/test_files.py b/tests/api/test_files.py index 05398e6..47898c6 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -944,6 +944,51 @@ def test_browse_filesystem_endpoint(self, client: TestClient): finally: shutil.rmtree(temp_dir) + def test_s3_path_detection(self): + """Test S3 path detection functionality""" + from api.files.services import _is_s3_path, _parse_s3_path + + # Test S3 path detection + assert _is_s3_path("s3://bucket/key") is True + assert _is_s3_path("s3://bucket") is True + assert _is_s3_path("/local/path") is False + assert _is_s3_path("local/path") is False + assert _is_s3_path("") is False + + # Test S3 path parsing + bucket, key = _parse_s3_path("s3://my-bucket/path/to/folder/") + assert bucket == "my-bucket" + assert key == "path/to/folder/" + + bucket, key = _parse_s3_path("s3://my-bucket") + assert bucket == "my-bucket" + assert key == "" + + bucket, key = _parse_s3_path("s3://my-bucket/") + assert bucket == "my-bucket" + assert key == "" + + # Test invalid S3 path + with pytest.raises(ValueError): + _parse_s3_path("invalid://path") + + def test_browse_s3_without_boto3(self, client: TestClient, monkeypatch): + """Test S3 browsing when boto3 is not available""" + # Mock BOTO3_AVAILABLE to False + import api.files.services as services + monkeypatch.setattr(services, "BOTO3_AVAILABLE", False) + + response = client.get("/api/v1/files/browse?directory_path=s3://test-bucket/") + assert response.status_code == 501 + assert "S3 support not available" in response.json()["detail"] + + def test_browse_s3_invalid_path(self, client: TestClient): + """Test S3 browsing with invalid S3 path""" + # Test with a path that looks like S3 but has invalid format + response = client.get("/api/v1/files/browse?directory_path=s3://") + assert response.status_code == 400 + assert "Invalid S3 path format" in response.json()["detail"] + def test_browse_db_endpoint(self, client: TestClient, session: Session): """Test database files browser endpoint""" # Create test files From a9bd8d5ea968e323da0971e0ad4babcc40ab55c9 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Fri, 26 Sep 2025 21:39:45 -0400 Subject: [PATCH 27/35] Format with black --- api/files/routes.py | 4 +- api/files/services.py | 89 ++++++++++++++++++++--------------------- tests/api/test_files.py | 13 +++--- 3 files changed, 53 insertions(+), 53 deletions(-) diff --git a/api/files/routes.py b/api/files/routes.py index ea886fc..3b2afce 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -139,12 +139,12 @@ def browse_filesystem( - **storage_root**: Base storage directory for local paths (ignored for S3) Returns separate arrays for folders and files with name, date, and size information. 
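For reference, an illustrative payload for an S3 prefix (names, dates, and sizes are invented); folder entries carry an empty date because S3 common prefixes have no modification timestamp, while file entries use the object's LastModified.

```python
# Hedged sketch: values below are invented for illustration.
example_s3_listing = {
    "folders": [
        {"name": "fastq", "date": ""},
        {"name": "reports", "date": ""},
    ],
    "files": [
        {"name": "SampleSheet.csv", "date": "2025-09-26 18:04:11", "size": 2048},
    ],
}
```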
- + For S3 paths: - Requires AWS credentials to be configured - Folders represent S3 prefixes (common prefixes) - Files show S3 object metadata (size, last modified) - + Examples: - Local: `/browse?directory_path=project1/data` - S3: `/browse?directory_path=s3://my-bucket/project1/data/` diff --git a/api/files/services.py b/api/files/services.py index a81969e..336d58b 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -15,6 +15,7 @@ try: import boto3 from botocore.exceptions import ClientError, NoCredentialsError + BOTO3_AVAILABLE = True except ImportError: BOTO3_AVAILABLE = False @@ -363,14 +364,14 @@ def browse_filesystem( ) -> FileBrowserData: """ Browse filesystem directory and return structured data. - + Automatically detects if the path is S3 (s3://bucket/key) or local filesystem and routes to the appropriate handler. """ # Check if this is an S3 path if _is_s3_path(directory_path): return browse_s3(directory_path) - + # Handle local filesystem return _browse_local_filesystem(directory_path, storage_root) @@ -439,25 +440,25 @@ def _parse_s3_path(s3_path: str) -> tuple[str, str]: """Parse S3 path into bucket and prefix""" if not s3_path.startswith("s3://"): raise ValueError("Invalid S3 path format. Must start with s3://") - + # Remove s3:// prefix path_without_scheme = s3_path[5:] - + # Check for empty path after s3:// if not path_without_scheme: raise ValueError("Invalid S3 path format. Bucket name is required") - + # Split into bucket and key if "/" in path_without_scheme: bucket, key = path_without_scheme.split("/", 1) else: bucket = path_without_scheme key = "" - + # Validate bucket name is not empty if not bucket: raise ValueError("Invalid S3 path format. Bucket name cannot be empty") - + return bucket, key @@ -468,7 +469,7 @@ def browse_s3(s3_path: str) -> FileBrowserData: status_code=status.HTTP_501_NOT_IMPLEMENTED, detail="S3 support not available. 
Install boto3 to enable S3 browsing.", ) - + try: bucket, prefix = _parse_s3_path(s3_path) except ValueError as e: @@ -476,78 +477,76 @@ def browse_s3(s3_path: str) -> FileBrowserData: status_code=status.HTTP_400_BAD_REQUEST, detail=str(e), ) - + try: # Initialize S3 client - s3_client = boto3.client('s3') - + s3_client = boto3.client("s3") + # List objects with the given prefix - paginator = s3_client.get_paginator('list_objects_v2') + paginator = s3_client.get_paginator("list_objects_v2") page_iterator = paginator.paginate( - Bucket=bucket, - Prefix=prefix, - Delimiter='/' # This helps us get "folders" + Bucket=bucket, Prefix=prefix, Delimiter="/" # This helps us get "folders" ) - + folders = [] files = [] - + for page in page_iterator: # Handle "folders" (common prefixes) - for common_prefix in page.get('CommonPrefixes', []): - folder_prefix = common_prefix['Prefix'] + for common_prefix in page.get("CommonPrefixes", []): + folder_prefix = common_prefix["Prefix"] # Remove the current prefix to get just the folder name - folder_name = folder_prefix[len(prefix):].rstrip('/') + folder_name = folder_prefix[len(prefix) :].rstrip("/") if folder_name: # Skip empty names - folders.append(FileBrowserFolder( - name=folder_name, - date="" # S3 prefixes don't have modification dates - )) - + folders.append( + FileBrowserFolder( + name=folder_name, + date="", # S3 prefixes don't have modification dates + ) + ) + # Handle actual files - for obj in page.get('Contents', []): - key = obj['Key'] + for obj in page.get("Contents", []): + key = obj["Key"] # Skip if this is just the prefix itself (directory marker) if key == prefix: continue - + # Remove the current prefix to get just the file name - file_name = key[len(prefix):] - + file_name = key[len(prefix) :] + # Skip files that are in subdirectories (contain '/') - if '/' in file_name: + if "/" in file_name: continue - + if file_name: # Skip empty names # Format the date - mod_time = obj['LastModified'] + mod_time = obj["LastModified"] date_str = mod_time.strftime("%Y-%m-%d %H:%M:%S") - - files.append(FileBrowserFile( - name=file_name, - date=date_str, - size=obj['Size'] - )) - + + files.append( + FileBrowserFile(name=file_name, date=date_str, size=obj["Size"]) + ) + # Sort folders and files by name folders.sort(key=lambda x: x.name.lower()) files.sort(key=lambda x: x.name.lower()) - + return FileBrowserData(folders=folders, files=files) - + except NoCredentialsError: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail="AWS credentials not found. 
Please configure AWS credentials.", ) except ClientError as e: - error_code = e.response['Error']['Code'] - if error_code == 'NoSuchBucket': + error_code = e.response["Error"]["Code"] + if error_code == "NoSuchBucket": raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"S3 bucket not found: {bucket}", ) - elif error_code == 'AccessDenied': + elif error_code == "AccessDenied": raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail=f"Access denied to S3 bucket: {bucket}", diff --git a/tests/api/test_files.py b/tests/api/test_files.py index 47898c6..a04ad18 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -947,27 +947,27 @@ def test_browse_filesystem_endpoint(self, client: TestClient): def test_s3_path_detection(self): """Test S3 path detection functionality""" from api.files.services import _is_s3_path, _parse_s3_path - + # Test S3 path detection assert _is_s3_path("s3://bucket/key") is True assert _is_s3_path("s3://bucket") is True assert _is_s3_path("/local/path") is False assert _is_s3_path("local/path") is False assert _is_s3_path("") is False - + # Test S3 path parsing bucket, key = _parse_s3_path("s3://my-bucket/path/to/folder/") assert bucket == "my-bucket" assert key == "path/to/folder/" - + bucket, key = _parse_s3_path("s3://my-bucket") assert bucket == "my-bucket" assert key == "" - + bucket, key = _parse_s3_path("s3://my-bucket/") assert bucket == "my-bucket" assert key == "" - + # Test invalid S3 path with pytest.raises(ValueError): _parse_s3_path("invalid://path") @@ -976,8 +976,9 @@ def test_browse_s3_without_boto3(self, client: TestClient, monkeypatch): """Test S3 browsing when boto3 is not available""" # Mock BOTO3_AVAILABLE to False import api.files.services as services + monkeypatch.setattr(services, "BOTO3_AVAILABLE", False) - + response = client.get("/api/v1/files/browse?directory_path=s3://test-bucket/") assert response.status_code == 501 assert "S3 support not available" in response.json()["detail"] From 67882b117795f9291a8a94e6e2e7fc40ae11342a Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Fri, 26 Sep 2025 21:43:56 -0400 Subject: [PATCH 28/35] fix Lint --- api/files/services.py | 7 +++---- api/runs/routes.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/api/files/services.py b/api/files/services.py index 336d58b..b03c02d 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -33,7 +33,6 @@ FileBrowserData, FileBrowserFolder, FileBrowserFile, - FileBrowserColumns, ) @@ -476,7 +475,7 @@ def browse_s3(s3_path: str) -> FileBrowserData: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=str(e), - ) + ) from e try: # Initialize S3 client @@ -496,7 +495,7 @@ def browse_s3(s3_path: str) -> FileBrowserData: for common_prefix in page.get("CommonPrefixes", []): folder_prefix = common_prefix["Prefix"] # Remove the current prefix to get just the folder name - folder_name = folder_prefix[len(prefix) :].rstrip("/") + folder_name = folder_prefix[len(prefix):].rstrip("/") if folder_name: # Skip empty names folders.append( FileBrowserFolder( @@ -513,7 +512,7 @@ def browse_s3(s3_path: str) -> FileBrowserData: continue # Remove the current prefix to get just the file name - file_name = key[len(prefix) :] + file_name = key[len(prefix):] # Skip files that are in subdirectories (contain '/') if "/" in file_name: diff --git a/api/runs/routes.py b/api/runs/routes.py index 01e5978..4b40549 100644 --- a/api/runs/routes.py +++ b/api/runs/routes.py @@ -14,7 +14,7 @@ """ from typing import 
Literal -from fastapi import APIRouter, Query, status, HTTPException +from fastapi import APIRouter, Query, status from core.deps import SessionDep, OpenSearchDep from api.runs.models import ( IlluminaMetricsResponseModel, From d87eaee64da69b1944868ffbc5185ab8dc5a818d Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Fri, 26 Sep 2025 21:45:37 -0400 Subject: [PATCH 29/35] Remove FileBrowserColumns model --- api/files/models.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/api/files/models.py b/api/files/models.py index 51763a2..13dd544 100644 --- a/api/files/models.py +++ b/api/files/models.py @@ -191,15 +191,6 @@ class PaginatedFileResponse(SQLModel): model_config = ConfigDict(from_attributes=True) -class FileBrowserColumns(SQLModel): - """Individual file/folder item for file browser""" - - name: str - date: str - size: int | None = None # None for directories - dir: bool # True for directories, False for files - - class FileBrowserFolder(SQLModel): """Folder item for file browser""" From 564b3949c16406da12b3655e8d0329d1dbd38891 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Fri, 26 Sep 2025 21:49:03 -0400 Subject: [PATCH 30/35] Update exception handling --- api/files/routes.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/api/files/routes.py b/api/files/routes.py index 3b2afce..8f6bd13 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -202,11 +202,11 @@ def get_file(session: SessionDep, file_id: str) -> FilePublic: """ try: return services.get_file(session, file_id) - except Exception: + except Exception as exc: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with id {file_id} not found", - ) + ) from exc @router.put("/{file_id}", response_model=FilePublic, summary="Update file metadata") @@ -221,11 +221,11 @@ def update_file( """ try: return services.update_file(session, file_id, file_update) - except Exception: + except Exception as exc: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with id {file_id} not found", - ) + ) from exc @router.delete( @@ -244,11 +244,11 @@ def delete_file(session: SessionDep, file_id: str) -> None: status_code=status.HTTP_404_NOT_FOUND, detail=f"File with id {file_id} not found", ) - except Exception: + except Exception as exc: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with id {file_id} not found", - ) + ) from exc @router.get("/{file_id}/content", summary="Download file content") @@ -276,11 +276,11 @@ def generate(): "Content-Disposition": f"attachment; filename={file_record.filename}" }, ) - except Exception: + except Exception as exc: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with id {file_id} not found or content not available", - ) + ) from exc @router.post( @@ -298,11 +298,11 @@ def upload_file_content( try: file_content = content.file.read() return services.update_file_content(session, file_id, file_content) - except Exception: + except Exception as exc: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"File with id {file_id} not found", - ) + ) from exc @router.get( From 6a7d668510697a49ebba8464b348ae2e3781e206 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 30 Sep 2025 11:43:03 -0400 Subject: [PATCH 31/35] Implement dual-storage support for Illumina samplesheets --- api/files/services.py | 66 ++++++++++++ tests/api/test_files.py | 218 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 284 insertions(+) diff --git a/api/files/services.py 
b/api/files/services.py index b03c02d..8e33a02 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -6,6 +6,7 @@ import secrets import string import os +import logging from pathlib import Path from datetime import datetime from sqlmodel import select, Session, func @@ -70,6 +71,53 @@ def get_mime_type(filename: str) -> str: return mime_type or "application/octet-stream" +def _is_valid_storage_path(path: str) -> bool: + """Validate storage path format""" + # Allow S3 paths, local paths, network paths + valid_prefixes = ['s3://', '/', 'file://', 'smb://', 'ftp://'] + return any(path.startswith(prefix) for prefix in valid_prefixes) + + +def _save_samplesheet_to_run_folder( + session: Session, run_barcode: str, file_content: bytes +) -> bool: + """ + Save samplesheet content to the run's folder URI location. + Returns True if successful, False otherwise. + """ + from smart_open import open as smart_open + + try: + # Import here to avoid circular imports + from api.runs.services import get_run + + # Get run information + run = get_run(session, run_barcode) + if not run or not run.run_folder_uri: + logging.warning(f"No run folder URI found for run {run_barcode}") + return False + + # Construct samplesheet path - always use SampleSheet.csv as the standard name + samplesheet_path = f"{run.run_folder_uri.rstrip('/')}/SampleSheet.csv" + + # Validate path format + if not _is_valid_storage_path(samplesheet_path): + logging.warning(f"Invalid storage path format: {samplesheet_path}") + return False + + # Save using smart_open (handles S3, local, network paths) + with smart_open(samplesheet_path, 'wb') as f: + f.write(file_content) + + logging.info(f"Successfully saved samplesheet to run folder: {samplesheet_path}") + return True + + except Exception as e: + # Log error but don't fail the upload + logging.error(f"Failed to save samplesheet to run folder {run_barcode}: {e}") + return False + + def create_file( session: Session, file_create: FileCreate, @@ -117,11 +165,29 @@ def create_file( # Store file content if provided if file_content: + # 1. Save to standard database storage location full_path = Path(storage_root) / file_path full_path.parent.mkdir(parents=True, exist_ok=True) with open(full_path, "wb") as f: f.write(file_content) + # 2. 
SPECIAL HANDLING: If samplesheet for run, also save to run folder + if (file_create.file_type == FileType.SAMPLESHEET and + file_create.entity_type == EntityType.RUN): + dual_storage_success = _save_samplesheet_to_run_folder( + session, file_create.entity_id, file_content + ) + # Add note to description about dual storage status + if dual_storage_success: + status_note = "[Dual-stored to run folder]" + else: + status_note = "[Database-only storage - run folder write failed]" + + if file_record.description: + file_record.description = f"{file_record.description} {status_note}" + else: + file_record.description = status_note + # Save to database session.add(file_record) session.commit() diff --git a/tests/api/test_files.py b/tests/api/test_files.py index a04ad18..5140f02 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -808,6 +808,224 @@ def test_file_type_filtering(self, session: Session, temp_storage): assert result.total_items == 1 assert result.data[0].file_type == file_type + def test_samplesheet_dual_storage_success(self, session: Session, temp_storage): + """Test that samplesheets are saved to both database storage and run folder""" + from api.runs.models import SequencingRun, RunStatus + from datetime import date + # Create a test run with run_folder_uri + run = SequencingRun( + run_date=date(2019, 1, 10), + machine_id="MACHINE123", + run_number=1, + flowcell_id="FLOWCELL123", + experiment_name="Test Experiment", + run_folder_uri=f"{temp_storage}/run_folder", + status=RunStatus.READY, + ) + session.add(run) + session.commit() + session.refresh(run) + + # Create run folder directory + run_folder = Path(temp_storage) / "run_folder" + run_folder.mkdir(parents=True, exist_ok=True) + + # Create samplesheet content + samplesheet_content = b"""[Header] +IEMFileVersion,4 +Investigator Name,Test User + +[Data] +Sample_ID,Sample_Name,index +Sample1,Sample1,ATCG +""" + + # Create samplesheet file + file_create = FileCreate( + filename="SampleSheet.csv", + description="Test samplesheet", + file_type=FileType.SAMPLESHEET, + entity_type=EntityType.RUN, + entity_id=run.barcode, + created_by="testuser", + ) + + created_file = create_file( + session, file_create, samplesheet_content, storage_root=temp_storage + ) + + # Verify file was created in database storage + db_file_path = Path(temp_storage) / created_file.file_path + assert db_file_path.exists() + assert db_file_path.read_bytes() == samplesheet_content + + # Verify file was also saved to run folder + run_folder_samplesheet = run_folder / "SampleSheet.csv" + assert run_folder_samplesheet.exists() + assert run_folder_samplesheet.read_bytes() == samplesheet_content + + # Verify description indicates dual storage success + assert "[Dual-stored to run folder]" in created_file.description + + def test_samplesheet_dual_storage_no_run_folder(self, session: Session, temp_storage): + """Test samplesheet upload when run has no run_folder_uri""" + from api.runs.models import SequencingRun, RunStatus + from datetime import date + + # Create a test run WITHOUT run_folder_uri + run = SequencingRun( + run_date=date(2019, 1, 10), + machine_id="MACHINE123", + run_number=2, + flowcell_id="FLOWCELL456", + experiment_name="Test Experiment No URI", + run_folder_uri=None, # No URI + status=RunStatus.READY, + ) + session.add(run) + session.commit() + session.refresh(run) + + samplesheet_content = b"Sample samplesheet content" + + # Create samplesheet file + file_create = FileCreate( + filename="SampleSheet.csv", + description="Test samplesheet", 
+ file_type=FileType.SAMPLESHEET, + entity_type=EntityType.RUN, + entity_id=run.barcode, + created_by="testuser", + ) + + created_file = create_file( + session, file_create, samplesheet_content, storage_root=temp_storage + ) + + # Verify file was created in database storage + db_file_path = Path(temp_storage) / created_file.file_path + assert db_file_path.exists() + + # Verify description indicates database-only storage + assert "[Database-only storage - run folder write failed]" in created_file.description + + def test_samplesheet_dual_storage_invalid_path(self, session: Session, temp_storage): + """Test samplesheet upload when run_folder_uri has invalid format""" + from api.runs.models import SequencingRun, RunStatus + from datetime import date + + # Create a test run with invalid run_folder_uri + run = SequencingRun( + run_date=date(2019, 1, 10), + machine_id="MACHINE123", + run_number=3, + flowcell_id="FLOWCELL789", + experiment_name="Test Experiment Invalid Path", + run_folder_uri="invalid://path/format", # Invalid format + status=RunStatus.READY, + ) + session.add(run) + session.commit() + session.refresh(run) + + samplesheet_content = b"Sample samplesheet content" + + # Create samplesheet file + file_create = FileCreate( + filename="SampleSheet.csv", + description="Test samplesheet", + file_type=FileType.SAMPLESHEET, + entity_type=EntityType.RUN, + entity_id=run.barcode, + created_by="testuser", + ) + + created_file = create_file( + session, file_create, samplesheet_content, storage_root=temp_storage + ) + + # Verify file was created in database storage + db_file_path = Path(temp_storage) / created_file.file_path + assert db_file_path.exists() + + # Verify description indicates database-only storage + assert "[Database-only storage - run folder write failed]" in created_file.description + + def test_non_samplesheet_files_not_dual_stored(self, session: Session, temp_storage): + """Test that non-samplesheet files are not dual-stored""" + from api.runs.models import SequencingRun, RunStatus + from datetime import date + + # Create a test run with run_folder_uri + run = SequencingRun( + run_date=date(2019, 1, 10), + machine_id="MACHINE123", + run_number=4, + flowcell_id="FLOWCELL999", + experiment_name="Test Experiment", + run_folder_uri=f"{temp_storage}/run_folder_2", + status=RunStatus.READY, + ) + session.add(run) + session.commit() + session.refresh(run) + + # Create run folder directory + run_folder = Path(temp_storage) / "run_folder_2" + run_folder.mkdir(parents=True, exist_ok=True) + + # Create a NON-samplesheet file for the run + file_content = b"Some other file content" + file_create = FileCreate( + filename="metrics.json", + description="Test metrics file", + file_type=FileType.METRICS, # NOT a samplesheet + entity_type=EntityType.RUN, + entity_id=run.barcode, + created_by="testuser", + ) + + created_file = create_file( + session, file_create, file_content, storage_root=temp_storage + ) + + # Verify file was created in database storage + db_file_path = Path(temp_storage) / created_file.file_path + assert db_file_path.exists() + + # Verify file was NOT saved to run folder + run_folder_file = run_folder / "SampleSheet.csv" + assert not run_folder_file.exists() + + # Verify description does NOT contain dual storage note + assert "[Dual-stored to run folder]" not in (created_file.description or "") + assert "[Database-only storage" not in (created_file.description or "") + + def test_samplesheet_for_project_not_dual_stored(self, session: Session, temp_storage): + """Test that 
samplesheets for projects are not dual-stored""" + # Create a samplesheet for a PROJECT (not a run) + samplesheet_content = b"Sample samplesheet content" + file_create = FileCreate( + filename="SampleSheet.csv", + description="Project samplesheet", + file_type=FileType.SAMPLESHEET, + entity_type=EntityType.PROJECT, # PROJECT, not RUN + entity_id="PROJ001", + created_by="testuser", + ) + + created_file = create_file( + session, file_create, samplesheet_content, storage_root=temp_storage + ) + + # Verify file was created in database storage + db_file_path = Path(temp_storage) / created_file.file_path + assert db_file_path.exists() + + # Verify description does NOT contain dual storage note + assert "[Dual-stored to run folder]" not in (created_file.description or "") + assert "[Database-only storage" not in (created_file.description or "") + class TestFileBrowserModels: """Test file browser model functionality""" From 139ddfe993d4451374723d1a1191c47204ccc89f Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 30 Sep 2025 11:47:04 -0400 Subject: [PATCH 32/35] Black --- api/files/services.py | 34 +++++++++-------- tests/api/test_files.py | 85 ++++++++++++++++++++++++----------------- 2 files changed, 69 insertions(+), 50 deletions(-) diff --git a/api/files/services.py b/api/files/services.py index 8e33a02..e4d015c 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -74,7 +74,7 @@ def get_mime_type(filename: str) -> str: def _is_valid_storage_path(path: str) -> bool: """Validate storage path format""" # Allow S3 paths, local paths, network paths - valid_prefixes = ['s3://', '/', 'file://', 'smb://', 'ftp://'] + valid_prefixes = ["s3://", "/", "file://", "smb://", "ftp://"] return any(path.startswith(prefix) for prefix in valid_prefixes) @@ -86,32 +86,34 @@ def _save_samplesheet_to_run_folder( Returns True if successful, False otherwise. """ from smart_open import open as smart_open - + try: # Import here to avoid circular imports from api.runs.services import get_run - + # Get run information run = get_run(session, run_barcode) if not run or not run.run_folder_uri: logging.warning(f"No run folder URI found for run {run_barcode}") return False - + # Construct samplesheet path - always use SampleSheet.csv as the standard name samplesheet_path = f"{run.run_folder_uri.rstrip('/')}/SampleSheet.csv" - + # Validate path format if not _is_valid_storage_path(samplesheet_path): logging.warning(f"Invalid storage path format: {samplesheet_path}") return False - + # Save using smart_open (handles S3, local, network paths) - with smart_open(samplesheet_path, 'wb') as f: + with smart_open(samplesheet_path, "wb") as f: f.write(file_content) - - logging.info(f"Successfully saved samplesheet to run folder: {samplesheet_path}") + + logging.info( + f"Successfully saved samplesheet to run folder: {samplesheet_path}" + ) return True - + except Exception as e: # Log error but don't fail the upload logging.error(f"Failed to save samplesheet to run folder {run_barcode}: {e}") @@ -172,8 +174,10 @@ def create_file( f.write(file_content) # 2. 
SPECIAL HANDLING: If samplesheet for run, also save to run folder - if (file_create.file_type == FileType.SAMPLESHEET and - file_create.entity_type == EntityType.RUN): + if ( + file_create.file_type == FileType.SAMPLESHEET + and file_create.entity_type == EntityType.RUN + ): dual_storage_success = _save_samplesheet_to_run_folder( session, file_create.entity_id, file_content ) @@ -182,7 +186,7 @@ def create_file( status_note = "[Dual-stored to run folder]" else: status_note = "[Database-only storage - run folder write failed]" - + if file_record.description: file_record.description = f"{file_record.description} {status_note}" else: @@ -561,7 +565,7 @@ def browse_s3(s3_path: str) -> FileBrowserData: for common_prefix in page.get("CommonPrefixes", []): folder_prefix = common_prefix["Prefix"] # Remove the current prefix to get just the folder name - folder_name = folder_prefix[len(prefix):].rstrip("/") + folder_name = folder_prefix[len(prefix) :].rstrip("/") if folder_name: # Skip empty names folders.append( FileBrowserFolder( @@ -578,7 +582,7 @@ def browse_s3(s3_path: str) -> FileBrowserData: continue # Remove the current prefix to get just the file name - file_name = key[len(prefix):] + file_name = key[len(prefix) :] # Skip files that are in subdirectories (contain '/') if "/" in file_name: diff --git a/tests/api/test_files.py b/tests/api/test_files.py index 5140f02..8bebbf9 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -812,6 +812,7 @@ def test_samplesheet_dual_storage_success(self, session: Session, temp_storage): """Test that samplesheets are saved to both database storage and run folder""" from api.runs.models import SequencingRun, RunStatus from datetime import date + # Create a test run with run_folder_uri run = SequencingRun( run_date=date(2019, 1, 10), @@ -825,11 +826,11 @@ def test_samplesheet_dual_storage_success(self, session: Session, temp_storage): session.add(run) session.commit() session.refresh(run) - + # Create run folder directory run_folder = Path(temp_storage) / "run_folder" run_folder.mkdir(parents=True, exist_ok=True) - + # Create samplesheet content samplesheet_content = b"""[Header] IEMFileVersion,4 @@ -839,7 +840,7 @@ def test_samplesheet_dual_storage_success(self, session: Session, temp_storage): Sample_ID,Sample_Name,index Sample1,Sample1,ATCG """ - + # Create samplesheet file file_create = FileCreate( filename="SampleSheet.csv", @@ -849,29 +850,31 @@ def test_samplesheet_dual_storage_success(self, session: Session, temp_storage): entity_id=run.barcode, created_by="testuser", ) - + created_file = create_file( session, file_create, samplesheet_content, storage_root=temp_storage ) - + # Verify file was created in database storage db_file_path = Path(temp_storage) / created_file.file_path assert db_file_path.exists() assert db_file_path.read_bytes() == samplesheet_content - + # Verify file was also saved to run folder run_folder_samplesheet = run_folder / "SampleSheet.csv" assert run_folder_samplesheet.exists() assert run_folder_samplesheet.read_bytes() == samplesheet_content - + # Verify description indicates dual storage success assert "[Dual-stored to run folder]" in created_file.description - def test_samplesheet_dual_storage_no_run_folder(self, session: Session, temp_storage): + def test_samplesheet_dual_storage_no_run_folder( + self, session: Session, temp_storage + ): """Test samplesheet upload when run has no run_folder_uri""" from api.runs.models import SequencingRun, RunStatus from datetime import date - + # Create a test run WITHOUT 
run_folder_uri run = SequencingRun( run_date=date(2019, 1, 10), @@ -885,9 +888,9 @@ def test_samplesheet_dual_storage_no_run_folder(self, session: Session, temp_sto session.add(run) session.commit() session.refresh(run) - + samplesheet_content = b"Sample samplesheet content" - + # Create samplesheet file file_create = FileCreate( filename="SampleSheet.csv", @@ -897,23 +900,28 @@ def test_samplesheet_dual_storage_no_run_folder(self, session: Session, temp_sto entity_id=run.barcode, created_by="testuser", ) - + created_file = create_file( session, file_create, samplesheet_content, storage_root=temp_storage ) - + # Verify file was created in database storage db_file_path = Path(temp_storage) / created_file.file_path assert db_file_path.exists() - + # Verify description indicates database-only storage - assert "[Database-only storage - run folder write failed]" in created_file.description + assert ( + "[Database-only storage - run folder write failed]" + in created_file.description + ) - def test_samplesheet_dual_storage_invalid_path(self, session: Session, temp_storage): + def test_samplesheet_dual_storage_invalid_path( + self, session: Session, temp_storage + ): """Test samplesheet upload when run_folder_uri has invalid format""" from api.runs.models import SequencingRun, RunStatus from datetime import date - + # Create a test run with invalid run_folder_uri run = SequencingRun( run_date=date(2019, 1, 10), @@ -927,9 +935,9 @@ def test_samplesheet_dual_storage_invalid_path(self, session: Session, temp_stor session.add(run) session.commit() session.refresh(run) - + samplesheet_content = b"Sample samplesheet content" - + # Create samplesheet file file_create = FileCreate( filename="SampleSheet.csv", @@ -939,23 +947,28 @@ def test_samplesheet_dual_storage_invalid_path(self, session: Session, temp_stor entity_id=run.barcode, created_by="testuser", ) - + created_file = create_file( session, file_create, samplesheet_content, storage_root=temp_storage ) - + # Verify file was created in database storage db_file_path = Path(temp_storage) / created_file.file_path assert db_file_path.exists() - + # Verify description indicates database-only storage - assert "[Database-only storage - run folder write failed]" in created_file.description + assert ( + "[Database-only storage - run folder write failed]" + in created_file.description + ) - def test_non_samplesheet_files_not_dual_stored(self, session: Session, temp_storage): + def test_non_samplesheet_files_not_dual_stored( + self, session: Session, temp_storage + ): """Test that non-samplesheet files are not dual-stored""" from api.runs.models import SequencingRun, RunStatus from datetime import date - + # Create a test run with run_folder_uri run = SequencingRun( run_date=date(2019, 1, 10), @@ -969,11 +982,11 @@ def test_non_samplesheet_files_not_dual_stored(self, session: Session, temp_stor session.add(run) session.commit() session.refresh(run) - + # Create run folder directory run_folder = Path(temp_storage) / "run_folder_2" run_folder.mkdir(parents=True, exist_ok=True) - + # Create a NON-samplesheet file for the run file_content = b"Some other file content" file_create = FileCreate( @@ -984,24 +997,26 @@ def test_non_samplesheet_files_not_dual_stored(self, session: Session, temp_stor entity_id=run.barcode, created_by="testuser", ) - + created_file = create_file( session, file_create, file_content, storage_root=temp_storage ) - + # Verify file was created in database storage db_file_path = Path(temp_storage) / created_file.file_path assert 
db_file_path.exists() - + # Verify file was NOT saved to run folder run_folder_file = run_folder / "SampleSheet.csv" assert not run_folder_file.exists() - + # Verify description does NOT contain dual storage note assert "[Dual-stored to run folder]" not in (created_file.description or "") assert "[Database-only storage" not in (created_file.description or "") - def test_samplesheet_for_project_not_dual_stored(self, session: Session, temp_storage): + def test_samplesheet_for_project_not_dual_stored( + self, session: Session, temp_storage + ): """Test that samplesheets for projects are not dual-stored""" # Create a samplesheet for a PROJECT (not a run) samplesheet_content = b"Sample samplesheet content" @@ -1013,15 +1028,15 @@ def test_samplesheet_for_project_not_dual_stored(self, session: Session, temp_st entity_id="PROJ001", created_by="testuser", ) - + created_file = create_file( session, file_create, samplesheet_content, storage_root=temp_storage ) - + # Verify file was created in database storage db_file_path = Path(temp_storage) / created_file.file_path assert db_file_path.exists() - + # Verify description does NOT contain dual storage note assert "[Dual-stored to run folder]" not in (created_file.description or "") assert "[Database-only storage" not in (created_file.description or "") From 9a25b90af4e40fe9d3f9961e0c2b779ab19fbe1f Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Tue, 30 Sep 2025 11:48:19 -0400 Subject: [PATCH 33/35] Remove whitespace --- api/files/services.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/files/services.py b/api/files/services.py index e4d015c..825a138 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -565,7 +565,7 @@ def browse_s3(s3_path: str) -> FileBrowserData: for common_prefix in page.get("CommonPrefixes", []): folder_prefix = common_prefix["Prefix"] # Remove the current prefix to get just the folder name - folder_name = folder_prefix[len(prefix) :].rstrip("/") + folder_name = folder_prefix[len(prefix):].rstrip("/") if folder_name: # Skip empty names folders.append( FileBrowserFolder( @@ -582,7 +582,7 @@ def browse_s3(s3_path: str) -> FileBrowserData: continue # Remove the current prefix to get just the file name - file_name = key[len(prefix) :] + file_name = key[len(prefix):] # Skip files that are in subdirectories (contain '/') if "/" in file_name: From 6e5f361b555fa8df7cf9d28553bf5dc15c194521 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Thu, 2 Oct 2025 09:33:18 -0400 Subject: [PATCH 34/35] Remove extraneous function --- api/files/services.py | 7 +------ tests/api/test_files.py | 9 +-------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/api/files/services.py b/api/files/services.py index 825a138..9871a75 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -438,7 +438,7 @@ def browse_filesystem( and routes to the appropriate handler. 
""" # Check if this is an S3 path - if _is_s3_path(directory_path): + if directory_path.startswith("s3://"): return browse_s3(directory_path) # Handle local filesystem @@ -500,11 +500,6 @@ def _browse_local_filesystem( return FileBrowserData(folders=folders, files=files) -def _is_s3_path(path: str) -> bool: - """Check if a path is an S3 URI (s3://bucket/key)""" - return path.startswith("s3://") - - def _parse_s3_path(s3_path: str) -> tuple[str, str]: """Parse S3 path into bucket and prefix""" if not s3_path.startswith("s3://"): diff --git a/tests/api/test_files.py b/tests/api/test_files.py index 8bebbf9..5eb5926 100644 --- a/tests/api/test_files.py +++ b/tests/api/test_files.py @@ -1179,14 +1179,7 @@ def test_browse_filesystem_endpoint(self, client: TestClient): def test_s3_path_detection(self): """Test S3 path detection functionality""" - from api.files.services import _is_s3_path, _parse_s3_path - - # Test S3 path detection - assert _is_s3_path("s3://bucket/key") is True - assert _is_s3_path("s3://bucket") is True - assert _is_s3_path("/local/path") is False - assert _is_s3_path("local/path") is False - assert _is_s3_path("") is False + from api.files.services import _parse_s3_path # Test S3 path parsing bucket, key = _parse_s3_path("s3://my-bucket/path/to/folder/") From 581e260414170ee71f5c40762b8a3b76f4acf996 Mon Sep 17 00:00:00 2001 From: Ryan Golhar Date: Fri, 9 Jan 2026 10:55:58 -0500 Subject: [PATCH 35/35] Add S3 support to create_file --- api/files/routes.py | 25 +++- api/files/services.py | 279 +++++++++++++++++++++++++++++++++++------- 2 files changed, 253 insertions(+), 51 deletions(-) diff --git a/api/files/routes.py b/api/files/routes.py index 52d4e80..de7d702 100644 --- a/api/files/routes.py +++ b/api/files/routes.py @@ -48,10 +48,14 @@ def create_file( is_public: bool = Form(False), created_by: Optional[str] = Form(None), content: Optional[UploadFile] = FastAPIFile(None), + s3_client=Depends(get_s3_client), ) -> FilePublic: """ Create a new file record with optional file content upload. + Storage backend (S3 vs Local) is automatically determined based on + configuration and entity type. + - **filename**: Name of the file - **description**: Optional description of the file - **file_type**: Type of file (fastq, bam, vcf, etc.) @@ -75,7 +79,7 @@ def create_file( if content and content.filename: file_content = content.file.read() - return services.create_file(session, file_in, file_content) + return services.create_file(session, file_in, file_content, s3_client=s3_client) @router.get( @@ -190,14 +194,16 @@ def update_file( @router.delete( "/{file_id}", status_code=status.HTTP_204_NO_CONTENT, summary="Delete file" ) -def delete_file(session: SessionDep, file_id: str) -> None: +def delete_file( + session: SessionDep, file_id: str, s3_client=Depends(get_s3_client) +) -> None: """ - Delete a file and its content. + Delete a file and its content from storage (local or S3). - **file_id**: The unique file identifier """ try: - success = services.delete_file(session, file_id) + success = services.delete_file(session, file_id, s3_client=s3_client) if not success: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, @@ -214,17 +220,24 @@ def delete_file(session: SessionDep, file_id: str) -> None: "/{file_id}/content", response_model=FilePublic, summary="Upload file content" ) def upload_file_content( - session: SessionDep, file_id: str, content: UploadFile = FastAPIFile(...) 
+ session: SessionDep, + file_id: str, + content: UploadFile = FastAPIFile(...), + s3_client=Depends(get_s3_client), ) -> FilePublic: """ Upload content for an existing file record. + + Updates file in appropriate storage backend (S3 or local). - **file_id**: The unique file identifier - **content**: The file content to upload """ try: file_content = content.file.read() - return services.update_file_content(session, file_id, file_content) + return services.update_file_content( + session, file_id, file_content, s3_client=s3_client + ) except Exception as exc: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, diff --git a/api/files/services.py b/api/files/services.py index eb04e6a..6f9c265 100644 --- a/api/files/services.py +++ b/api/files/services.py @@ -70,6 +70,110 @@ def get_mime_type(filename: str) -> str: return mime_type or "application/octet-stream" +def determine_storage_backend( + entity_type: EntityType, entity_id: str +) -> tuple[StorageBackend, str]: + """ + Determine storage backend and base URI from configuration and entity type. + + Args: + entity_type: The entity type (PROJECT or RUN) + entity_id: The entity identifier + + Returns: + Tuple of (StorageBackend, base_uri) + """ + from core.config import get_settings + + settings = get_settings() + + # Check if S3 is configured based on entity type + if entity_type == EntityType.PROJECT: + if settings.DATA_BUCKET_URI.startswith("s3://"): + return StorageBackend.S3, settings.DATA_BUCKET_URI + elif entity_type == EntityType.RUN: + if settings.RESULTS_BUCKET_URI.startswith("s3://"): + return StorageBackend.S3, settings.RESULTS_BUCKET_URI + + # Fallback to local storage + return StorageBackend.LOCAL, "storage" + + +def _upload_to_s3( + file_content: bytes, s3_uri: str, mime_type: str, s3_client=None +) -> bool: + """ + Upload file content to S3. + + Args: + file_content: File bytes to upload + s3_uri: Full S3 URI (s3://bucket/key) + mime_type: Content type for S3 metadata + s3_client: Optional boto3 S3 client + + Returns: + True if successful + + Raises: + HTTPException: If upload fails + """ + if not BOTO3_AVAILABLE: + raise HTTPException( + status_code=status.HTTP_501_NOT_IMPLEMENTED, + detail="boto3 not available for S3 uploads. Install boto3 to enable S3 storage.", + ) + + try: + bucket, key = _parse_s3_path(s3_uri) + + if s3_client is None: + s3_client = boto3.client("s3") + + s3_client.put_object( + Bucket=bucket, + Key=key, + Body=file_content, + ContentType=mime_type, + Metadata={ + "uploaded-by": "ngs360-api", + "upload-timestamp": datetime.utcnow().isoformat(), + }, + ) + + logging.info(f"Successfully uploaded file to S3: {s3_uri}") + return True + + except NoCredentialsError as exc: + logging.error(f"AWS credentials not configured: {exc}") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="AWS credentials not found. 
Please configure AWS credentials.", + ) from exc + except ClientError as exc: + error_code = exc.response["Error"]["Code"] + if error_code == "NoSuchBucket": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"S3 bucket not found in URI: {s3_uri}", + ) from exc + elif error_code == "AccessDenied": + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=f"Access denied to S3 bucket: {s3_uri}", + ) from exc + else: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"S3 error: {exc.response['Error']['Message']}", + ) from exc + except Exception as exc: + logging.error(f"Failed to upload to S3: {exc}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Unexpected error uploading to S3: {str(exc)}", + ) from exc + + def _is_valid_storage_path(path: str) -> bool: """Validate storage path format""" # Allow S3 paths, local paths, network paths @@ -124,8 +228,14 @@ def create_file( file_create: FileCreate, file_content: bytes | None = None, storage_root: str = "storage", + s3_client=None, ) -> File: - """Create a new file record and optionally store content""" + """ + Create a new file record and store content to appropriate backend. + + Automatically determines storage backend (S3 vs Local) based on + configuration and entity type. + """ # Generate unique file ID file_id = generate_file_id() @@ -133,14 +243,27 @@ def create_file( # Use original_filename if provided, otherwise use filename original_filename = file_create.original_filename or file_create.filename - # Generate file path - file_path = generate_file_path( + # Determine storage backend automatically from configuration + storage_backend, base_uri = determine_storage_backend( + file_create.entity_type, file_create.entity_id + ) + + # Generate file path structure + relative_file_path = generate_file_path( file_create.entity_type, file_create.entity_id, file_create.file_type, f"{file_id}_{file_create.filename}", ) + # Construct full storage path based on backend + if storage_backend == StorageBackend.S3: + # S3 URI: s3://bucket/entity_type/entity_id/file_type/year/month/filename + storage_path = f"{base_uri.rstrip('/')}/{relative_file_path}" + else: + # Local filesystem: relative path + storage_path = relative_file_path + # Calculate file metadata if content is provided file_size = len(file_content) if file_content else None checksum = calculate_file_checksum(file_content) if file_content else None @@ -151,7 +274,7 @@ def create_file( file_id=file_id, filename=file_create.filename, original_filename=original_filename, - file_path=file_path, + file_path=storage_path, file_size=file_size, mime_type=mime_type, checksum=checksum, @@ -161,18 +284,28 @@ def create_file( entity_type=file_create.entity_type, entity_id=file_create.entity_id, is_public=file_create.is_public, - storage_backend=StorageBackend.LOCAL, + storage_backend=storage_backend, ) - # Store file content if provided + # Store file content based on backend if file_content: - # 1. 
Save to standard database storage location - full_path = Path(storage_root) / file_path - full_path.parent.mkdir(parents=True, exist_ok=True) - with open(full_path, "wb") as f: - f.write(file_content) + if storage_backend == StorageBackend.S3: + # Upload to S3 + _upload_to_s3(file_content, storage_path, mime_type, s3_client) + logging.info( + f"File {file_id} uploaded to S3: {storage_path}" + ) + else: + # Write to local filesystem + full_path = Path(storage_root) / relative_file_path + full_path.parent.mkdir(parents=True, exist_ok=True) + with open(full_path, "wb") as f: + f.write(file_content) + logging.info( + f"File {file_id} saved to local storage: {full_path}" + ) - # 2. SPECIAL HANDLING: If samplesheet for run, also save to run folder + # SPECIAL HANDLING: If samplesheet for run, also save to run folder if ( file_create.file_type == FileType.SAMPLESHEET and file_create.entity_type == EntityType.RUN @@ -184,10 +317,12 @@ def create_file( if dual_storage_success: status_note = "[Dual-stored to run folder]" else: - status_note = "[Database-only storage - run folder write failed]" + status_note = "[Database-only - run folder write failed]" if file_record.description: - file_record.description = f"{file_record.description} {status_note}" + file_record.description = ( + f"{file_record.description} {status_note}" + ) else: file_record.description = status_note @@ -241,21 +376,41 @@ def update_file(session: Session, file_id: str, file_update: FileUpdate) -> File return file_record -def delete_file(session: Session, file_id: str, storage_root: str = "storage") -> bool: - """Delete a file record and its content""" +def delete_file( + session: Session, + file_id: str, + storage_root: str = "storage", + s3_client=None, +) -> bool: + """Delete a file record and its content from storage (local or S3)""" file_record = get_file(session, file_id) - # Delete physical file if it exists - full_path = Path(storage_root) / file_record.file_path - if full_path.exists(): - full_path.unlink() - - # Try to remove empty directories + # Delete physical file based on storage backend + if file_record.storage_backend == StorageBackend.S3: + # Delete from S3 try: - full_path.parent.rmdir() - except OSError: - # Directory not empty, that's fine - pass + bucket, key = _parse_s3_path(file_record.file_path) + if s3_client is None: + s3_client = boto3.client("s3") + s3_client.delete_object(Bucket=bucket, Key=key) + logging.info(f"Deleted file from S3: {file_record.file_path}") + except Exception as e: + logging.warning( + f"Failed to delete S3 object {file_record.file_path}: {e}" + ) + # Continue with database deletion even if S3 delete fails + else: + # Delete from local filesystem + full_path = Path(storage_root) / file_record.file_path + if full_path.exists(): + full_path.unlink() + + # Try to remove empty directories + try: + full_path.parent.rmdir() + except OSError: + # Directory not empty, that's fine + pass # Delete from database session.delete(file_record) @@ -265,20 +420,40 @@ def delete_file(session: Session, file_id: str, storage_root: str = "storage") - def get_file_content( - session: Session, file_id: str, storage_root: str = "storage" + session: Session, + file_id: str, + storage_root: str = "storage", + s3_client=None, ) -> bytes: - """Get file content from storage""" + """Get file content from storage (local or S3)""" file_record = get_file(session, file_id) - full_path = Path(storage_root) / file_record.file_path - if not full_path.exists(): - raise HTTPException( - 
status_code=status.HTTP_404_NOT_FOUND, - detail=f"File content not found at {file_record.file_path}", - ) + if file_record.storage_backend == StorageBackend.S3: + # Download from S3 + try: + file_content, _, _ = download_file( + file_record.file_path, s3_client + ) + return file_content + except HTTPException: + raise + except Exception as e: + logging.error(f"Error downloading from S3: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error retrieving file from S3: {str(e)}", + ) from e + else: + # Read from local filesystem + full_path = Path(storage_root) / file_record.file_path + if not full_path.exists(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"File content not found at {file_record.file_path}", + ) - with open(full_path, "rb") as f: - return f.read() + with open(full_path, "rb") as f: + return f.read() def list_files_for_entity( @@ -313,9 +488,13 @@ def get_file_count_for_entity( def update_file_content( - session: Session, file_id: str, content: bytes, storage_root: str = "storage" + session: Session, + file_id: str, + content: bytes, + storage_root: str = "storage", + s3_client=None, ) -> File: - """Update file content""" + """Update file content (local or S3)""" # Get the file record file_record = get_file(session, file_id) @@ -323,10 +502,20 @@ def update_file_content( file_size = len(content) checksum = calculate_file_checksum(content) - # Write content to storage - storage_path = Path(storage_root) / file_record.file_path - storage_path.parent.mkdir(parents=True, exist_ok=True) - storage_path.write_bytes(content) + # Write content to appropriate storage backend + if file_record.storage_backend == StorageBackend.S3: + # Upload to S3 + mime_type = get_mime_type(file_record.filename) + _upload_to_s3(content, file_record.file_path, mime_type, s3_client) + logging.info(f"Updated file content in S3: {file_record.file_path}") + else: + # Write to local filesystem + storage_path = Path(storage_root) / file_record.file_path + storage_path.parent.mkdir(parents=True, exist_ok=True) + storage_path.write_bytes(content) + logging.info( + f"Updated file content in local storage: {storage_path}" + ) # Update file record file_record.file_size = file_size @@ -345,12 +534,12 @@ def browse_filesystem( """ Browse filesystem directory and return structured data. - Automatically detects if the path is S3 (s3://bucket/key) or local filesystem - and routes to the appropriate handler. + Automatically detects if the path is S3 (s3://bucket/key) or local + filesystem and routes to the appropriate handler. """ # Check if this is an S3 path if directory_path.startswith("s3://"): - return browse_s3(directory_path) + return _list_s3(directory_path) # Handle local filesystem return _browse_local_filesystem(directory_path, storage_root)