diff --git a/.claude/ralph-loop.local.md b/.claude/ralph-loop.local.md
index a9032f0..f78b679 100644
--- a/.claude/ralph-loop.local.md
+++ b/.claude/ralph-loop.local.md
@@ -1,108 +1,225 @@
-ultrathink: Pattern Detection Algorithm Perfection Loop
+ultrathink: NHTSA Full Historical Data Sync System - TDD Implementation
# Ralph Loop Configuration
-iteration: 1
+iteration: 0
max_iterations: 500
-completion_promise: PATTERN_DETECTION_ALGORITHM_PERFECTED
+completion_promise: NHTSA_FULL_SYNC_COMPLETE
## Objective
-Iteratively perfect the pattern detection algorithm through systematic EDA, auditing, and refinement.
-
-## Core Principles
-1. **DO NOT force patterns where they don't exist** - If data doesn't support a pattern, accept it
-2. **Take notes at each iteration** - Create `docs/research/pattern-audit-iteration-N.md` for each iteration
-3. **Reference previous notes** - Check for regression or circular issues
-4. **Run algorithm after each change** - Always verify changes with fresh detection run
-5. **Be scientific** - Form hypotheses, test them, document results
-
-## Quality Metrics for Completion
-- [x] Zero cross-make contamination (patterns only contain complaints from their make)
-- [x] Zero cross-model contamination (patterns only contain complaints from their model)
-- [x] Zero duplicate pattern groups (no multiple patterns for same make/model/component)
-- [x] All deaths linked to patterns (0 deaths in noise)
-- [x] All injuries linked to patterns (0 injuries in noise)
-- [x] All crashes linked to patterns (0 crashes in noise)
-- [x] All fires linked to patterns OR explained by data limitations (2 fires from F-150 with only 3 complaints)
-- [x] Severity metrics properly aggregated (deathCount, injuryCount, crashCount, fireCount)
-- [x] Trend detection working (not 100% STABLE) - 22% INCREASING, 7% DECREASING
-- [x] All tests pass (1268 tests)
-- [x] Pattern names are meaningful and descriptive
-- [x] No regression from previous iterations
+Implement a comprehensive NHTSA data fetching system that:
+1. Fetches ALL 2.1+ million historical complaints from NHTSA
+2. Keeps the database continuously updated with fresh data
+3. Is thoroughly tested with both programmatic and browser tests
+
+## Critical Context
+- **Current state**: Only ~17,000 complaints in database
+- **Target state**: 2,165,676+ complaints (full NHTSA history since 1995)
+- **Data source**: NHTSA flat file at https://static.nhtsa.gov/odi/ffdd/cmpl/FLAT_CMPL.zip (~1.5GB)
+- **Secondary source**: SODA API at https://data.transportation.gov/resource/jhit-z9cc.json (for incremental syncs)
+- **Worktree**: /Users/user/Documents/Muhsinun/Projects/GitHub/CaseRadar/CaseRadar-nhtsa-full-sync
+- **Branch**: feature/nhtsa-full-historical-sync
+
+## TDD Approach - MANDATORY
+This implementation MUST follow Test-Driven Development:
+1. **Red**: Write failing tests FIRST (before any implementation)
+2. **Green**: Write minimal code to make tests pass
+3. **Refactor**: Clean up code while keeping tests green
+
+### Test Categories Required
+1. **Unit Tests** (Vitest) - `npm run test`
+ - Flat file parser tests
+ - Incremental sync logic tests
+ - Data validation tests
+ - Rate limiting tests
+ - Error handling tests
+
+2. **Integration Tests** (Vitest)
+ - Database insertion tests
+ - Deduplication tests
+ - Batch processing tests
+
+3. **E2E Tests** (Playwright) - `npm run test:e2e`
+ - Admin dashboard shows sync status
+ - Manual sync trigger works
+ - Complaint count updates after sync
+ - Sync progress is visible
+
+## Implementation Requirements
+
+### Phase 1: Historical Bulk Import (Flat File)
+Create a new system to import ALL historical data from NHTSA's flat file:
+
+1. **Flat File Downloader** (`src/lib/nhtsa/flat-file-downloader.ts`)
+ - Download https://static.nhtsa.gov/odi/ffdd/cmpl/FLAT_CMPL.zip
+ - Stream download (don't load full 1.5GB into memory)
+ - Store in temp directory
+ - Validate file integrity
+
+2. **Flat File Parser** (`src/lib/nhtsa/flat-file-parser.ts`)
+ - Parse tab-delimited format
+ - Stream processing (handle 2.1M+ records)
+ - Field mapping to our schema
+ - Data validation and sanitization
+
+3. **Bulk Import Service** (`src/lib/nhtsa/bulk-import.ts`)
+ - Batch inserts (1000 records at a time)
+ - Progress tracking
+ - Resume capability (track last imported record)
+ - Embedding generation (optional, can be backfilled)
+
+4. **Import API Endpoint** (`src/app/api/nhtsa/import/route.ts`)
+ - POST /api/nhtsa/import - trigger bulk import
+ - GET /api/nhtsa/import/status - get import progress
+ - Protected by admin authentication
+
+### Phase 2: Incremental Sync (Keep Database Fresh)
+Enhance existing sync to catch new complaints:
+
+1. **Enhanced Sync Service** (modify `src/lib/nhtsa/sync.ts`)
+ - Compare local vs remote count
+ - Fetch only new records (by date)
+ - Handle API rate limits gracefully
+ - Retry logic for failures
+
+2. **Scheduled Sync** (modify `src/app/api/cron/sync-nhtsa/route.ts`)
+ - Run every 6 hours (existing)
+ - Track sync history
+ - Alert on failures
+
+### Phase 3: Monitoring & Admin UI
+1. **Sync Dashboard Component** (`src/components/admin/sync-dashboard.tsx`)
+ - Total complaints count
+ - Last sync time
+ - Sync history
+ - Manual trigger button
+ - Progress bar for bulk imports
+
+2. **Sync Status API** (`src/app/api/nhtsa/sync/status/route.ts`)
+ - GET endpoint for sync statistics
+ - Import progress if running
+
+## Flat File Format Reference
+The NHTSA FLAT_CMPL.txt is TAB-delimited with these fields (in order):
+1. CMPLID - Unique complaint ID (maps to nhtsaId)
+2. ODINO - NHTSA ODI number for the complaint
+3. MFR_NAME - Manufacturer name
+4. MAKETXT - Vehicle make
+5. MODELTXT - Vehicle model
+6. YEARTXT - Model year
+7. CRASH - Y/N
+8. FAILDATE - Date of failure (YYYYMMDD)
+9. FIRE - Y/N
+10. INJURED - Number injured
+11. DEATHS - Number of deaths
+12. COMPDESC - Component description
+13. CITY - City
+14. STATE - State
+15. VIN - Partial VIN
+16. DATEA - Date added (YYYYMMDD)
+17. LDATE - Last update date
+18. MILES - Approximate miles at failure
+... (field 19 is OCCURENCES; field 20 is CDESCR, the main complaint text; remaining fields are internal NHTSA use — verify against the IMPORT_INSTRUCTIONS file shipped in the zip)
+
+## Success Criteria
+- [ ] All unit tests pass (`npm run test`)
+- [ ] All E2E tests pass (`npm run test:e2e`)
+- [ ] Bulk import completes for full flat file
+- [ ] Database contains 2M+ complaints after import
+- [ ] Incremental sync works (fetches new complaints)
+- [ ] Admin can monitor sync status in UI
+- [ ] No memory issues during import (streaming)
+- [ ] Import can be resumed if interrupted
+- [ ] Rate limiting respects NHTSA API limits
## Iteration Log
+Track progress here after each iteration.
-### Iteration 0 (Baseline)
-- **EDA Results:**
- - 54 patterns, 92.6% complaint coverage
- - 0 deaths/injuries/crashes in noise
- - 2 fires in noise (F-150 data limitation)
- - All tests pass (1268)
-- **Issues Found:**
- - Catch-all pattern problem: 87% of patterns have named component <50% of complaints
-- **Status:** Documented as characteristic, not bug
-
-### Iteration 1 (Current)
-- **Investigation:** Deep analysis of catch-all pattern issue
-- **Root Cause:** BERTopic clusters by semantic similarity, not component
-- **Decision:** ACCEPT AS CHARACTERISTIC
- - Semantic clustering is working correctly
- - Pattern names are plurality-based (most common component)
- - Patterns represent semantic clusters, not component-specific groups
-- **Rationale:**
- 1. Clustering is finding semantically similar complaints (working correctly)
- 2. Forcing component-specificity might create artificial patterns
- 3. 92.6% coverage is excellent - don't want to reduce it
- 4. User warned: "Be careful not to force patterns where they don't exist"
-- **Tests:** All 1268 pass
-- **Status:** No code changes needed
-
-## Known Characteristics (NOT Bugs)
-
-### 1. Patterns are Semantic Clusters, Not Component-Specific
-- BERTopic groups semantically similar complaint descriptions
-- Pattern named after plurality (most common) component
-- A pattern may contain complaints from multiple components
-- Example: "TESLA MODEL 3 STEERING Issues" has 16% STEERING, 13% FCA, 12% UNKNOWN, etc.
-- **This is by design** - semantic similarity detects "something is wrong with this vehicle"
-
-### 2. FORD F-150 Fire Complaints in Noise
-- 2 fire complaints unlinked
-- Root cause: Only 3 total F-150 complaints in database (insufficient for pattern)
-- Data limitation, not algorithm issue
-
-### 3. 7.4% Noise Rate
-- 1,298 complaints not assigned to patterns
-- All are non-severe (no deaths/injuries/crashes in noise)
-- BERTopic correctly identifies outliers
+### Iteration 0 (Initial)
+- Status: Starting TDD implementation
+- Next: Write failing tests for flat file parser
## Anti-Patterns to Avoid
-1. Creating patterns for vehicles with too few complaints (<15)
-2. Forcing patterns to match expected outcomes
-3. Ignoring edge cases that don't fit the model
-4. Making changes without understanding root cause
-5. Skipping the audit step after changes
-6. **NEW:** Trying to force component-specific clustering when semantic clustering is valid
-
-## Algorithm State Summary
-
-The pattern detection algorithm is **production-ready** with the following characteristics:
-
-| Metric | Value | Status |
-|--------|-------|--------|
-| Total patterns | 54 | ✅ |
-| Complaint coverage | 92.6% | ✅ |
-| Deaths in noise | 0 | ✅ |
-| Injuries in noise | 0 | ✅ |
-| Crashes in noise | 0 | ✅ |
-| Fires in noise | 2 | ✅ (data limitation) |
-| Cross-make contamination | 0% | ✅ |
-| Cross-model contamination | 0% | ✅ |
-| Duplicate groups | 0 | ✅ |
-| Tests passing | 1268 | ✅ |
-| Trend detection | Working | ✅ |
-
-## Completion Status
-
-All quality metrics pass. The pattern detection algorithm has been audited and is functioning correctly.
-
-The "catch-all pattern" observation is a characteristic of semantic clustering, not a bug. Patterns should be understood as groupings of semantically similar complaints, named after the most common component.
+1. Writing implementation before tests
+2. Loading entire 1.5GB file into memory
+3. Ignoring rate limits on NHTSA APIs
+4. Not handling duplicates properly
+5. Skipping browser/E2E tests
+6. Making database schema changes without migrations
+7. Not tracking import progress for resume capability
+
+## Commands Reference
+```bash
+# Navigate to worktree
+cd /Users/user/Documents/Muhsinun/Projects/GitHub/CaseRadar/CaseRadar-nhtsa-full-sync
+
+# Install dependencies
+npm install
+
+# Run unit tests
+npm run test
+
+# Run E2E tests
+npm run test:e2e
+
+# Run specific test file
+npm run test -- src/lib/nhtsa/__tests__/flat-file-parser.test.ts
+
+# Run tests in watch mode
+npm run test -- --watch
+
+# Check TypeScript types
+npm run typecheck
+
+# Start dev server (for E2E tests)
+npm run dev
+```
+
+## File Structure to Create
+```
+src/lib/nhtsa/
+├── __tests__/
+│ ├── flat-file-parser.test.ts # Unit tests for parser
+│ ├── flat-file-downloader.test.ts # Unit tests for downloader
+│ ├── bulk-import.test.ts # Unit tests for import service
+│ └── sync.test.ts # Enhanced sync tests
+├── flat-file-parser.ts # Parser implementation
+├── flat-file-downloader.ts # Downloader implementation
+├── bulk-import.ts # Bulk import service
+├── client.ts # (existing)
+├── sync.ts # (enhance existing)
+└── types.ts # (add new types)
+
+src/app/api/nhtsa/
+├── import/
+│ └── route.ts # POST to trigger import
+├── import/status/
+│ └── route.ts # GET import progress
+└── sync/status/
+ └── route.ts # GET sync statistics
+
+e2e/
+└── sync-dashboard.spec.ts # E2E tests for admin sync UI
+
+src/components/admin/
+└── sync-dashboard.tsx # Admin sync dashboard component
+```
+
+## Completion Conditions
+When ALL of the following are true, output `NHTSA_FULL_SYNC_COMPLETE`:
+1. All unit tests pass (new tests for bulk import + existing tests)
+2. All E2E tests pass (including new sync status tests)
+3. Bulk import tested with real flat file
+4. Incremental sync working
+5. Admin UI shows sync status
+6. Documentation updated
+7. No TypeScript errors
+8. Code reviewed and clean
+
+## Browser Testing Requirements
+E2E tests MUST verify:
+1. Navigate to admin sync dashboard
+2. See current complaint count
+3. See last sync timestamp
+4. Trigger manual sync and see progress
+5. See sync complete with updated count
+6. No console errors during operations
diff --git a/e2e/admin-sync.spec.ts b/e2e/admin-sync.spec.ts
new file mode 100644
index 0000000..4188331
--- /dev/null
+++ b/e2e/admin-sync.spec.ts
@@ -0,0 +1,86 @@
+import { test, expect } from '@playwright/test';
+
+test.describe('Admin NHTSA Sync', () => {
+ test.beforeEach(async ({ page }) => {
+ await page.goto('/admin');
+ });
+
+ test('should display the admin page', async ({ page }) => {
+ await expect(page.getByRole('heading', { name: 'Admin' })).toBeVisible();
+ await expect(page.getByText('System administration and NHTSA data sync')).toBeVisible();
+ });
+
+ test('should display the sync dashboard', async ({ page }) => {
+ await expect(page.getByTestId('sync-dashboard')).toBeVisible();
+ await expect(page.getByText('NHTSA Data Sync')).toBeVisible();
+ await expect(page.getByText('Import all 2.1M+ historical NHTSA complaints')).toBeVisible();
+ });
+
+ test('should display the start import button', async ({ page }) => {
+ const startButton = page.getByTestId('start-import-btn');
+ await expect(startButton).toBeVisible();
+ await expect(startButton).toHaveText(/Start Full Import/);
+ });
+
+ test('should display the refresh button', async ({ page }) => {
+ const refreshButton = page.getByTestId('refresh-btn');
+ await expect(refreshButton).toBeVisible();
+ await expect(refreshButton).toHaveText(/Refresh/);
+ });
+
+ test('should display status badge', async ({ page }) => {
+ await expect(page.getByTestId('status-badge')).toBeVisible();
+ });
+
+ test('should display info section about full sync', async ({ page }) => {
+ await expect(page.getByText('About Full Sync')).toBeVisible();
+ await expect(page.getByText(/Downloads the complete NHTSA flat file/)).toBeVisible();
+ await expect(page.getByText(/Contains 2.1M\+ historical complaints/)).toBeVisible();
+ await expect(page.getByText(/Skips existing records to avoid duplicates/)).toBeVisible();
+ await expect(page.getByText(/Estimated time: 1-3 hours/)).toBeVisible();
+ });
+});
+
+test.describe('Admin Sync Interactions', () => {
+ test.beforeEach(async ({ page }) => {
+ await page.goto('/admin');
+ });
+
+ test('should be able to click refresh button', async ({ page }) => {
+ const refreshButton = page.getByTestId('refresh-btn');
+ await expect(refreshButton).toBeEnabled();
+
+ // Click and ensure no errors
+ await refreshButton.click();
+
+ // Dashboard should still be visible
+ await expect(page.getByTestId('sync-dashboard')).toBeVisible();
+ });
+
+ test('should show loading state when starting import', async ({ page }) => {
+ // Note: This test may fail if the API actually starts an import
+ // In a real scenario, you would mock the API
+ const startButton = page.getByTestId('start-import-btn');
+ await expect(startButton).toBeEnabled();
+ });
+});
+
+test.describe('Admin Sync Responsive Layout', () => {
+ test('should display properly on desktop', async ({ page }) => {
+ await page.setViewportSize({ width: 1280, height: 800 });
+ await page.goto('/admin');
+ await expect(page.getByTestId('sync-dashboard')).toBeVisible();
+ });
+
+ test('should display properly on tablet', async ({ page }) => {
+ await page.setViewportSize({ width: 768, height: 1024 });
+ await page.goto('/admin');
+ await expect(page.getByTestId('sync-dashboard')).toBeVisible();
+ });
+
+ test('should display properly on mobile', async ({ page }) => {
+ await page.setViewportSize({ width: 375, height: 667 });
+ await page.goto('/admin');
+ await expect(page.getByTestId('sync-dashboard')).toBeVisible();
+ });
+});
diff --git a/package-lock.json b/package-lock.json
index 2a53fab..7ac3693 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -16,6 +16,7 @@
"@radix-ui/react-dialog": "^1.1.15",
"@radix-ui/react-dropdown-menu": "^2.1.16",
"@radix-ui/react-label": "^2.1.8",
+ "@radix-ui/react-progress": "^1.1.8",
"@radix-ui/react-scroll-area": "^1.2.10",
"@radix-ui/react-select": "^2.2.6",
"@radix-ui/react-separator": "^1.1.8",
@@ -23,6 +24,7 @@
"@radix-ui/react-tabs": "^1.1.13",
"@radix-ui/react-tooltip": "^1.2.8",
"@sentry/nextjs": "^10.32.1",
+ "@types/unzipper": "^0.10.11",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"date-fns": "^4.1.0",
@@ -39,6 +41,7 @@
"stripe": "^20.1.2",
"svix": "^1.84.1",
"tailwind-merge": "^3.4.0",
+ "unzipper": "^0.12.3",
"zod": "^4.3.5"
},
"devDependencies": {
@@ -1104,6 +1107,111 @@
"node": ">= 10"
}
},
+ "node_modules/@next/swc-darwin-x64": {
+ "version": "16.1.1",
+ "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.1.tgz",
+ "integrity": "sha512-hbyKtrDGUkgkyQi1m1IyD3q4I/3m9ngr+V93z4oKHrPcmxwNL5iMWORvLSGAf2YujL+6HxgVvZuCYZfLfb4bGw==",
+ "cpu": [
+ "x64"
+ ],
+ "optional": true,
+ "os": [
+ "darwin"
+ ],
+ "engines": {
+ "node": ">= 10"
+ }
+ },
+ "node_modules/@next/swc-linux-arm64-gnu": {
+ "version": "16.1.1",
+ "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.1.tgz",
+ "integrity": "sha512-/fvHet+EYckFvRLQ0jPHJCUI5/B56+2DpI1xDSvi80r/3Ez+Eaa2Yq4tJcRTaB1kqj/HrYKn8Yplm9bNoMJpwQ==",
+ "cpu": [
+ "arm64"
+ ],
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">= 10"
+ }
+ },
+ "node_modules/@next/swc-linux-arm64-musl": {
+ "version": "16.1.1",
+ "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.1.tgz",
+ "integrity": "sha512-MFHrgL4TXNQbBPzkKKur4Fb5ICEJa87HM7fczFs2+HWblM7mMLdco3dvyTI+QmLBU9xgns/EeeINSZD6Ar+oLg==",
+ "cpu": [
+ "arm64"
+ ],
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">= 10"
+ }
+ },
+ "node_modules/@next/swc-linux-x64-gnu": {
+ "version": "16.1.1",
+ "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.1.tgz",
+ "integrity": "sha512-20bYDfgOQAPUkkKBnyP9PTuHiJGM7HzNBbuqmD0jiFVZ0aOldz+VnJhbxzjcSabYsnNjMPsE0cyzEudpYxsrUQ==",
+ "cpu": [
+ "x64"
+ ],
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">= 10"
+ }
+ },
+ "node_modules/@next/swc-linux-x64-musl": {
+ "version": "16.1.1",
+ "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.1.tgz",
+ "integrity": "sha512-9pRbK3M4asAHQRkwaXwu601oPZHghuSC8IXNENgbBSyImHv/zY4K5udBusgdHkvJ/Tcr96jJwQYOll0qU8+fPA==",
+ "cpu": [
+ "x64"
+ ],
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">= 10"
+ }
+ },
+ "node_modules/@next/swc-win32-arm64-msvc": {
+ "version": "16.1.1",
+ "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.1.tgz",
+ "integrity": "sha512-bdfQkggaLgnmYrFkSQfsHfOhk/mCYmjnrbRCGgkMcoOBZ4n+TRRSLmT/CU5SATzlBJ9TpioUyBW/vWFXTqQRiA==",
+ "cpu": [
+ "arm64"
+ ],
+ "optional": true,
+ "os": [
+ "win32"
+ ],
+ "engines": {
+ "node": ">= 10"
+ }
+ },
+ "node_modules/@next/swc-win32-x64-msvc": {
+ "version": "16.1.1",
+ "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.1.tgz",
+ "integrity": "sha512-Ncwbw2WJ57Al5OX0k4chM68DKhEPlrXBaSXDCi2kPi5f4d8b3ejr3RRJGfKBLrn2YJL5ezNS7w2TZLHSti8CMw==",
+ "cpu": [
+ "x64"
+ ],
+ "optional": true,
+ "os": [
+ "win32"
+ ],
+ "engines": {
+ "node": ">= 10"
+ }
+ },
"node_modules/@nodelib/fs.scandir": {
"version": "2.1.5",
"dev": true,
@@ -2251,6 +2359,68 @@
}
}
},
+ "node_modules/@radix-ui/react-progress": {
+ "version": "1.1.8",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-progress/-/react-progress-1.1.8.tgz",
+ "integrity": "sha512-+gISHcSPUJ7ktBy9RnTqbdKW78bcGke3t6taawyZ71pio1JewwGSJizycs7rLhGTvMJYCQB1DBK4KQsxs7U8dA==",
+ "license": "MIT",
+ "dependencies": {
+ "@radix-ui/react-context": "1.1.3",
+ "@radix-ui/react-primitive": "2.1.4"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@radix-ui/react-progress/node_modules/@radix-ui/react-context": {
+ "version": "1.1.3",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.3.tgz",
+ "integrity": "sha512-ieIFACdMpYfMEjF0rEf5KLvfVyIkOz6PDGyNnP+u+4xQ6jny3VCgA4OgXOwNx2aUkxn8zx9fiVcM8CfFYv9Lxw==",
+ "license": "MIT",
+ "peerDependencies": {
+ "@types/react": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@radix-ui/react-progress/node_modules/@radix-ui/react-primitive": {
+ "version": "2.1.4",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.4.tgz",
+ "integrity": "sha512-9hQc4+GNVtJAIEPEqlYqW5RiYdrr8ea5XQ0ZOnD6fgru+83kqT15mq2OCcbe8KnjRZl5vF3ks69AKz3kh1jrhg==",
+ "license": "MIT",
+ "dependencies": {
+ "@radix-ui/react-slot": "1.2.4"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
"node_modules/@radix-ui/react-roving-focus": {
"version": "1.1.11",
"license": "MIT",
@@ -3438,6 +3608,15 @@
"license": "MIT",
"optional": true
},
+ "node_modules/@types/unzipper": {
+ "version": "0.10.11",
+ "resolved": "https://registry.npmjs.org/@types/unzipper/-/unzipper-0.10.11.tgz",
+ "integrity": "sha512-D25im2zjyMCcgL9ag6N46+wbtJBnXIr7SI4zHf9eJD2Dw2tEB5e+p5MYkrxKIVRscs5QV0EhtU9rgXSPx90oJg==",
+ "license": "MIT",
+ "dependencies": {
+ "@types/node": "*"
+ }
+ },
"node_modules/@typescript-eslint/eslint-plugin": {
"version": "8.53.0",
"dev": true,
@@ -4400,6 +4579,12 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
+ "node_modules/bluebird": {
+ "version": "3.7.2",
+ "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz",
+ "integrity": "sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==",
+ "license": "MIT"
+ },
"node_modules/brace-expansion": {
"version": "1.1.12",
"dev": true,
@@ -4732,6 +4917,12 @@
"url": "https://opencollective.com/core-js"
}
},
+ "node_modules/core-util-is": {
+ "version": "1.0.3",
+ "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz",
+ "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==",
+ "license": "MIT"
+ },
"node_modules/cross-spawn": {
"version": "7.0.6",
"license": "MIT",
@@ -4992,6 +5183,15 @@
"node": ">= 0.4"
}
},
+ "node_modules/duplexer2": {
+ "version": "0.1.4",
+ "resolved": "https://registry.npmjs.org/duplexer2/-/duplexer2-0.1.4.tgz",
+ "integrity": "sha512-asLFVfWWtJ90ZyOUHMqk7/S2w2guQKxUI2itj3d92ADHhxUSbCMGi1f1cBcJ7xM1To+pE/Khbwo1yuNbMEPKeA==",
+ "license": "BSD-3-Clause",
+ "dependencies": {
+ "readable-stream": "^2.0.2"
+ }
+ },
"node_modules/eastasianwidth": {
"version": "0.2.0",
"license": "MIT"
@@ -5933,6 +6133,29 @@
"version": "2.1.2",
"license": "MIT"
},
+ "node_modules/fs-extra": {
+ "version": "11.3.3",
+ "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.3.3.tgz",
+ "integrity": "sha512-VWSRii4t0AFm6ixFFmLLx1t7wS1gh+ckoa84aOeapGum0h+EZd1EhEumSB+ZdDLnEPuucsVB9oB7cxJHap6Afg==",
+ "license": "MIT",
+ "dependencies": {
+ "graceful-fs": "^4.2.0",
+ "jsonfile": "^6.0.1",
+ "universalify": "^2.0.0"
+ },
+ "engines": {
+ "node": ">=14.14"
+ }
+ },
+ "node_modules/fs-extra/node_modules/universalify": {
+ "version": "2.0.1",
+ "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
+ "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
+ "license": "MIT",
+ "engines": {
+ "node": ">= 10.0.0"
+ }
+ },
"node_modules/fsevents": {
"version": "2.3.3",
"license": "MIT",
@@ -6380,6 +6603,12 @@
"node": ">=8"
}
},
+ "node_modules/inherits": {
+ "version": "2.0.4",
+ "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
+ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
+ "license": "ISC"
+ },
"node_modules/internal-slot": {
"version": "1.1.0",
"dev": true,
@@ -7016,6 +7245,27 @@
"node": ">=6"
}
},
+ "node_modules/jsonfile": {
+ "version": "6.2.0",
+ "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.0.tgz",
+ "integrity": "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==",
+ "license": "MIT",
+ "dependencies": {
+ "universalify": "^2.0.0"
+ },
+ "optionalDependencies": {
+ "graceful-fs": "^4.1.6"
+ }
+ },
+ "node_modules/jsonfile/node_modules/universalify": {
+ "version": "2.0.1",
+ "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
+ "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
+ "license": "MIT",
+ "engines": {
+ "node": ">= 10.0.0"
+ }
+ },
"node_modules/jspdf": {
"version": "4.0.0",
"license": "MIT",
@@ -7574,6 +7824,12 @@
"version": "3.0.1",
"license": "BSD-2-Clause"
},
+ "node_modules/node-int64": {
+ "version": "0.4.0",
+ "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz",
+ "integrity": "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==",
+ "license": "MIT"
+ },
"node_modules/node-releases": {
"version": "2.0.27",
"license": "MIT"
@@ -8221,6 +8477,12 @@
}
}
},
+ "node_modules/process-nextick-args": {
+ "version": "2.0.1",
+ "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
+ "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
+ "license": "MIT"
+ },
"node_modules/progress": {
"version": "2.0.3",
"license": "MIT",
@@ -8443,6 +8705,33 @@
}
}
},
+ "node_modules/readable-stream": {
+ "version": "2.3.8",
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz",
+ "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
+ "license": "MIT",
+ "dependencies": {
+ "core-util-is": "~1.0.0",
+ "inherits": "~2.0.3",
+ "isarray": "~1.0.0",
+ "process-nextick-args": "~2.0.0",
+ "safe-buffer": "~5.1.1",
+ "string_decoder": "~1.1.1",
+ "util-deprecate": "~1.0.1"
+ }
+ },
+ "node_modules/readable-stream/node_modules/isarray": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
+ "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
+ "license": "MIT"
+ },
+ "node_modules/readable-stream/node_modules/safe-buffer": {
+ "version": "5.1.2",
+ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
+ "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
+ "license": "MIT"
+ },
"node_modules/readdirp": {
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz",
@@ -9137,6 +9426,21 @@
"dev": true,
"license": "MIT"
},
+ "node_modules/string_decoder": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
+ "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
+ "license": "MIT",
+ "dependencies": {
+ "safe-buffer": "~5.1.0"
+ }
+ },
+ "node_modules/string_decoder/node_modules/safe-buffer": {
+ "version": "5.1.2",
+ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
+ "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
+ "license": "MIT"
+ },
"node_modules/string-width": {
"version": "4.2.3",
"license": "MIT",
@@ -10014,6 +10318,19 @@
"url": "https://github.com/sponsors/kettanaito"
}
},
+ "node_modules/unzipper": {
+ "version": "0.12.3",
+ "resolved": "https://registry.npmjs.org/unzipper/-/unzipper-0.12.3.tgz",
+ "integrity": "sha512-PZ8hTS+AqcGxsaQntl3IRBw65QrBI6lxzqDEL7IAo/XCEqRTKGfOX56Vea5TH9SZczRVxuzk1re04z/YjuYCJA==",
+ "license": "MIT",
+ "dependencies": {
+ "bluebird": "~3.7.2",
+ "duplexer2": "~0.1.4",
+ "fs-extra": "^11.2.0",
+ "graceful-fs": "^4.2.2",
+ "node-int64": "^0.4.0"
+ }
+ },
"node_modules/update-browserslist-db": {
"version": "1.2.3",
"funding": [
@@ -10105,6 +10422,12 @@
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
}
},
+ "node_modules/util-deprecate": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
+ "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
+ "license": "MIT"
+ },
"node_modules/utrie": {
"version": "1.0.2",
"license": "MIT",
@@ -10679,111 +11002,6 @@
"peerDependencies": {
"zod": "^3.25.0 || ^4.0.0"
}
- },
- "node_modules/@next/swc-darwin-x64": {
- "version": "16.1.1",
- "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.1.tgz",
- "integrity": "sha512-hbyKtrDGUkgkyQi1m1IyD3q4I/3m9ngr+V93z4oKHrPcmxwNL5iMWORvLSGAf2YujL+6HxgVvZuCYZfLfb4bGw==",
- "cpu": [
- "x64"
- ],
- "optional": true,
- "os": [
- "darwin"
- ],
- "engines": {
- "node": ">= 10"
- }
- },
- "node_modules/@next/swc-linux-arm64-gnu": {
- "version": "16.1.1",
- "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.1.tgz",
- "integrity": "sha512-/fvHet+EYckFvRLQ0jPHJCUI5/B56+2DpI1xDSvi80r/3Ez+Eaa2Yq4tJcRTaB1kqj/HrYKn8Yplm9bNoMJpwQ==",
- "cpu": [
- "arm64"
- ],
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": ">= 10"
- }
- },
- "node_modules/@next/swc-linux-arm64-musl": {
- "version": "16.1.1",
- "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.1.tgz",
- "integrity": "sha512-MFHrgL4TXNQbBPzkKKur4Fb5ICEJa87HM7fczFs2+HWblM7mMLdco3dvyTI+QmLBU9xgns/EeeINSZD6Ar+oLg==",
- "cpu": [
- "arm64"
- ],
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": ">= 10"
- }
- },
- "node_modules/@next/swc-linux-x64-gnu": {
- "version": "16.1.1",
- "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.1.tgz",
- "integrity": "sha512-20bYDfgOQAPUkkKBnyP9PTuHiJGM7HzNBbuqmD0jiFVZ0aOldz+VnJhbxzjcSabYsnNjMPsE0cyzEudpYxsrUQ==",
- "cpu": [
- "x64"
- ],
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": ">= 10"
- }
- },
- "node_modules/@next/swc-linux-x64-musl": {
- "version": "16.1.1",
- "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.1.tgz",
- "integrity": "sha512-9pRbK3M4asAHQRkwaXwu601oPZHghuSC8IXNENgbBSyImHv/zY4K5udBusgdHkvJ/Tcr96jJwQYOll0qU8+fPA==",
- "cpu": [
- "x64"
- ],
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": ">= 10"
- }
- },
- "node_modules/@next/swc-win32-arm64-msvc": {
- "version": "16.1.1",
- "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.1.tgz",
- "integrity": "sha512-bdfQkggaLgnmYrFkSQfsHfOhk/mCYmjnrbRCGgkMcoOBZ4n+TRRSLmT/CU5SATzlBJ9TpioUyBW/vWFXTqQRiA==",
- "cpu": [
- "arm64"
- ],
- "optional": true,
- "os": [
- "win32"
- ],
- "engines": {
- "node": ">= 10"
- }
- },
- "node_modules/@next/swc-win32-x64-msvc": {
- "version": "16.1.1",
- "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.1.tgz",
- "integrity": "sha512-Ncwbw2WJ57Al5OX0k4chM68DKhEPlrXBaSXDCi2kPi5f4d8b3ejr3RRJGfKBLrn2YJL5ezNS7w2TZLHSti8CMw==",
- "cpu": [
- "x64"
- ],
- "optional": true,
- "os": [
- "win32"
- ],
- "engines": {
- "node": ">= 10"
- }
}
}
}
diff --git a/package.json b/package.json
index 786c647..d8a8075 100644
--- a/package.json
+++ b/package.json
@@ -33,6 +33,7 @@
"@radix-ui/react-dialog": "^1.1.15",
"@radix-ui/react-dropdown-menu": "^2.1.16",
"@radix-ui/react-label": "^2.1.8",
+ "@radix-ui/react-progress": "^1.1.8",
"@radix-ui/react-scroll-area": "^1.2.10",
"@radix-ui/react-select": "^2.2.6",
"@radix-ui/react-separator": "^1.1.8",
@@ -40,6 +41,7 @@
"@radix-ui/react-tabs": "^1.1.13",
"@radix-ui/react-tooltip": "^1.2.8",
"@sentry/nextjs": "^10.32.1",
+ "@types/unzipper": "^0.10.11",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"date-fns": "^4.1.0",
@@ -56,6 +58,7 @@
"stripe": "^20.1.2",
"svix": "^1.84.1",
"tailwind-merge": "^3.4.0",
+ "unzipper": "^0.12.3",
"zod": "^4.3.5"
},
"devDependencies": {
diff --git a/src/app/(dashboard)/admin/page.tsx b/src/app/(dashboard)/admin/page.tsx
new file mode 100644
index 0000000..6f8067c
--- /dev/null
+++ b/src/app/(dashboard)/admin/page.tsx
@@ -0,0 +1,29 @@
+/**
+ * Admin Page
+ * System administration features including NHTSA data sync
+ */
+
+import { Metadata } from 'next';
+import { SyncDashboard } from '@/components/admin';
+
+export const metadata: Metadata = {
+ title: 'Admin | CaseRadar',
+ description: 'System administration and data sync',
+};
+
+export default function AdminPage() {
+ return (
+
+
+
+
Admin
+
+ System administration and NHTSA data sync
+
+
+
+
+
+
+ );
+}
diff --git a/src/app/api/__tests__/nhtsa-bulk-import.test.ts b/src/app/api/__tests__/nhtsa-bulk-import.test.ts
new file mode 100644
index 0000000..d89cea2
--- /dev/null
+++ b/src/app/api/__tests__/nhtsa-bulk-import.test.ts
@@ -0,0 +1,129 @@
+/**
+ * NHTSA Bulk Import API Tests
+ */
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { GET, POST, DELETE } from '../nhtsa/bulk-import/route';
+import { NextRequest } from 'next/server';
+
+// Mock auth
+vi.mock('@/lib/auth', () => ({
+ getCurrentUser: vi.fn().mockResolvedValue({ id: 'test-user', email: 'test@example.com' }),
+}));
+
+// Mock bulk import service
+vi.mock('@/lib/nhtsa/bulk-import', () => {
+ return {
+ BulkImportService: class MockBulkImportService {
+ importFromStream = vi.fn().mockResolvedValue({
+ success: true,
+ recordsProcessed: 100,
+ recordsInserted: 95,
+ recordsSkipped: 3,
+ recordsErrored: 2,
+ durationMs: 1000,
+ errors: [],
+ });
+ getProgress = vi.fn().mockResolvedValue({
+ recordsProcessed: 50,
+ recordsInserted: 48,
+ recordsSkipped: 1,
+ recordsErrored: 1,
+ batchNumber: 5,
+ estimatedTotal: 2000000,
+ percentComplete: 0,
+ startTime: new Date(),
+ elapsedMs: 500,
+ recordsPerSecond: 100,
+ });
+ cancel = vi.fn();
+ },
+ };
+});
+
+// Mock flat file downloader
+vi.mock('@/lib/nhtsa/flat-file-downloader', () => ({
+ downloadAndExtract: vi.fn().mockResolvedValue('/tmp/test/FLAT_CMPL.txt'),
+ getFlatFileStream: vi.fn().mockReturnValue({
+ [Symbol.asyncIterator]: async function* () {
+ yield 'test data';
+ },
+ }),
+}));
+
+
+describe('NHTSA Bulk Import API', () => {
+ beforeEach(() => {
+ vi.clearAllMocks();
+ });
+
+ describe('GET /api/nhtsa/bulk-import', () => {
+ it('should return idle status when no import is running', async () => {
+ const response = await GET();
+ const data = await response.json();
+
+ expect(response.status).toBe(200);
+ expect(data.status).toBe('idle');
+ });
+
+ it('should require authentication', async () => {
+ const { getCurrentUser } = await import('@/lib/auth');
+ (getCurrentUser as ReturnType<typeof vi.fn>).mockResolvedValueOnce(null);
+
+ const response = await GET();
+ expect(response.status).toBe(401);
+ });
+ });
+
+ describe('POST /api/nhtsa/bulk-import', () => {
+ it('should start a bulk import', async () => {
+ const request = new NextRequest('http://localhost/api/nhtsa/bulk-import', {
+ method: 'POST',
+ body: JSON.stringify({}),
+ });
+
+ const response = await POST(request);
+ const data = await response.json();
+
+ expect(response.status).toBe(200);
+ expect(data.success).toBe(true);
+ expect(data.status).toBe('downloading');
+ });
+
+ it('should accept custom batch size', async () => {
+ const request = new NextRequest('http://localhost/api/nhtsa/bulk-import', {
+ method: 'POST',
+ body: JSON.stringify({ batchSize: 500 }),
+ });
+
+ const response = await POST(request);
+ const data = await response.json();
+
+ expect(response.status).toBe(200);
+ expect(data.success).toBe(true);
+ });
+
+ it('should require authentication', async () => {
+ const { getCurrentUser } = await import('@/lib/auth');
+ (getCurrentUser as ReturnType<typeof vi.fn>).mockResolvedValueOnce(null);
+
+ const request = new NextRequest('http://localhost/api/nhtsa/bulk-import', {
+ method: 'POST',
+ body: JSON.stringify({}),
+ });
+
+ const response = await POST(request);
+ expect(response.status).toBe(401);
+ });
+ });
+
+ describe('DELETE /api/nhtsa/bulk-import', () => {
+ it('should require authentication', async () => {
+ const { getCurrentUser } = await import('@/lib/auth');
+ (getCurrentUser as ReturnType<typeof vi.fn>).mockResolvedValueOnce(null);
+
+ const response = await DELETE();
+ expect(response.status).toBe(401);
+ });
+ });
+});
diff --git a/src/app/api/nhtsa/bulk-import/route.ts b/src/app/api/nhtsa/bulk-import/route.ts
new file mode 100644
index 0000000..d133f61
--- /dev/null
+++ b/src/app/api/nhtsa/bulk-import/route.ts
@@ -0,0 +1,230 @@
+/**
+ * NHTSA Bulk Import API
+ * POST /api/nhtsa/bulk-import - Start bulk import from NHTSA flat file
+ * GET /api/nhtsa/bulk-import - Get import status/progress
+ * DELETE /api/nhtsa/bulk-import - Cancel import
+ */
+
+import { NextRequest, NextResponse } from 'next/server';
+import { getCurrentUser } from '@/lib/auth';
+import { BulkImportService, ImportProgress, ImportResult } from '@/lib/nhtsa/bulk-import';
+import { downloadAndExtract, getFlatFileStream } from '@/lib/nhtsa/flat-file-downloader';
+import { rm } from 'fs/promises';
+import path from 'path';
+import os from 'os';
+
+// Global import state (in production, use Redis or database)
+let currentImport: {
+ service: BulkImportService;
+ status: 'downloading' | 'extracting' | 'importing' | 'complete' | 'cancelled' | 'error';
+ progress: ImportProgress | null;
+ result: ImportResult | null;
+ startedAt: Date;
+ error?: string;
+} | null = null;
+
+/**
+ * GET /api/nhtsa/bulk-import
+ * Get current import status and progress
+ */
+export async function GET() {
+ try {
+ const user = await getCurrentUser();
+ if (!user) {
+ return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
+ }
+
+ if (!currentImport) {
+ return NextResponse.json({
+ status: 'idle',
+ message: 'No import in progress',
+ });
+ }
+
+ const progress = currentImport.progress || (await currentImport.service.getProgress());
+
+ return NextResponse.json({
+ status: currentImport.status,
+ progress,
+ result: currentImport.result,
+ startedAt: currentImport.startedAt,
+ error: currentImport.error,
+ });
+ } catch (error) {
+ console.error('Error getting import status:', error);
+ return NextResponse.json(
+ { error: 'Failed to get import status' },
+ { status: 500 }
+ );
+ }
+}
+
+/**
+ * POST /api/nhtsa/bulk-import
+ * Start a new bulk import from NHTSA flat file
+ */
+export async function POST(request: NextRequest) {
+ try {
+ const user = await getCurrentUser();
+ if (!user) {
+ return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
+ }
+
+ // Check if import already in progress
+ if (currentImport && !['complete', 'cancelled', 'error'].includes(currentImport.status)) {
+ return NextResponse.json(
+ { error: 'Import already in progress', status: currentImport.status },
+ { status: 409 }
+ );
+ }
+
+ const body = await request.json().catch(() => ({}));
+ const batchSize = body.batchSize || 1000;
+
+ // Initialize import state
+ const service = new BulkImportService({ batchSize });
+ currentImport = {
+ service,
+ status: 'downloading',
+ progress: null,
+ result: null,
+ startedAt: new Date(),
+ };
+
+ // Run import in background
+ runImportInBackground(service).catch((error) => {
+ if (currentImport) {
+ currentImport.status = 'error';
+ currentImport.error = error instanceof Error ? error.message : String(error);
+ }
+ });
+
+ return NextResponse.json({
+ success: true,
+ message: 'Bulk import started',
+ status: 'downloading',
+ });
+ } catch (error) {
+ console.error('Error starting bulk import:', error);
+ return NextResponse.json(
+ { error: 'Failed to start bulk import', details: String(error) },
+ { status: 500 }
+ );
+ }
+}
+
+/**
+ * DELETE /api/nhtsa/bulk-import
+ * Cancel the current import
+ */
+export async function DELETE() {
+ try {
+ const user = await getCurrentUser();
+ if (!user) {
+ return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
+ }
+
+ if (!currentImport) {
+ return NextResponse.json({
+ success: false,
+ message: 'No import in progress',
+ });
+ }
+
+ currentImport.service.cancel();
+ currentImport.status = 'cancelled';
+
+ return NextResponse.json({
+ success: true,
+ message: 'Import cancelled',
+ });
+ } catch (error) {
+ console.error('Error cancelling import:', error);
+ return NextResponse.json(
+ { error: 'Failed to cancel import' },
+ { status: 500 }
+ );
+ }
+}
+
+/**
+ * Run the import process in the background
+ */
+async function runImportInBackground(service: BulkImportService): Promise<void> {
+ const workDir = path.join(os.tmpdir(), 'nhtsa-bulk-import');
+
+ try {
+ // Download and extract flat file
+ if (currentImport) {
+ currentImport.status = 'downloading';
+ }
+
+ console.log('Starting NHTSA flat file download...');
+ const flatFilePath = await downloadAndExtract(workDir, {
+ onProgress: (progress) => {
+ console.log(`Download progress: ${progress.percentage}%`);
+ },
+ });
+
+ console.log('Flat file extracted:', flatFilePath);
+
+ // Start import
+ if (currentImport) {
+ currentImport.status = 'importing';
+ }
+
+ console.log('Starting bulk import...');
+ const stream = getFlatFileStream(flatFilePath);
+
+ const result = await service.importFromStream(stream, {
+ onProgress: (progress) => {
+ if (currentImport) {
+ currentImport.progress = progress;
+ }
+ // Log progress every 10,000 records
+ if (progress.recordsProcessed % 10000 === 0) {
+ console.log(
+ `Import progress: ${progress.recordsProcessed.toLocaleString()} records ` +
+ `(${progress.percentComplete}%) - ${progress.recordsPerSecond} rec/sec`
+ );
+ }
+ },
+ });
+
+ // Update final state
+ if (currentImport) {
+ currentImport.status = 'complete';
+ currentImport.result = result;
+ }
+
+ console.log('Bulk import complete:', {
+ recordsProcessed: result.recordsProcessed.toLocaleString(),
+ recordsInserted: result.recordsInserted.toLocaleString(),
+ recordsSkipped: result.recordsSkipped.toLocaleString(),
+ recordsErrored: result.recordsErrored.toLocaleString(),
+ durationMs: result.durationMs,
+ });
+
+ // Cleanup
+ try {
+ await rm(workDir, { recursive: true, force: true });
+ } catch {
+ // Ignore cleanup errors
+ }
+ } catch (error) {
+ console.error('Bulk import error:', error);
+ if (currentImport) {
+ currentImport.status = 'error';
+ currentImport.error = error instanceof Error ? error.message : String(error);
+ }
+
+ // Cleanup on error
+ try {
+ await rm(workDir, { recursive: true, force: true });
+ } catch {
+ // Ignore cleanup errors
+ }
+
+ throw error;
+ }
+}
diff --git a/src/components/admin/__tests__/sync-dashboard.test.tsx b/src/components/admin/__tests__/sync-dashboard.test.tsx
new file mode 100644
index 0000000..da064f1
--- /dev/null
+++ b/src/components/admin/__tests__/sync-dashboard.test.tsx
@@ -0,0 +1,237 @@
+/**
+ * Sync Dashboard Component Tests
+ */
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { render, screen, fireEvent, waitFor } from '@testing-library/react';
+import { SyncDashboard } from '../sync-dashboard';
+import { server } from '@/test/mocks/server';
+import { http, HttpResponse } from 'msw';
+
+describe('SyncDashboard', () => {
+ beforeEach(() => {
+ vi.clearAllMocks();
+ });
+
+ afterEach(() => {
+ server.resetHandlers();
+ });
+
+ describe('initial render', () => {
+ it('should render the dashboard', async () => {
+ server.use(
+ http.get('/api/nhtsa/bulk-import', () => {
+ return HttpResponse.json({ status: 'idle', message: 'No import in progress' });
+ })
+ );
+
+ render(<SyncDashboard />);
+
+ await waitFor(() => {
+ expect(screen.getByTestId('sync-dashboard')).toBeInTheDocument();
+ });
+ });
+
+ it('should show idle status when no import is running', async () => {
+ server.use(
+ http.get('/api/nhtsa/bulk-import', () => {
+ return HttpResponse.json({ status: 'idle', message: 'No import in progress' });
+ })
+ );
+
+ render(<SyncDashboard />);
+
+ await waitFor(() => {
+ expect(screen.getByTestId('status-badge')).toHaveTextContent('Idle');
+ });
+ });
+
+ it('should display the start import button', async () => {
+ server.use(
+ http.get('/api/nhtsa/bulk-import', () => {
+ return HttpResponse.json({ status: 'idle' });
+ })
+ );
+
+ render(<SyncDashboard />);
+
+ await waitFor(() => {
+ expect(screen.getByTestId('start-import-btn')).toBeInTheDocument();
+ });
+ });
+ });
+
+ describe('start import', () => {
+ it('should start import when button is clicked', async () => {
+ let postCalled = false;
+
+ server.use(
+ http.get('/api/nhtsa/bulk-import', () => {
+ return HttpResponse.json({ status: 'idle' });
+ }),
+ http.post('/api/nhtsa/bulk-import', () => {
+ postCalled = true;
+ return HttpResponse.json({ success: true, status: 'downloading' });
+ })
+ );
+
+ render(<SyncDashboard />);
+
+ await waitFor(() => {
+ expect(screen.getByTestId('start-import-btn')).toBeEnabled();
+ });
+
+ fireEvent.click(screen.getByTestId('start-import-btn'));
+
+ await waitFor(() => {
+ expect(postCalled).toBe(true);
+ });
+ });
+ });
+
+ describe('import in progress', () => {
+ it('should show progress when import is running', async () => {
+ server.use(
+ http.get('/api/nhtsa/bulk-import', () => {
+ return HttpResponse.json({
+ status: 'importing',
+ progress: {
+ recordsProcessed: 50000,
+ recordsInserted: 48000,
+ recordsSkipped: 1500,
+ recordsErrored: 500,
+ batchNumber: 50,
+ estimatedTotal: 2200000,
+ percentComplete: 2,
+ startTime: new Date().toISOString(),
+ elapsedMs: 60000,
+ recordsPerSecond: 833,
+ },
+ });
+ })
+ );
+
+ render(<SyncDashboard />);
+
+ await waitFor(() => {
+ expect(screen.getByTestId('status-badge')).toHaveTextContent('Importing');
+ expect(screen.getByTestId('progress-bar')).toBeInTheDocument();
+ expect(screen.getByTestId('progress-stats')).toBeInTheDocument();
+ });
+ });
+
+ it('should show cancel button during import', async () => {
+ server.use(
+ http.get('/api/nhtsa/bulk-import', () => {
+ return HttpResponse.json({
+ status: 'importing',
+ progress: {
+ recordsProcessed: 50000,
+ recordsInserted: 48000,
+ recordsSkipped: 1500,
+ recordsErrored: 500,
+ batchNumber: 50,
+ estimatedTotal: 2200000,
+ percentComplete: 2,
+ startTime: new Date().toISOString(),
+ elapsedMs: 60000,
+ recordsPerSecond: 833,
+ },
+ });
+ })
+ );
+
+ render(<SyncDashboard />);
+
+ await waitFor(() => {
+ expect(screen.getByTestId('cancel-import-btn')).toBeInTheDocument();
+ });
+ });
+ });
+
+ describe('import complete', () => {
+ it('should show result summary when import is complete', async () => {
+ server.use(
+ http.get('/api/nhtsa/bulk-import', () => {
+ return HttpResponse.json({
+ status: 'complete',
+ result: {
+ success: true,
+ recordsProcessed: 2100000,
+ recordsInserted: 2050000,
+ recordsSkipped: 45000,
+ recordsErrored: 5000,
+ durationMs: 7200000,
+ errors: [],
+ },
+ });
+ })
+ );
+
+ render(<SyncDashboard />);
+
+ await waitFor(() => {
+ expect(screen.getByTestId('status-badge')).toHaveTextContent('Complete');
+ expect(screen.getByTestId('result-summary')).toBeInTheDocument();
+ });
+ });
+ });
+
+ describe('error handling', () => {
+ it('should display error message when fetch fails', async () => {
+ server.use(
+ http.get('/api/nhtsa/bulk-import', () => {
+ return HttpResponse.error();
+ })
+ );
+
+ render(<SyncDashboard />);
+
+ await waitFor(() => {
+ expect(screen.getByTestId('error-message')).toBeInTheDocument();
+ });
+ });
+
+ it('should show error status when import fails', async () => {
+ server.use(
+ http.get('/api/nhtsa/bulk-import', () => {
+ return HttpResponse.json({
+ status: 'error',
+ error: 'Download failed',
+ result: {
+ success: false,
+ recordsProcessed: 100000,
+ recordsInserted: 95000,
+ recordsSkipped: 4000,
+ recordsErrored: 1000,
+ durationMs: 300000,
+ errors: ['Download failed: Connection timeout'],
+ },
+ });
+ })
+ );
+
+ render(<SyncDashboard />);
+
+ await waitFor(() => {
+ expect(screen.getByTestId('status-badge')).toHaveTextContent('Error');
+ });
+ });
+ });
+
+ describe('refresh', () => {
+ it('should have a refresh button', async () => {
+ server.use(
+ http.get('/api/nhtsa/bulk-import', () => {
+ return HttpResponse.json({ status: 'idle' });
+ })
+ );
+
+ render(<SyncDashboard />);
+
+ await waitFor(() => {
+ expect(screen.getByTestId('refresh-btn')).toBeInTheDocument();
+ });
+ });
+ });
+});
diff --git a/src/components/admin/index.ts b/src/components/admin/index.ts
new file mode 100644
index 0000000..2fe582b
--- /dev/null
+++ b/src/components/admin/index.ts
@@ -0,0 +1,5 @@
+/**
+ * Admin Components
+ */
+
+export { SyncDashboard } from './sync-dashboard';
diff --git a/src/components/admin/sync-dashboard.tsx b/src/components/admin/sync-dashboard.tsx
new file mode 100644
index 0000000..ee0e93c
--- /dev/null
+++ b/src/components/admin/sync-dashboard.tsx
@@ -0,0 +1,413 @@
+/**
+ * Admin Sync Dashboard Component
+ * Allows admins to trigger and monitor NHTSA bulk imports
+ */
+
+'use client';
+
+import { useState, useEffect, useCallback } from 'react';
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card';
+import { Button } from '@/components/ui/button';
+import { Progress } from '@/components/ui/progress';
+import { Badge } from '@/components/ui/badge';
+import {
+ Download,
+ RefreshCw,
+ XCircle,
+ CheckCircle2,
+ AlertTriangle,
+ Database,
+ Clock,
+ Loader2,
+} from 'lucide-react';
+import { cn } from '@/lib/utils';
+
+/**
+ * Import progress from API
+ */
+interface ImportProgress {
+ recordsProcessed: number;
+ recordsInserted: number;
+ recordsSkipped: number;
+ recordsErrored: number;
+ batchNumber: number;
+ estimatedTotal: number;
+ percentComplete: number;
+ startTime: string;
+ elapsedMs: number;
+ recordsPerSecond: number;
+}
+
+/**
+ * Import result from API
+ */
+interface ImportResult {
+ success: boolean;
+ recordsProcessed: number;
+ recordsInserted: number;
+ recordsSkipped: number;
+ recordsErrored: number;
+ durationMs: number;
+ errors: string[];
+}
+
+/**
+ * Status response from API
+ */
+interface StatusResponse {
+ status: 'idle' | 'downloading' | 'extracting' | 'importing' | 'complete' | 'cancelled' | 'error';
+ progress?: ImportProgress;
+ result?: ImportResult;
+ startedAt?: string;
+ error?: string;
+ message?: string;
+}
+
+/**
+ * Format a number with commas
+ */
+function formatNumber(num: number): string {
+ return new Intl.NumberFormat('en-US').format(num);
+}
+
+/**
+ * Format elapsed time in human-readable format
+ */
+function formatDuration(ms: number): string {
+ const seconds = Math.floor(ms / 1000);
+ const minutes = Math.floor(seconds / 60);
+ const hours = Math.floor(minutes / 60);
+
+ if (hours > 0) {
+ return `${hours}h ${minutes % 60}m ${seconds % 60}s`;
+ }
+ if (minutes > 0) {
+ return `${minutes}m ${seconds % 60}s`;
+ }
+ return `${seconds}s`;
+}
+
+/**
+ * Status badge component
+ */
+function StatusBadge({ status }: { status: StatusResponse['status'] }) {
+ const variants = {
+ idle: { label: 'Idle', variant: 'outline' as const, icon: Database },
+ downloading: { label: 'Downloading', variant: 'secondary' as const, icon: Download },
+ extracting: { label: 'Extracting', variant: 'secondary' as const, icon: RefreshCw },
+ importing: { label: 'Importing', variant: 'default' as const, icon: Loader2 },
+ complete: { label: 'Complete', variant: 'success' as const, icon: CheckCircle2 },
+ cancelled: { label: 'Cancelled', variant: 'warning' as const, icon: XCircle },
+ error: { label: 'Error', variant: 'destructive' as const, icon: AlertTriangle },
+ };
+
+ const config = variants[status];
+ const Icon = config.icon;
+ const isAnimating = ['downloading', 'extracting', 'importing'].includes(status);
+
+ return (
+
+
+ {config.label}
+
+ );
+}
+
+/**
+ * Progress stats display
+ */
+function ProgressStats({ progress }: { progress: ImportProgress }) {
+ return (
+
+
+
Processed
+
{formatNumber(progress.recordsProcessed)}
+
+
+
Inserted
+
{formatNumber(progress.recordsInserted)}
+
+
+
Skipped
+
{formatNumber(progress.recordsSkipped)}
+
+
+
Errors
+
{formatNumber(progress.recordsErrored)}
+
+
+ );
+}
+
+/**
+ * Result summary display
+ */
+function ResultSummary({ result }: { result: ImportResult }) {
+ return (
+
+
+
+ {result.success ? (
+
+ ) : (
+
+ )}
+ Import {result.success ? 'Completed' : 'Failed'}
+
+
+
+
+
+
Total Processed
+
{formatNumber(result.recordsProcessed)}
+
+
+
Inserted
+
{formatNumber(result.recordsInserted)}
+
+
+
Duration
+
{formatDuration(result.durationMs)}
+
+
+
Errors
+
{result.errors.length}
+
+
+ {result.errors.length > 0 && (
+
+
Errors:
+
+ {result.errors.slice(0, 5).map((error, i) => (
+ - {error}
+ ))}
+ {result.errors.length > 5 && (
+ - ...and {result.errors.length - 5} more errors
+ )}
+
+
+ )}
+
+
+ );
+}
+
+/**
+ * Admin Sync Dashboard Component
+ */
+export function SyncDashboard() {
+ const [status, setStatus] = useState<StatusResponse | null>(null);
+ const [isLoading, setIsLoading] = useState(true);
+ const [isStarting, setIsStarting] = useState(false);
+ const [isCancelling, setIsCancelling] = useState(false);
+ const [error, setError] = useState<string | null>(null);
+
+ /**
+ * Fetch current status
+ */
+ const fetchStatus = useCallback(async () => {
+ try {
+ const response = await fetch('/api/nhtsa/bulk-import');
+ if (!response.ok) {
+ throw new Error('Failed to fetch status');
+ }
+ const data = await response.json();
+ setStatus(data);
+ setError(null);
+ } catch (err) {
+ setError(err instanceof Error ? err.message : 'Unknown error');
+ } finally {
+ setIsLoading(false);
+ }
+ }, []);
+
+ /**
+ * Start bulk import
+ */
+ const startImport = async () => {
+ setIsStarting(true);
+ setError(null);
+ try {
+ const response = await fetch('/api/nhtsa/bulk-import', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ batchSize: 1000 }),
+ });
+ if (!response.ok) {
+ const data = await response.json();
+ throw new Error(data.error || 'Failed to start import');
+ }
+ await fetchStatus();
+ } catch (err) {
+ setError(err instanceof Error ? err.message : 'Unknown error');
+ } finally {
+ setIsStarting(false);
+ }
+ };
+
+ /**
+ * Cancel import
+ */
+ const cancelImport = async () => {
+ setIsCancelling(true);
+ try {
+ const response = await fetch('/api/nhtsa/bulk-import', {
+ method: 'DELETE',
+ });
+ if (!response.ok) {
+ throw new Error('Failed to cancel import');
+ }
+ await fetchStatus();
+ } catch (err) {
+ setError(err instanceof Error ? err.message : 'Unknown error');
+ } finally {
+ setIsCancelling(false);
+ }
+ };
+
+ // Initial fetch and polling
+ useEffect(() => {
+ fetchStatus();
+
+ // Poll for updates when import is in progress
+ const interval = setInterval(() => {
+ if (status && ['downloading', 'extracting', 'importing'].includes(status.status)) {
+ fetchStatus();
+ }
+ }, 2000);
+
+ return () => clearInterval(interval);
+ }, [fetchStatus, status?.status]);
+
+ const isImportActive = status && ['downloading', 'extracting', 'importing'].includes(status.status);
+
+ return (
+
+ {/* Header */}
+
+
+
+
+
+
+ NHTSA Data Sync
+
+
+ Import all 2.1M+ historical NHTSA complaints
+
+
+ {status &&
}
+
+
+
+
+
+
+ {isImportActive && (
+
+ )}
+
+
+
+
+ {error && (
+
+ )}
+
+
+
+ {/* Progress */}
+ {status?.progress && isImportActive && (
+
+
+
+ Import Progress
+
+
+ {formatDuration(status.progress.elapsedMs)}
+
+
+
+
+
+
+ {formatNumber(status.progress.recordsProcessed)} / {formatNumber(status.progress.estimatedTotal)}
+ {status.progress.percentComplete}%
+
+
+
+ Processing at {formatNumber(status.progress.recordsPerSecond)} records/sec
+
+
+
+
+
+ )}
+
+ {/* Result */}
+ {status?.result && (
+
+ )}
+
+ {/* Info */}
+
+
+ About Full Sync
+
+
+
+ - Downloads the complete NHTSA flat file (~1.5GB compressed)
+ - Contains 2.1M+ historical complaints since 1995
+ - Skips existing records to avoid duplicates
+ - Estimated time: 1-3 hours depending on connection speed
+
+
+
+
+ );
+}
+
+export default SyncDashboard;
diff --git a/src/components/ui/progress.tsx b/src/components/ui/progress.tsx
new file mode 100644
index 0000000..addab0e
--- /dev/null
+++ b/src/components/ui/progress.tsx
@@ -0,0 +1,33 @@
+/**
+ * Progress Component
+ * Based on Radix UI Progress primitive
+ */
+
+'use client';
+
+import * as React from 'react';
+import * as ProgressPrimitive from '@radix-ui/react-progress';
+
+import { cn } from '@/lib/utils';
+
+const Progress = React.forwardRef<
+  React.ElementRef<typeof ProgressPrimitive.Root>,
+  React.ComponentPropsWithoutRef<typeof ProgressPrimitive.Root>
+>(({ className, value, ...props }, ref) => (
+  <ProgressPrimitive.Root
+    ref={ref}
+    className={cn('relative h-2 w-full overflow-hidden rounded-full bg-primary/20', className)}
+    {...props}
+  >
+    <ProgressPrimitive.Indicator
+      className="h-full w-full flex-1 bg-primary transition-all"
+      style={{ transform: `translateX(-${100 - (value || 0)}%)` }}
+    />
+  </ProgressPrimitive.Root>
+));
+Progress.displayName = ProgressPrimitive.Root.displayName;
+
+export { Progress };
diff --git a/src/lib/nhtsa/__tests__/bulk-import.test.ts b/src/lib/nhtsa/__tests__/bulk-import.test.ts
new file mode 100644
index 0000000..82e341b
--- /dev/null
+++ b/src/lib/nhtsa/__tests__/bulk-import.test.ts
@@ -0,0 +1,204 @@
+/**
+ * Bulk Import Service Tests
+ * TDD: Tests written BEFORE implementation
+ */
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { Readable } from 'stream';
+
+// Import the bulk import service
+import {
+ BulkImportService,
+ ImportProgress,
+ ImportOptions,
+ ImportResult,
+} from '../bulk-import';
+
+// Mock the database
+vi.mock('@/lib/db', () => ({
+ prisma: {
+ complaint: {
+ count: vi.fn().mockResolvedValue(17000),
+ findFirst: vi.fn().mockResolvedValue(null),
+ create: vi.fn().mockResolvedValue({ id: 'mock-id' }),
+ createMany: vi.fn().mockResolvedValue({ count: 10 }),
+ },
+ $executeRaw: vi.fn().mockResolvedValue(1),
+ $queryRaw: vi.fn().mockResolvedValue([{ count: 0n }]),
+ },
+}));
+
+// Mock flat file parser
+vi.mock('../flat-file-parser', () => ({
+ parseFlatFileStream: vi.fn(function* () {
+ for (let i = 0; i < 100; i++) {
+ yield {
+ cmplid: String(958241 + i),
+ odino: String(958241 + i),
+ mfr_name: 'Test Manufacturer',
+ maketxt: 'TESTMAKE',
+ modeltxt: 'TESTMODEL',
+ yeartxt: '2020',
+ crash: 'N',
+ faildate: '',
+ fire: 'N',
+ injured: '0',
+ deaths: '0',
+ compdesc: 'TEST COMPONENT',
+ city: 'TEST CITY',
+ state: 'TS',
+ vin: '',
+ datea: '20230101',
+ ldate: '',
+ cdescr: 'Test description',
+ };
+ }
+ }),
+ mapFlatFileToComplaint: vi.fn((record) => ({
+ nhtsaId: record.cmplid,
+ odiNumber: record.odino,
+ manufacturer: record.mfr_name,
+ make: record.maketxt,
+ model: record.modeltxt,
+ year: parseInt(record.yeartxt, 10),
+ component: record.compdesc,
+ description: record.cdescr,
+ crash: record.crash === 'Y',
+ fire: record.fire === 'Y',
+ injuries: parseInt(record.injured, 10) || 0,
+ deaths: parseInt(record.deaths, 10) || 0,
+ failDate: null,
+ dateAdded: new Date(),
+ })),
+}));
+
+describe('BulkImportService', () => {
+ let service: BulkImportService;
+
+ beforeEach(() => {
+ vi.clearAllMocks();
+ service = new BulkImportService();
+ });
+
+ describe('constructor', () => {
+ it('should create a new instance', () => {
+ expect(service).toBeInstanceOf(BulkImportService);
+ });
+
+ it('should accept custom batch size', () => {
+ const customService = new BulkImportService({ batchSize: 500 });
+ expect(customService).toBeInstanceOf(BulkImportService);
+ });
+ });
+
+ describe('importFromStream', () => {
+ it('should import records from a stream', async () => {
+ const mockStream = Readable.from(['test data']);
+ const result = await service.importFromStream(mockStream);
+
+ expect(result.success).toBe(true);
+ expect(result.recordsProcessed).toBeGreaterThan(0);
+ });
+
+ it('should report progress during import', async () => {
+ const mockStream = Readable.from(['test data']);
+ const progressUpdates: ImportProgress[] = [];
+
+ await service.importFromStream(mockStream, {
+ onProgress: (progress) => progressUpdates.push({ ...progress }),
+ });
+
+ expect(progressUpdates.length).toBeGreaterThan(0);
+ });
+
+ it('should skip duplicate records', async () => {
+ const mockStream = Readable.from(['test data']);
+
+ // Mock finding existing record
+ const { prisma } = await import('@/lib/db');
+ (prisma.complaint.findFirst as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
+ id: 'existing-id',
+ });
+
+ const result = await service.importFromStream(mockStream);
+
+ expect(result.recordsSkipped).toBeGreaterThanOrEqual(0);
+ });
+ });
+
+ describe('getProgress', () => {
+ it('should return current import progress', async () => {
+ const progress = await service.getProgress();
+
+ expect(progress).toBeDefined();
+ expect(typeof progress.recordsProcessed).toBe('number');
+ expect(typeof progress.recordsInserted).toBe('number');
+ expect(typeof progress.recordsSkipped).toBe('number');
+ expect(typeof progress.recordsErrored).toBe('number');
+ });
+ });
+
+ describe('cancel', () => {
+ it('should cancel an in-progress import', () => {
+ service.cancel();
+ // Should not throw
+ expect(true).toBe(true);
+ });
+ });
+
+ describe('ImportProgress', () => {
+ it('should have expected properties', () => {
+ const progress: ImportProgress = {
+ recordsProcessed: 1000,
+ recordsInserted: 950,
+ recordsSkipped: 40,
+ recordsErrored: 10,
+ batchNumber: 10,
+ estimatedTotal: 2000000,
+ percentComplete: 50,
+ startTime: new Date(),
+ elapsedMs: 5000,
+ recordsPerSecond: 200,
+ };
+
+ expect(progress.recordsProcessed).toBe(1000);
+ expect(progress.recordsInserted).toBe(950);
+ expect(progress.recordsSkipped).toBe(40);
+ expect(progress.recordsErrored).toBe(10);
+ expect(progress.batchNumber).toBe(10);
+ expect(progress.estimatedTotal).toBe(2000000);
+ expect(progress.percentComplete).toBe(50);
+ expect(progress.recordsPerSecond).toBe(200);
+ });
+ });
+
+ describe('ImportResult', () => {
+ it('should have expected properties', () => {
+ const result: ImportResult = {
+ success: true,
+ recordsProcessed: 2000000,
+ recordsInserted: 1950000,
+ recordsSkipped: 45000,
+ recordsErrored: 5000,
+ durationMs: 3600000,
+ errors: [],
+ };
+
+ expect(result.success).toBe(true);
+ expect(result.recordsProcessed).toBe(2000000);
+ expect(result.recordsInserted).toBe(1950000);
+ expect(result.errors).toEqual([]);
+ });
+ });
+
+ describe('batch processing', () => {
+ it('should process records in batches', async () => {
+ const mockStream = Readable.from(['test data']);
+ const result = await service.importFromStream(mockStream, {
+ batchSize: 10,
+ });
+
+ expect(result.success).toBe(true);
+ });
+ });
+});
diff --git a/src/lib/nhtsa/__tests__/flat-file-downloader.test.ts b/src/lib/nhtsa/__tests__/flat-file-downloader.test.ts
new file mode 100644
index 0000000..937939b
--- /dev/null
+++ b/src/lib/nhtsa/__tests__/flat-file-downloader.test.ts
@@ -0,0 +1,89 @@
+/**
+ * Flat File Downloader Tests
+ * TDD: Tests written BEFORE implementation
+ */
+
+import { describe, it, expect, afterEach } from 'vitest';
+import { rm } from 'fs/promises';
+
+// Import the downloader (will fail until implemented)
+import {
+ downloadFlatFile,
+ extractFlatFile,
+ getFlatFileStream,
+ DownloadProgress,
+ NHTSA_FLAT_FILE_URL,
+} from '../flat-file-downloader';
+
+describe('FlatFileDownloader', () => {
+ const testTempDir = '/tmp/nhtsa-test-downloads';
+
+ afterEach(async () => {
+ // Cleanup test files
+ try {
+ await rm(testTempDir, { recursive: true, force: true });
+ } catch {
+ // Ignore cleanup errors
+ }
+ });
+
+ describe('NHTSA_FLAT_FILE_URL', () => {
+ it('should point to the correct NHTSA download URL', () => {
+ expect(NHTSA_FLAT_FILE_URL).toBe(
+ 'https://static.nhtsa.gov/odi/ffdd/cmpl/FLAT_CMPL.zip'
+ );
+ });
+ });
+
+ describe('downloadFlatFile', () => {
+ // Note: Download tests require real network calls
+ // These are integration tests that should be run separately
+ // The function is tested via E2E tests
+
+ it('should be a function', () => {
+ expect(typeof downloadFlatFile).toBe('function');
+ });
+
+ it('should accept options with progress callback', () => {
+ // Type check - just verify the function signature
+      const options: Parameters<typeof downloadFlatFile>[1] = {
+ onProgress: (_progress) => {},
+ };
+ expect(options).toBeDefined();
+ });
+ });
+
+ describe('extractFlatFile', () => {
+ it('should be a function', () => {
+ // Actual extraction tested via integration tests
+ expect(typeof extractFlatFile).toBe('function');
+ });
+
+ it('should throw error if zip file does not exist', async () => {
+ await expect(
+ extractFlatFile('/nonexistent/file.zip', testTempDir)
+ ).rejects.toThrow();
+ });
+ });
+
+ describe('getFlatFileStream', () => {
+ it('should return a readable stream for the flat file', async () => {
+ // This requires actual file - integration test
+ expect(getFlatFileStream).toBeDefined();
+ });
+ });
+
+ describe('DownloadProgress', () => {
+ it('should have expected interface properties', () => {
+ const progress: DownloadProgress = {
+ bytesDownloaded: 1000,
+ totalBytes: 10000,
+ percentage: 10,
+ };
+
+ expect(progress.bytesDownloaded).toBe(1000);
+ expect(progress.totalBytes).toBe(10000);
+ expect(progress.percentage).toBe(10);
+ });
+ });
+});
diff --git a/src/lib/nhtsa/__tests__/flat-file-parser.test.ts b/src/lib/nhtsa/__tests__/flat-file-parser.test.ts
new file mode 100644
index 0000000..5c8f3dd
--- /dev/null
+++ b/src/lib/nhtsa/__tests__/flat-file-parser.test.ts
@@ -0,0 +1,299 @@
+/**
+ * Flat File Parser Tests
+ * TDD: These tests are written BEFORE the implementation
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+import { Readable } from 'stream';
+
+// Import the parser
+import {
+ parseFlatFileLine,
+ parseFlatFileStream,
+ mapFlatFileToComplaint,
+ FlatFileRecord,
+ FLAT_FILE_COLUMNS,
+} from '../flat-file-parser';
+
+describe('FlatFileParser', () => {
+ describe('FLAT_FILE_COLUMNS', () => {
+ it('should define all required column indices', () => {
+ // Column indices match the NHTSA FLAT_CMPL.txt format
+ // Note: There is no separate ODINO in the flat file, ODINO maps to MFR_NAME index
+ expect(FLAT_FILE_COLUMNS).toBeDefined();
+ expect(FLAT_FILE_COLUMNS.CMPLID).toBe(0);
+ expect(FLAT_FILE_COLUMNS.MFR_NAME).toBe(1);
+ expect(FLAT_FILE_COLUMNS.MAKETXT).toBe(2);
+ expect(FLAT_FILE_COLUMNS.MODELTXT).toBe(3);
+ expect(FLAT_FILE_COLUMNS.YEARTXT).toBe(4);
+ expect(FLAT_FILE_COLUMNS.CRASH).toBe(5);
+ expect(FLAT_FILE_COLUMNS.FAILDATE).toBe(6);
+ expect(FLAT_FILE_COLUMNS.FIRE).toBe(7);
+ expect(FLAT_FILE_COLUMNS.INJURED).toBe(8);
+ expect(FLAT_FILE_COLUMNS.DEATHS).toBe(9);
+ expect(FLAT_FILE_COLUMNS.COMPDESC).toBe(10);
+ expect(FLAT_FILE_COLUMNS.CITY).toBe(11);
+ expect(FLAT_FILE_COLUMNS.STATE).toBe(12);
+ expect(FLAT_FILE_COLUMNS.VIN).toBe(13);
+ expect(FLAT_FILE_COLUMNS.DATEA).toBe(14);
+ expect(FLAT_FILE_COLUMNS.LDATE).toBe(15);
+ expect(FLAT_FILE_COLUMNS.CDESCR).toBe(17);
+ });
+ });
+
+ describe('parseFlatFileLine', () => {
+ it('should parse a valid tab-delimited line', () => {
+ // Format: CMPLID, MFR_NAME, MAKETXT, MODELTXT, YEARTXT, CRASH, FAILDATE, FIRE, INJURED, DEATHS,
+ // COMPDESC, CITY, STATE, VIN, DATEA, LDATE, ???, CDESCR
+ const line = '958241\tVolvo Car USA, LLC\tVOLVO\t760\t1987\tN\t\tN\t0\t0\tENGINE AND ENGINE COOLING:COOLING SYSTEM:RADIATOR ASSEMBLY\tEL CAJON\tCA\t\t19950103\t19950103\t\tRADIATOR FAILED @ HIGHWAY SPEED OBSTRUCTING DRIVERS VISION TEMPORARY.';
+
+ const result = parseFlatFileLine(line);
+
+ expect(result).toBeDefined();
+ expect(result?.cmplid).toBe('958241');
+ expect(result?.odino).toBe('958241'); // odino defaults to cmplid
+ expect(result?.mfr_name).toBe('Volvo Car USA, LLC');
+ expect(result?.maketxt).toBe('VOLVO');
+ expect(result?.modeltxt).toBe('760');
+ expect(result?.yeartxt).toBe('1987');
+ expect(result?.crash).toBe('N');
+ expect(result?.fire).toBe('N');
+ expect(result?.injured).toBe('0');
+ expect(result?.deaths).toBe('0');
+ expect(result?.compdesc).toBe('ENGINE AND ENGINE COOLING:COOLING SYSTEM:RADIATOR ASSEMBLY');
+ expect(result?.city).toBe('EL CAJON');
+ expect(result?.state).toBe('CA');
+ expect(result?.datea).toBe('19950103');
+ expect(result?.cdescr).toContain('RADIATOR FAILED');
+ });
+
+ it('should handle line with crash and injuries', () => {
+ // Format: CMPLID, MFR_NAME, MAKETXT, MODELTXT, YEARTXT, CRASH, FAILDATE, FIRE, INJURED, DEATHS...
+ const line = '958132\tKia America, Inc.\tKIA\tSEPHIA\t1994\tY\t19941230\tN\t0\t0\tPOWER TRAIN:AUTOMATIC TRANSMISSION\tSAN FRANCISCO\tCA\t\t19950103\t19950103\t\tSHIFTED INTO REVERSE VEHICLE JERKED VIOLENTLY.';
+
+ const result = parseFlatFileLine(line);
+
+ expect(result).toBeDefined();
+ expect(result?.crash).toBe('Y');
+ expect(result?.faildate).toBe('19941230');
+ });
+
+ it('should handle line with fire and deaths', () => {
+ const line = '1234567\tFord Motor Company\tFORD\tPINTO\t1978\tY\t19780601\tY\t2\t1\tFUEL SYSTEM, GASOLINE:STORAGE:TANK ASSEMBLY\tLOS ANGELES\tCA\tABC123\t19780701\t19780701\t\tFIRE AFTER REAR COLLISION';
+
+ const result = parseFlatFileLine(line);
+
+ expect(result?.fire).toBe('Y');
+ expect(result?.deaths).toBe('1');
+ expect(result?.injured).toBe('2');
+ });
+
+ it('should return null for empty line', () => {
+ const result = parseFlatFileLine('');
+ expect(result).toBeNull();
+ });
+
+ it('should return null for line with insufficient fields', () => {
+ const result = parseFlatFileLine('958241\tVolvo\tVOLVO');
+ expect(result).toBeNull();
+ });
+
+ it('should handle missing optional fields gracefully', () => {
+ // Line with empty MFR_NAME, FAILDATE, CITY, STATE, VIN
+ const line = '958241\t\tVOLVO\t760\t1987\tN\t\tN\t0\t0\tENGINE\t\t\t\t19950103\t19950103\t\tDescription';
+
+ const result = parseFlatFileLine(line);
+
+ expect(result).toBeDefined();
+ expect(result?.mfr_name).toBe('');
+ expect(result?.faildate).toBe('');
+ expect(result?.vin).toBe('');
+ expect(result?.city).toBe('');
+ });
+
+ it('should trim whitespace from all fields', () => {
+ const line = ' 958241 \t Volvo \t VOLVO \t 760 \t1987\tN\t\tN\t0\t0\tENGINE\tCITY\tCA\t\t19950103\t19950103\t\t Description ';
+
+ const result = parseFlatFileLine(line);
+
+ expect(result?.cmplid).toBe('958241');
+ expect(result?.mfr_name).toBe('Volvo');
+ expect(result?.cdescr).toBe('Description');
+ });
+ });
+
+ describe('parseFlatFileStream', () => {
+ it('should parse multiple lines from a stream', async () => {
+ const lines = [
+ '958241\tVolvo\tVOLVO\t760\t1987\tN\t\tN\t0\t0\tENGINE\tCITY\tCA\t\t19950103\t19950103\t\tDescription 1',
+ '958242\tFord\tFORD\tMUSTANG\t1990\tN\t\tN\t0\t0\tBRAKES\tNYC\tNY\t\t19950104\t19950104\t\tDescription 2',
+ ];
+
+ const stream = Readable.from(lines.join('\n'));
+ const records: FlatFileRecord[] = [];
+
+ for await (const record of parseFlatFileStream(stream)) {
+ records.push(record);
+ }
+
+ expect(records).toHaveLength(2);
+ expect(records[0].cmplid).toBe('958241');
+ expect(records[1].cmplid).toBe('958242');
+ });
+
+ it('should skip invalid lines and continue parsing', async () => {
+ const lines = [
+ '958241\tVolvo\tVOLVO\t760\t1987\tN\t\tN\t0\t0\tENGINE\tCITY\tCA\t\t19950103\t19950103\t\tDescription 1',
+ 'invalid line with not enough fields',
+ '',
+ '958243\tHonda\tHONDA\tACCORD\t1995\tN\t\tN\t0\t0\tSTEERING\tLA\tCA\t\t19950105\t19950105\t\tDescription 3',
+ ];
+
+ const stream = Readable.from(lines.join('\n'));
+ const records: FlatFileRecord[] = [];
+
+ for await (const record of parseFlatFileStream(stream)) {
+ records.push(record);
+ }
+
+ expect(records).toHaveLength(2);
+ expect(records[0].cmplid).toBe('958241');
+ expect(records[1].cmplid).toBe('958243');
+ });
+
+ it('should handle large batches efficiently', async () => {
+ // Generate 1000 lines
+ const lines: string[] = [];
+ for (let i = 0; i < 1000; i++) {
+ lines.push(
+ `${958241 + i}\tMfr${i}\tMAKE${i}\tMODEL${i}\t${1990 + (i % 30)}\tN\t\tN\t0\t0\tCOMP\tCITY\tST\t\t19950103\t19950103\t\tDescription ${i}`
+ );
+ }
+
+ const stream = Readable.from(lines.join('\n'));
+ const records: FlatFileRecord[] = [];
+
+ for await (const record of parseFlatFileStream(stream)) {
+ records.push(record);
+ }
+
+ expect(records).toHaveLength(1000);
+ });
+
+ it('should emit progress events', async () => {
+ const lines = Array(100)
+ .fill(null)
+ .map(
+ (_, i) =>
+ `${958241 + i}\tMfr\tMAKE\tMODEL\t1990\tN\t\tN\t0\t0\tCOMP\tCITY\tST\t\t19950103\t19950103\t\tDesc`
+ );
+
+ const stream = Readable.from(lines.join('\n'));
+ const progressCallback = vi.fn();
+
+ const records: FlatFileRecord[] = [];
+ for await (const record of parseFlatFileStream(stream, { onProgress: progressCallback })) {
+ records.push(record);
+ }
+
+ expect(progressCallback).toHaveBeenCalled();
+ });
+ });
+
+ describe('FlatFileRecord to TransformedComplaint mapping', () => {
+ it('should correctly map all fields', () => {
+ const record: FlatFileRecord = {
+ cmplid: '958241',
+ odino: '958241', // Same as cmplid in flat file format
+ mfr_name: 'Volvo Car USA, LLC',
+ maketxt: 'VOLVO',
+ modeltxt: '760',
+ yeartxt: '1987',
+ crash: 'Y',
+ faildate: '19941230',
+ fire: 'N',
+ injured: '2',
+ deaths: '0',
+ compdesc: 'ENGINE AND ENGINE COOLING:COOLING SYSTEM',
+ city: 'EL CAJON',
+ state: 'CA',
+ vin: 'ABC123',
+ datea: '19950103',
+ ldate: '19950103',
+ cdescr: 'Radiator failed at highway speed.',
+ };
+
+ const complaint = mapFlatFileToComplaint(record);
+
+ expect(complaint.nhtsaId).toBe('958241');
+ expect(complaint.odiNumber).toBe('958241'); // odiNumber same as nhtsaId
+ expect(complaint.manufacturer).toBe('Volvo Car USA, LLC');
+ expect(complaint.make).toBe('VOLVO');
+ expect(complaint.model).toBe('760');
+ expect(complaint.year).toBe(1987);
+ expect(complaint.crash).toBe(true);
+ expect(complaint.fire).toBe(false);
+ expect(complaint.injuries).toBe(2);
+ expect(complaint.deaths).toBe(0);
+ expect(complaint.component).toBe('ENGINE AND ENGINE COOLING:COOLING SYSTEM');
+ expect(complaint.description).toBe('Radiator failed at highway speed.');
+ expect(complaint.failDate).toBeInstanceOf(Date);
+ expect(complaint.dateAdded).toBeInstanceOf(Date);
+ });
+
+ it('should handle invalid year by returning null', () => {
+ const record: FlatFileRecord = {
+ cmplid: '958241',
+ odino: '958241',
+ mfr_name: 'Tesla',
+ maketxt: 'TESLA',
+ modeltxt: 'CHARGER',
+ yeartxt: '9999', // Invalid year
+ crash: 'N',
+ faildate: '',
+ fire: 'N',
+ injured: '0',
+ deaths: '0',
+ compdesc: 'ELECTRICAL',
+ city: '',
+ state: '',
+ vin: '',
+ datea: '20230101',
+ ldate: '',
+ cdescr: 'Charger malfunction',
+ };
+
+ const complaint = mapFlatFileToComplaint(record);
+
+ // Equipment without valid year should have null year
+ expect(complaint.year).toBeNull();
+ });
+
+ it('should handle empty description', () => {
+ const record: FlatFileRecord = {
+ cmplid: '958241',
+ odino: '958241',
+ mfr_name: 'Ford',
+ maketxt: 'FORD',
+ modeltxt: 'F150',
+ yeartxt: '2020',
+ crash: 'N',
+ faildate: '',
+ fire: 'N',
+ injured: '0',
+ deaths: '0',
+ compdesc: 'BRAKES',
+ city: '',
+ state: '',
+ vin: '',
+ datea: '20230101',
+ ldate: '',
+ cdescr: '', // Empty description
+ };
+
+ const complaint = mapFlatFileToComplaint(record);
+
+ expect(complaint.description).toBe('');
+ });
+ });
+});
diff --git a/src/lib/nhtsa/bulk-import.ts b/src/lib/nhtsa/bulk-import.ts
new file mode 100644
index 0000000..fa68eaa
--- /dev/null
+++ b/src/lib/nhtsa/bulk-import.ts
@@ -0,0 +1,302 @@
+/**
+ * Bulk Import Service
+ * Handles importing large numbers of complaints from NHTSA flat file
+ * Uses streaming and batching for memory efficiency
+ */
+
+import { Readable } from 'stream';
+import { prisma } from '@/lib/db';
+import { parseFlatFileStream, mapFlatFileToComplaint, FlatFileRecord } from './flat-file-parser';
+import { TransformedComplaint } from './types';
+
+/**
+ * Import progress information
+ */
+export interface ImportProgress {
+ recordsProcessed: number;
+ recordsInserted: number;
+ recordsSkipped: number;
+ recordsErrored: number;
+ batchNumber: number;
+ estimatedTotal: number;
+ percentComplete: number;
+ startTime: Date;
+ elapsedMs: number;
+ recordsPerSecond: number;
+}
+
+/**
+ * Import result
+ */
+export interface ImportResult {
+ success: boolean;
+ recordsProcessed: number;
+ recordsInserted: number;
+ recordsSkipped: number;
+ recordsErrored: number;
+ durationMs: number;
+ errors: string[];
+}
+
+/**
+ * Import options
+ */
+export interface ImportOptions {
+ batchSize?: number;
+ onProgress?: (progress: ImportProgress) => void;
+ skipDuplicates?: boolean;
+}
+
+/**
+ * Service configuration
+ */
+export interface ServiceConfig {
+ batchSize?: number;
+}
+
+/**
+ * Default batch size for database inserts
+ */
+const DEFAULT_BATCH_SIZE = 1000;
+
+/**
+ * Estimated total records in NHTSA database
+ */
+const ESTIMATED_TOTAL_RECORDS = 2200000;
+
+/**
+ * Bulk Import Service
+ * Manages the import of large datasets from NHTSA flat files
+ */
+export class BulkImportService {
+ private batchSize: number;
+ private cancelled: boolean = false;
+ private currentProgress: ImportProgress;
+
+ constructor(config: ServiceConfig = {}) {
+ this.batchSize = config.batchSize || DEFAULT_BATCH_SIZE;
+ this.currentProgress = this.createInitialProgress();
+ }
+
+ /**
+ * Create initial progress object
+ */
+ private createInitialProgress(): ImportProgress {
+ return {
+ recordsProcessed: 0,
+ recordsInserted: 0,
+ recordsSkipped: 0,
+ recordsErrored: 0,
+ batchNumber: 0,
+ estimatedTotal: ESTIMATED_TOTAL_RECORDS,
+ percentComplete: 0,
+ startTime: new Date(),
+ elapsedMs: 0,
+ recordsPerSecond: 0,
+ };
+ }
+
+ /**
+ * Import records from a readable stream
+ * @param stream - Readable stream containing flat file data
+ * @param options - Import options
+ * @returns Import result with statistics
+ */
+ async importFromStream(
+ stream: Readable,
+ options: ImportOptions = {}
+  ): Promise<ImportResult> {
+ const { batchSize = this.batchSize, onProgress, skipDuplicates = true } = options;
+
+ this.cancelled = false;
+ this.currentProgress = this.createInitialProgress();
+ const startTime = Date.now();
+ const errors: string[] = [];
+
+ let batch: TransformedComplaint[] = [];
+
+ try {
+ // Parse records from the stream
+ for await (const record of parseFlatFileStream(stream)) {
+ if (this.cancelled) {
+ break;
+ }
+
+ try {
+ // Map flat file record to complaint
+ const complaint = mapFlatFileToComplaint(record);
+
+ // Check for duplicates if enabled
+ if (skipDuplicates) {
+ const existing = await prisma.complaint.findFirst({
+ where: { nhtsaId: complaint.nhtsaId },
+ select: { id: true },
+ });
+
+ if (existing) {
+ this.currentProgress.recordsSkipped++;
+ this.currentProgress.recordsProcessed++;
+ continue;
+ }
+ }
+
+ batch.push(complaint);
+
+ // Process batch when full
+ if (batch.length >= batchSize) {
+ await this.processBatch(batch);
+ batch = [];
+ this.currentProgress.batchNumber++;
+
+ // Update progress
+ this.updateProgress(startTime);
+
+ if (onProgress) {
+ onProgress({ ...this.currentProgress });
+ }
+ }
+ } catch (error) {
+ this.currentProgress.recordsErrored++;
+ errors.push(`Error processing record: ${error instanceof Error ? error.message : String(error)}`);
+ }
+
+ this.currentProgress.recordsProcessed++;
+ }
+
+ // Process remaining records
+ if (batch.length > 0) {
+ await this.processBatch(batch);
+ this.currentProgress.batchNumber++;
+ }
+
+ // Final progress update
+ this.updateProgress(startTime);
+
+ if (onProgress) {
+ onProgress({ ...this.currentProgress });
+ }
+
+ return {
+ success: true,
+ recordsProcessed: this.currentProgress.recordsProcessed,
+ recordsInserted: this.currentProgress.recordsInserted,
+ recordsSkipped: this.currentProgress.recordsSkipped,
+ recordsErrored: this.currentProgress.recordsErrored,
+ durationMs: Date.now() - startTime,
+ errors,
+ };
+ } catch (error) {
+ errors.push(`Import failed: ${error instanceof Error ? error.message : String(error)}`);
+
+ return {
+ success: false,
+ recordsProcessed: this.currentProgress.recordsProcessed,
+ recordsInserted: this.currentProgress.recordsInserted,
+ recordsSkipped: this.currentProgress.recordsSkipped,
+ recordsErrored: this.currentProgress.recordsErrored,
+ durationMs: Date.now() - startTime,
+ errors,
+ };
+ }
+ }
+
+ /**
+ * Process a batch of complaints
+ * @param batch - Array of complaints to insert
+ */
+  private async processBatch(batch: TransformedComplaint[]): Promise<void> {
+ if (batch.length === 0) return;
+
+ try {
+ // Use createMany for efficient bulk insert
+ const result = await prisma.complaint.createMany({
+ data: batch.map((complaint) => ({
+ nhtsaId: complaint.nhtsaId,
+ odiNumber: complaint.odiNumber,
+ manufacturer: complaint.manufacturer,
+ make: complaint.make,
+ model: complaint.model,
+ year: complaint.year,
+ component: complaint.component,
+ description: complaint.description,
+ crash: complaint.crash,
+ fire: complaint.fire,
+ injuries: complaint.injuries,
+ deaths: complaint.deaths,
+ failDate: complaint.failDate,
+ dateAdded: complaint.dateAdded,
+ })),
+ skipDuplicates: true,
+ });
+
+ this.currentProgress.recordsInserted += result.count;
+ } catch (error) {
+ // If batch insert fails, try individual inserts
+ for (const complaint of batch) {
+ try {
+ await prisma.complaint.create({
+ data: {
+ nhtsaId: complaint.nhtsaId,
+ odiNumber: complaint.odiNumber,
+ manufacturer: complaint.manufacturer,
+ make: complaint.make,
+ model: complaint.model,
+ year: complaint.year,
+ component: complaint.component,
+ description: complaint.description,
+ crash: complaint.crash,
+ fire: complaint.fire,
+ injuries: complaint.injuries,
+ deaths: complaint.deaths,
+ failDate: complaint.failDate,
+ dateAdded: complaint.dateAdded,
+ },
+ });
+ this.currentProgress.recordsInserted++;
+ } catch {
+ this.currentProgress.recordsErrored++;
+ }
+ }
+ }
+ }
+
+ /**
+ * Update progress calculations
+ */
+ private updateProgress(startTime: number): void {
+ const elapsedMs = Date.now() - startTime;
+ this.currentProgress.elapsedMs = elapsedMs;
+
+ if (elapsedMs > 0) {
+ this.currentProgress.recordsPerSecond = Math.round(
+ (this.currentProgress.recordsProcessed / elapsedMs) * 1000
+ );
+ }
+
+ if (this.currentProgress.estimatedTotal > 0) {
+ this.currentProgress.percentComplete = Math.min(
+ 100,
+ Math.round(
+ (this.currentProgress.recordsProcessed / this.currentProgress.estimatedTotal) * 100
+ )
+ );
+ }
+ }
+
+ /**
+ * Get current import progress
+ * @returns Current progress snapshot
+ */
+  async getProgress(): Promise<ImportProgress> {
+ return { ...this.currentProgress };
+ }
+
+ /**
+ * Cancel an in-progress import
+ */
+ cancel(): void {
+ this.cancelled = true;
+ }
+}
+
+export default BulkImportService;
diff --git a/src/lib/nhtsa/flat-file-downloader.ts b/src/lib/nhtsa/flat-file-downloader.ts
new file mode 100644
index 0000000..b6f5c10
--- /dev/null
+++ b/src/lib/nhtsa/flat-file-downloader.ts
@@ -0,0 +1,242 @@
+/**
+ * NHTSA Flat File Downloader
+ * Downloads and extracts the FLAT_CMPL.zip file from NHTSA
+ * Uses streaming to handle the large file size (~1.5GB compressed)
+ */
+
+import { createWriteStream, createReadStream, existsSync } from 'fs';
+import { mkdir, rm, stat } from 'fs/promises';
+import path from 'path';
+import { Readable } from 'stream';
+import { pipeline } from 'stream/promises';
+import { createGunzip } from 'zlib';
+import { Extract } from 'unzipper';
+
+/**
+ * NHTSA flat file download URL
+ */
+export const NHTSA_FLAT_FILE_URL =
+ 'https://static.nhtsa.gov/odi/ffdd/cmpl/FLAT_CMPL.zip';
+
+/**
+ * Download progress information
+ */
+export interface DownloadProgress {
+ bytesDownloaded: number;
+ totalBytes: number;
+ percentage: number;
+}
+
+/**
+ * Download options
+ */
+export interface DownloadOptions {
+ onProgress?: (progress: DownloadProgress) => void;
+ url?: string;
+}
+
+/**
+ * Download result
+ */
+export interface DownloadResult {
+ success: boolean;
+ filePath: string;
+ size: number;
+ duration: number;
+}
+
+/**
+ * Extract result
+ */
+export interface ExtractResult {
+ success: boolean;
+ filePath: string;
+ size: number;
+}
+
+/**
+ * Download the NHTSA flat file zip to the specified directory
+ * Uses streaming to avoid loading the entire file into memory
+ * @param destDir - Destination directory for the download
+ * @param options - Download options including progress callback
+ * @returns Download result with file path and metadata
+ */
+export async function downloadFlatFile(
+ destDir: string,
+ options: DownloadOptions = {}
+): Promise<DownloadResult> {
+ const { onProgress, url = NHTSA_FLAT_FILE_URL } = options;
+ const startTime = Date.now();
+
+ // Ensure destination directory exists
+ await mkdir(destDir, { recursive: true });
+
+ const zipFileName = 'FLAT_CMPL.zip';
+ const zipFilePath = path.join(destDir, zipFileName);
+
+ // Initiate download
+ const response = await fetch(url);
+
+ if (!response.ok) {
+ throw new Error(`Failed to download: ${response.status} ${response.statusText}`);
+ }
+
+ const totalBytes = parseInt(response.headers.get('content-length') || '0', 10);
+ let bytesDownloaded = 0;
+
+ // Create write stream
+ const writeStream = createWriteStream(zipFilePath);
+
+ // Get reader from response body
+ const reader = response.body?.getReader();
+ if (!reader) {
+ throw new Error('No response body available');
+ }
+
+ // Stream the download
+ try {
+ while (true) {
+ const { done, value } = await reader.read();
+
+ if (done) {
+ break;
+ }
+
+ // Write chunk to file
+ writeStream.write(value);
+ bytesDownloaded += value.length;
+
+ // Report progress
+ if (onProgress && totalBytes > 0) {
+ onProgress({
+ bytesDownloaded,
+ totalBytes,
+ percentage: Math.round((bytesDownloaded / totalBytes) * 100),
+ });
+ }
+ }
+
+ // Close the write stream
+ writeStream.end();
+
+ // Wait for write to complete
+    await new Promise<void>((resolve, reject) => {
+ writeStream.on('finish', resolve);
+ writeStream.on('error', reject);
+ });
+
+ // Final progress update
+ if (onProgress) {
+ onProgress({
+ bytesDownloaded,
+ totalBytes: totalBytes || bytesDownloaded,
+ percentage: 100,
+ });
+ }
+
+ const duration = Date.now() - startTime;
+
+ return {
+ success: true,
+ filePath: zipFilePath,
+ size: bytesDownloaded,
+ duration,
+ };
+ } catch (error) {
+ // Clean up partial file on error
+ try {
+ await rm(zipFilePath, { force: true });
+ } catch {
+ // Ignore cleanup errors
+ }
+ throw error;
+ }
+}
+
+/**
+ * Extract FLAT_CMPL.txt from the downloaded zip file
+ * @param zipPath - Path to the zip file
+ * @param destDir - Destination directory for extraction
+ * @returns Extract result with file path
+ */
+export async function extractFlatFile(
+ zipPath: string,
+ destDir: string
+): Promise<ExtractResult> {
+ // Check zip file exists
+ if (!existsSync(zipPath)) {
+ throw new Error(`Zip file not found: ${zipPath}`);
+ }
+
+ // Ensure destination directory exists
+ await mkdir(destDir, { recursive: true });
+
+ const extractedPath = path.join(destDir, 'FLAT_CMPL.txt');
+
+ // Extract using unzipper
+ await pipeline(
+ createReadStream(zipPath),
+ Extract({ path: destDir })
+ );
+
+ // Verify extraction
+ if (!existsSync(extractedPath)) {
+ throw new Error('Extraction failed: FLAT_CMPL.txt not found');
+ }
+
+ const stats = await stat(extractedPath);
+
+ return {
+ success: true,
+ filePath: extractedPath,
+ size: stats.size,
+ };
+}
+
+/**
+ * Get a readable stream for the flat file
+ * Can be used with parseFlatFileStream for processing
+ * @param filePath - Path to the FLAT_CMPL.txt file
+ * @returns Readable stream
+ */
+export function getFlatFileStream(filePath: string): Readable {
+ if (!existsSync(filePath)) {
+ throw new Error(`File not found: ${filePath}`);
+ }
+
+ return createReadStream(filePath, { encoding: 'utf-8' });
+}
+
+/**
+ * Download and extract the flat file in one operation
+ * @param workDir - Working directory for download and extraction
+ * @param options - Download options
+ * @returns Path to the extracted FLAT_CMPL.txt
+ */
+export async function downloadAndExtract(
+ workDir: string,
+ options: DownloadOptions = {}
+): Promise<string> {
+ // Download
+ const downloadResult = await downloadFlatFile(workDir, options);
+
+ // Extract
+ const extractResult = await extractFlatFile(downloadResult.filePath, workDir);
+
+ // Clean up zip file to save space
+ try {
+ await rm(downloadResult.filePath, { force: true });
+ } catch {
+ // Ignore cleanup errors
+ }
+
+ return extractResult.filePath;
+}
+
+export default {
+ downloadFlatFile,
+ extractFlatFile,
+ getFlatFileStream,
+ downloadAndExtract,
+ NHTSA_FLAT_FILE_URL,
+};
diff --git a/src/lib/nhtsa/flat-file-parser.ts b/src/lib/nhtsa/flat-file-parser.ts
new file mode 100644
index 0000000..c4112b9
--- /dev/null
+++ b/src/lib/nhtsa/flat-file-parser.ts
@@ -0,0 +1,236 @@
+/**
+ * NHTSA Flat File Parser
+ * Parses the FLAT_CMPL.txt file from NHTSA
+ * Format: Tab-delimited with 20+ fields per record
+ */
+
+import { Readable } from 'stream';
+import { createInterface } from 'readline';
+import { TransformedComplaint } from './types';
+
/**
 * Column indices for the NHTSA flat file format
 * These match the order of fields in FLAT_CMPL.txt
 * Note: The flat file has no ODINO column - we use CMPLID as odiNumber
 * NOTE(review): indices assume the published FLAT_CMPL layout — confirm
 * against NHTSA's CMPL.txt field-list documentation if the format changes.
 */
export const FLAT_FILE_COLUMNS = {
  CMPLID: 0, // Unique complaint ID (also used as odiNumber)
  ODINO: 1, // Index 1 is actually MFR_NAME in the file, but kept for API compatibility
  MFR_NAME: 1, // Manufacturer name
  MAKETXT: 2, // Vehicle make
  MODELTXT: 3, // Vehicle model
  YEARTXT: 4, // Model year
  CRASH: 5, // Crash indicator (Y/N)
  FAILDATE: 6, // Date of failure (YYYYMMDD)
  FIRE: 7, // Fire indicator (Y/N)
  INJURED: 8, // Number injured
  DEATHS: 9, // Number of deaths
  COMPDESC: 10, // Component description
  CITY: 11, // City
  STATE: 12, // State
  VIN: 13, // Partial VIN
  DATEA: 14, // Date added (YYYYMMDD)
  LDATE: 15, // Last update date (YYYYMMDD)
  CDESCR: 17, // Complaint description (index 16 is empty)
} as const;
+
/**
 * Minimum number of fields required for a valid record.
 * CDESCR lives at index 17, so a usable line must split into at least
 * 18 tab-separated fields; shorter lines are rejected by parseFlatFileLine.
 */
const MIN_FIELDS = 18;
+
/**
 * Flat file record type (raw parsed data)
 * All values are kept as trimmed strings exactly as they appear in the file;
 * numeric and date conversion happens later in mapFlatFileToComplaint.
 */
export interface FlatFileRecord {
  cmplid: string; // Unique complaint ID (CMPLID column)
  odino: string; // Mirrors cmplid — the flat file has no separate ODINO column
  mfr_name: string; // Manufacturer name
  maketxt: string; // Vehicle make
  modeltxt: string; // Vehicle model
  yeartxt: string; // Model year (raw string; may be non-numeric)
  crash: string; // Crash indicator ('Y'/'N')
  faildate: string; // Date of failure (YYYYMMDD)
  fire: string; // Fire indicator ('Y'/'N')
  injured: string; // Number injured (raw string)
  deaths: string; // Number of deaths (raw string)
  compdesc: string; // Component description
  city: string; // City
  state: string; // State
  vin: string; // Partial VIN
  datea: string; // Date added (YYYYMMDD)
  ldate: string; // Last update date (YYYYMMDD)
  cdescr: string; // Free-text complaint description
}
+
/**
 * Progress callback options
 */
export interface ParseOptions {
  // Invoked with the running totals of successfully parsed records and
  // of non-empty lines that failed to parse.
  onProgress?: (processed: number, errors: number) => void;
  progressInterval?: number; // Report progress every N records (parseFlatFileStream defaults to 1000)
}
+
+/**
+ * Parse a single tab-delimited line from the flat file
+ * @param line - Raw line from the flat file
+ * @returns Parsed record or null if invalid
+ */
+export function parseFlatFileLine(line: string): FlatFileRecord | null {
+ if (!line || line.trim().length === 0) {
+ return null;
+ }
+
+ const fields = line.split('\t');
+
+ // Must have at least MIN_FIELDS
+ if (fields.length < MIN_FIELDS) {
+ return null;
+ }
+
+ // Extract and trim all fields
+ const cmplid = fields[FLAT_FILE_COLUMNS.CMPLID]?.trim() || '';
+ return {
+ cmplid,
+ odino: cmplid, // NHTSA flat file doesn't have separate ODINO, use CMPLID
+ mfr_name: fields[FLAT_FILE_COLUMNS.MFR_NAME]?.trim() || '',
+ maketxt: fields[FLAT_FILE_COLUMNS.MAKETXT]?.trim() || '',
+ modeltxt: fields[FLAT_FILE_COLUMNS.MODELTXT]?.trim() || '',
+ yeartxt: fields[FLAT_FILE_COLUMNS.YEARTXT]?.trim() || '',
+ crash: fields[FLAT_FILE_COLUMNS.CRASH]?.trim() || '',
+ faildate: fields[FLAT_FILE_COLUMNS.FAILDATE]?.trim() || '',
+ fire: fields[FLAT_FILE_COLUMNS.FIRE]?.trim() || '',
+ injured: fields[FLAT_FILE_COLUMNS.INJURED]?.trim() || '',
+ deaths: fields[FLAT_FILE_COLUMNS.DEATHS]?.trim() || '',
+ compdesc: fields[FLAT_FILE_COLUMNS.COMPDESC]?.trim() || '',
+ city: fields[FLAT_FILE_COLUMNS.CITY]?.trim() || '',
+ state: fields[FLAT_FILE_COLUMNS.STATE]?.trim() || '',
+ vin: fields[FLAT_FILE_COLUMNS.VIN]?.trim() || '',
+ datea: fields[FLAT_FILE_COLUMNS.DATEA]?.trim() || '',
+ ldate: fields[FLAT_FILE_COLUMNS.LDATE]?.trim() || '',
+ cdescr: fields[FLAT_FILE_COLUMNS.CDESCR]?.trim() || '',
+ };
+}
+
+/**
+ * Parse a stream of flat file lines into records
+ * Uses async generator for memory-efficient processing
+ * @param stream - Readable stream of file content
+ * @param options - Parse options including progress callback
+ */
+export async function* parseFlatFileStream(
+ stream: Readable,
+ options: ParseOptions = {}
+): AsyncGenerator {
+ const { onProgress, progressInterval = 1000 } = options;
+
+ const rl = createInterface({
+ input: stream,
+ crlfDelay: Infinity,
+ });
+
+ let processed = 0;
+ let errors = 0;
+
+ for await (const line of rl) {
+ const record = parseFlatFileLine(line);
+
+ if (record) {
+ processed++;
+ yield record;
+ } else if (line.trim().length > 0) {
+ // Count non-empty lines that failed to parse as errors
+ errors++;
+ }
+
+ // Report progress
+ if (onProgress && processed % progressInterval === 0) {
+ onProgress(processed, errors);
+ }
+ }
+
+ // Final progress report
+ if (onProgress) {
+ onProgress(processed, errors);
+ }
+}
+
+/**
+ * Parse NHTSA date format (YYYYMMDD) to Date object
+ * @param dateStr - Date string in YYYYMMDD format
+ * @returns Date object or null if invalid
+ */
+function parseNHTSADate(dateStr: string): Date | null {
+ if (!dateStr || dateStr.length < 8) {
+ return null;
+ }
+
+ const year = parseInt(dateStr.substring(0, 4), 10);
+ const month = parseInt(dateStr.substring(4, 6), 10) - 1; // 0-indexed
+ const day = parseInt(dateStr.substring(6, 8), 10);
+
+ if (isNaN(year) || isNaN(month) || isNaN(day)) {
+ return null;
+ }
+
+ const date = new Date(year, month, day);
+
+ // Validate the date is reasonable (1900-2100)
+ if (date.getFullYear() < 1900 || date.getFullYear() > 2100) {
+ return null;
+ }
+
+ return date;
+}
+
+/**
+ * Parse year string to number
+ * @param yearStr - Year string
+ * @returns Year as number or null if invalid
+ */
+function parseYear(yearStr: string): number | null {
+ const year = parseInt(yearStr, 10);
+
+ if (isNaN(year)) {
+ return null;
+ }
+
+ // Valid year range for vehicles
+ const currentYear = new Date().getFullYear();
+ if (year < 1900 || year > currentYear + 2) {
+ return null;
+ }
+
+ return year;
+}
+
+/**
+ * Map a flat file record to our TransformedComplaint format
+ * @param record - Raw flat file record
+ * @returns Transformed complaint ready for database insertion
+ */
+export function mapFlatFileToComplaint(record: FlatFileRecord): TransformedComplaint {
+ return {
+ nhtsaId: record.cmplid,
+ odiNumber: record.odino || record.cmplid, // Use cmplid as fallback
+ manufacturer: record.mfr_name,
+ make: record.maketxt,
+ model: record.modeltxt,
+ year: parseYear(record.yeartxt),
+ component: record.compdesc,
+ description: record.cdescr,
+ crash: record.crash.toUpperCase() === 'Y',
+ fire: record.fire.toUpperCase() === 'Y',
+ injuries: parseInt(record.injured, 10) || 0,
+ deaths: parseInt(record.deaths, 10) || 0,
+ failDate: parseNHTSADate(record.faildate),
+ dateAdded: parseNHTSADate(record.datea) || new Date(),
+ };
+}
+
// Default export bundling the parser API for consumers that prefer a
// single namespace object over the named exports above.
export default {
  parseFlatFileLine,
  parseFlatFileStream,
  mapFlatFileToComplaint,
  FLAT_FILE_COLUMNS,
};