feat(issue-5): Implement Training Dataset backend with vector embeddings #73
The controller, `TrainingDataset.controller.ts` (new file, 243 lines):

```ts
import { Request, Response, NextFunction } from 'express'
import TrainingDatasetService from './TrainingDataset.service'
import * as multer from 'multer'
import * as path from 'path'
import * as os from 'os'

// Configure multer for file uploads
const uploadDir = path.join(os.tmpdir(), 'training-datasets')
const storage = multer.diskStorage({
  destination: (req, file, cb) => {
    cb(null, uploadDir)
  },
  filename: (req, file, cb) => {
    const timestamp = Date.now()
    const random = Math.random().toString(36).substring(7)
    cb(null, `${timestamp}-${random}-${file.originalname}`)
  },
})

const upload = multer({
  storage,
  limits: { fileSize: 100 * 1024 * 1024 }, // 100MB max
  fileFilter: (req, file, cb) => {
    const allowedMimes = ['text/csv', 'application/json', 'text/markdown', 'text/plain']
    const allowedExts = ['.csv', '.json', '.md', '.markdown', '.txt']

    const ext = path.extname(file.originalname).toLowerCase()
    const isMimeAllowed = allowedMimes.includes(file.mimetype)
    const isExtAllowed = allowedExts.includes(ext)

    if (isMimeAllowed || isExtAllowed) {
      cb(null, true)
    } else {
      cb(new Error(`File type not supported. Allowed: ${allowedExts.join(', ')}`))
    }
  },
})

class TrainingDatasetController {
  /**
   * POST /api/v1/training-datasets/upload
   * Upload and process a training dataset file
   */
  async uploadDataset(req: Request, res: Response, next: NextFunction): Promise<void> {
    try {
      const userId = req.user?.id
      if (!userId) {
        res.status(401).json({ error: 'Unauthorized' })
        return
      }

      if (!req.file) {
        res.status(400).json({ error: 'No file provided' })
        return
      }

      const { name, description } = req.body

      if (!name) {
        res.status(400).json({ error: 'Dataset name is required' })
        return
      }

      // Determine file type from extension
      const ext = path.extname(req.file.originalname).toLowerCase()
      let fileType: 'csv' | 'json' | 'markdown' | 'text'

      switch (ext) {
        case '.csv':
          fileType = 'csv'
          break
        case '.json':
          fileType = 'json'
          break
        case '.md':
        case '.markdown':
          fileType = 'markdown'
          break
        case '.txt':
          fileType = 'text'
          break
        default:
          res.status(400).json({ error: 'Unsupported file type' })
          return
      }

      // Create dataset record
      const dataset = await TrainingDatasetService.createDataset(
        userId,
        req.file.originalname,
        fileType,
        req.file.size,
        name,
        description
      )

      // Process dataset asynchronously
      TrainingDatasetService.processDataset(dataset._id.toString(), userId, req.file.path).catch(error => {
        console.error('Error processing dataset:', error)
      })

      res.status(201).json({
        success: true,
        data: dataset,
        message: 'Dataset uploaded successfully and is being processed',
      })
    } catch (error: any) {
      console.error('Error uploading dataset:', error)
      res.status(500).json({ error: error.message || 'Failed to upload dataset' })
    }
  }

  /**
   * GET /api/v1/training-datasets
   * Get all datasets for a user
   */
  async getDatasets(req: Request, res: Response, next: NextFunction): Promise<void> {
    try {
      const userId = req.user?.id
      if (!userId) {
        res.status(401).json({ error: 'Unauthorized' })
        return
      }

      const { skip = 0, limit = 20 } = req.query

      const result = await TrainingDatasetService.getDatasets(
        userId,
        parseInt(skip as string) || 0,
        parseInt(limit as string) || 20
      )

      res.status(200).json({
        success: true,
        data: result.datasets,
        pagination: {
          skip: parseInt(skip as string) || 0,
          limit: parseInt(limit as string) || 20,
          total: result.total,
        },
      })
```
Contributor commented on lines +125 to +141:

The pagination parameters are parsed twice, once for the service call and again for the response body. Parse them once and reuse the values:

```ts
const skip = parseInt(req.query.skip as string, 10) || 0
const limit = parseInt(req.query.limit as string, 10) || 20

const result = await TrainingDatasetService.getDatasets(userId, skip, limit)

res.status(200).json({
  success: true,
  data: result.datasets,
  pagination: {
    skip,
    limit,
    total: result.total,
  },
})
```
```ts
    } catch (error) {
      console.error('Error fetching datasets:', error)
      res.status(500).json({ error: 'Failed to fetch datasets' })
    }
  }

  /**
   * GET /api/v1/training-datasets/:id
   * Get dataset by ID
   */
  async getDataset(req: Request, res: Response, next: NextFunction): Promise<void> {
    try {
      const userId = req.user?.id
      const { id } = req.params

      if (!userId) {
        res.status(401).json({ error: 'Unauthorized' })
        return
      }

      const dataset = await TrainingDatasetService.getDataset(id, userId)

      if (!dataset) {
        res.status(404).json({ error: 'Dataset not found' })
        return
      }

      res.status(200).json({
        success: true,
        data: dataset,
      })
    } catch (error) {
      console.error('Error fetching dataset:', error)
      res.status(500).json({ error: 'Failed to fetch dataset' })
    }
  }

  /**
   * DELETE /api/v1/training-datasets/:id
   * Delete dataset
   */
  async deleteDataset(req: Request, res: Response, next: NextFunction): Promise<void> {
    try {
      const userId = req.user?.id
      const { id } = req.params
      const { deleteSamples = false } = req.query

      if (!userId) {
        res.status(401).json({ error: 'Unauthorized' })
        return
      }

      const success = await TrainingDatasetService.deleteDataset(id, userId, deleteSamples === 'true')

      if (!success) {
        res.status(404).json({ error: 'Dataset not found' })
        return
      }

      res.status(200).json({
        success: true,
        message: 'Dataset deleted successfully',
      })
    } catch (error) {
      console.error('Error deleting dataset:', error)
      res.status(500).json({ error: 'Failed to delete dataset' })
    }
  }

  /**
   * GET /api/v1/training-datasets/stats
   * Get dataset statistics
   */
  async getStatistics(req: Request, res: Response, next: NextFunction): Promise<void> {
    try {
      const userId = req.user?.id
      if (!userId) {
        res.status(401).json({ error: 'Unauthorized' })
        return
      }

      const stats = await TrainingDatasetService.getStatistics(userId)

      res.status(200).json({
        success: true,
        data: stats,
      })
    } catch (error) {
      console.error('Error fetching statistics:', error)
      res.status(500).json({ error: 'Failed to fetch statistics' })
    }
  }

  /**
   * Get multer upload middleware
   */
  getUploadMiddleware() {
    return upload.single('file')
  }
}

export default new TrainingDatasetController()
```
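For a quick check of the upload flow, here is a minimal sketch using supertest; it assumes the router is mounted at `/api/v1/training-datasets` and that auth is stubbed so `req.user` is populated. The `app` import, fixture path, and field values are hypothetical, not part of this PR:

```ts
import request from 'supertest'
import app from '../app' // hypothetical app entry point

async function smokeTestUpload(): Promise<void> {
  const res = await request(app)
    .post('/api/v1/training-datasets/upload')
    .field('name', 'FAQ corpus') // required: uploadDataset rejects a missing name with 400
    .field('description', 'Example upload') // optional
    .attach('file', 'fixtures/faq.csv') // must pass the multer fileFilter (.csv is allowed)

  // Expect 201 with the created dataset record; processing continues asynchronously.
  console.log(res.status, res.body)
}
```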
The Mongoose model (new file, 87 lines):

```ts
import { Schema, Document, Types, model } from 'mongoose'

export interface ITrainingDataset extends Document {
  userId: Types.ObjectId
  name: string
  description?: string
  fileName: string
  fileType: 'csv' | 'json' | 'markdown' | 'text'
  fileSize: number // in bytes
  sampleCount: number
  status: 'pending' | 'processing' | 'completed' | 'failed'
  errorMessage?: string
  importedAt?: Date
  isActive: boolean
  metadata?: {
    headers?: string[]
    delimiter?: string
    encoding?: string
  }
  createdAt: Date
  updatedAt: Date
}

const TrainingDatasetSchema = new Schema<ITrainingDataset>(
  {
    userId: {
      type: Schema.Types.ObjectId,
      ref: 'User',
      required: true,
    },
    name: {
      type: String,
      required: true,
      maxlength: 255,
    },
    description: {
      type: String,
      maxlength: 1000,
    },
    fileName: {
      type: String,
      required: true,
      index: true,
    },
    fileType: {
      type: String,
      enum: ['csv', 'json', 'markdown', 'text'],
      required: true,
    },
    fileSize: {
      type: Number,
      required: true,
    },
    sampleCount: {
      type: Number,
      default: 0,
    },
    status: {
      type: String,
      enum: ['pending', 'processing', 'completed', 'failed'],
      default: 'pending',
      index: true,
    },
    errorMessage: String,
    importedAt: Date,
    isActive: {
      type: Boolean,
      default: true,
      index: true,
    },
    metadata: {
      headers: [String],
      delimiter: String,
      encoding: String,
    },
  },
  {
    timestamps: true,
  }
)

// Compound indices for common queries
TrainingDatasetSchema.index({ userId: 1, status: 1 })
TrainingDatasetSchema.index({ userId: 1, isActive: 1 })
TrainingDatasetSchema.index({ userId: 1, createdAt: -1 })

export const TrainingDataset = model<ITrainingDataset>('TrainingDataset', TrainingDatasetSchema)
```
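As a usage note, the compound indices line up with the listing pattern in the controller. A minimal sketch of the kind of query the `{ userId: 1, createdAt: -1 }` index can serve (illustrative only; the real queries live in `TrainingDataset.service`, which is not in this diff, and the import path is assumed):

```ts
import { Types } from 'mongoose'
import { TrainingDataset } from './TrainingDataset.model' // assumed file name

async function listRecentDatasets(userId: string, skip = 0, limit = 20) {
  // Filter by owner, newest first; the { userId: 1, createdAt: -1 } compound
  // index covers the equality-plus-sort shape of this query.
  return TrainingDataset.find({ userId: new Types.ObjectId(userId) })
    .sort({ createdAt: -1 })
    .skip(skip)
    .limit(limit)
    .lean()
}
```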
The routes (new file, 52 lines):

```ts
import { Router } from 'express'
import TrainingDatasetController from './TrainingDataset.controller'

const router = Router()

/**
 * Training Dataset Routes
 * All routes require authentication
 */

/**
 * POST /api/v1/training-datasets/upload
 * Upload a training dataset file
 * Supports: CSV, JSON, Markdown, Text files
 * Max file size: 100MB
 */
router.post(
  '/upload',
  TrainingDatasetController.getUploadMiddleware(),
  TrainingDatasetController.uploadDataset.bind(TrainingDatasetController)
)

/**
 * GET /api/v1/training-datasets
 * Get all datasets for the user
 * Query params:
 *  - skip: number (default: 0)
 *  - limit: number (default: 20)
 */
router.get('/', TrainingDatasetController.getDatasets.bind(TrainingDatasetController))

/**
 * GET /api/v1/training-datasets/stats
 * Get statistics about datasets
 */
router.get('/stats', TrainingDatasetController.getStatistics.bind(TrainingDatasetController))

/**
 * GET /api/v1/training-datasets/:id
 * Get a single dataset
 */
router.get('/:id', TrainingDatasetController.getDataset.bind(TrainingDatasetController))

/**
 * DELETE /api/v1/training-datasets/:id
 * Delete a dataset
 * Query params:
 *  - deleteSamples: boolean (default: false) - whether to delete associated samples
 */
router.delete('/:id', TrainingDatasetController.deleteDataset.bind(TrainingDatasetController))

export default router
```
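Finally, a sketch of how this router might be wired into the app. The mount point follows the `/api/v1/training-datasets` paths in the route comments, and the `authenticate` middleware is an assumption based on "All routes require authentication"; it stands in for whatever populates `req.user` for the controller's checks and is not shown in this PR:

```ts
import * as express from 'express'
import trainingDatasetRouter from './TrainingDataset.routes' // assumed file name
import { authenticate } from '../middleware/auth' // hypothetical auth middleware

const app = express()

// Mount behind authentication so every handler can rely on req.user being set.
app.use('/api/v1/training-datasets', authenticate, trainingDatasetRouter)

export default app
```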