diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.controller.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.controller.ts new file mode 100644 index 0000000..f671222 --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.controller.ts @@ -0,0 +1,243 @@ +import { Request, Response, NextFunction } from 'express' +import TrainingDatasetService from './TrainingDataset.service' +import * as multer from 'multer' +import * as path from 'path' +import * as os from 'os' + +// Configure multer for file uploads +const uploadDir = path.join(os.tmpdir(), 'training-datasets') +const storage = multer.diskStorage({ + destination: (req, file, cb) => { + cb(null, uploadDir) + }, + filename: (req, file, cb) => { + const timestamp = Date.now() + const random = Math.random().toString(36).substring(7) + cb(null, `${timestamp}-${random}-${file.originalname}`) + }, +}) + +const upload = multer({ + storage, + limits: { fileSize: 100 * 1024 * 1024 }, // 100MB max + fileFilter: (req, file, cb) => { + const allowedMimes = ['text/csv', 'application/json', 'text/markdown', 'text/plain'] + const allowedExts = ['.csv', '.json', '.md', '.markdown', '.txt'] + + const ext = path.extname(file.originalname).toLowerCase() + const isMimeAllowed = allowedMimes.includes(file.mimetype) + const isExtAllowed = allowedExts.includes(ext) + + if (isMimeAllowed || isExtAllowed) { + cb(null, true) + } else { + cb(new Error(`File type not supported. Allowed: ${allowedExts.join(', ')}`)) + } + }, +}) + +class TrainingDatasetController { + /** + * POST /api/v1/training-datasets/upload + * Upload and process a training dataset file + */ + async uploadDataset(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + if (!req.file) { + res.status(400).json({ error: 'No file provided' }) + return + } + + const { name, description } = req.body + + if (!name) { + res.status(400).json({ error: 'Dataset name is required' }) + return + } + + // Determine file type from extension + const ext = path.extname(req.file.originalname).toLowerCase() + let fileType: 'csv' | 'json' | 'markdown' | 'text' + + switch (ext) { + case '.csv': + fileType = 'csv' + break + case '.json': + fileType = 'json' + break + case '.md': + case '.markdown': + fileType = 'markdown' + break + case '.txt': + fileType = 'text' + break + default: + res.status(400).json({ error: 'Unsupported file type' }) + return + } + + // Create dataset record + const dataset = await TrainingDatasetService.createDataset( + userId, + req.file.originalname, + fileType, + req.file.size, + name, + description + ) + + // Process dataset asynchronously + TrainingDatasetService.processDataset(dataset._id.toString(), userId, req.file.path).catch(error => { + console.error('Error processing dataset:', error) + }) + + res.status(201).json({ + success: true, + data: dataset, + message: 'Dataset uploaded successfully and is being processed', + }) + } catch (error: any) { + console.error('Error uploading dataset:', error) + res.status(500).json({ error: error.message || 'Failed to upload dataset' }) + } + } + + /** + * GET /api/v1/training-datasets + * Get all datasets for a user + */ + async getDatasets(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + const { skip = 0, limit = 20 } = 
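
A note on the multer configuration above: when `destination` is supplied as a callback, multer does not create the directory itself (it only does that when `destination` is a plain string), so the first upload after a fresh boot of the host can fail with `ENOENT` because the `os.tmpdir()` subdirectory does not exist yet. A minimal guard next to the `uploadDir` declaration is shown below; it is also prudent to pass `file.originalname` through `path.basename()` in the `filename` callback, since that string is client-controlled.

```ts
import * as fs from 'fs'
import * as os from 'os'
import * as path from 'path'

const uploadDir = path.join(os.tmpdir(), 'training-datasets')

// multer only auto-creates the destination when it is given as a string,
// not when it is returned from a callback, so create it up front.
fs.mkdirSync(uploadDir, { recursive: true })
```
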
req.query + + const result = await TrainingDatasetService.getDatasets( + userId, + parseInt(skip as string) || 0, + parseInt(limit as string) || 20 + ) + + res.status(200).json({ + success: true, + data: result.datasets, + pagination: { + skip: parseInt(skip as string) || 0, + limit: parseInt(limit as string) || 20, + total: result.total, + }, + }) + } catch (error) { + console.error('Error fetching datasets:', error) + res.status(500).json({ error: 'Failed to fetch datasets' }) + } + } + + /** + * GET /api/v1/training-datasets/:id + * Get dataset by ID + */ + async getDataset(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + const { id } = req.params + + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + const dataset = await TrainingDatasetService.getDataset(id, userId) + + if (!dataset) { + res.status(404).json({ error: 'Dataset not found' }) + return + } + + res.status(200).json({ + success: true, + data: dataset, + }) + } catch (error) { + console.error('Error fetching dataset:', error) + res.status(500).json({ error: 'Failed to fetch dataset' }) + } + } + + /** + * DELETE /api/v1/training-datasets/:id + * Delete dataset + */ + async deleteDataset(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + const { id } = req.params + const { deleteSamples = false } = req.query + + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + const success = await TrainingDatasetService.deleteDataset(id, userId, deleteSamples === 'true') + + if (!success) { + res.status(404).json({ error: 'Dataset not found' }) + return + } + + res.status(200).json({ + success: true, + message: 'Dataset deleted successfully', + }) + } catch (error) { + console.error('Error deleting dataset:', error) + res.status(500).json({ error: 'Failed to delete dataset' }) + } + } + + /** + * GET /api/v1/training-datasets/stats + * Get dataset statistics + */ + async getStatistics(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + const stats = await TrainingDatasetService.getStatistics(userId) + + res.status(200).json({ + success: true, + data: stats, + }) + } catch (error) { + console.error('Error fetching statistics:', error) + res.status(500).json({ error: 'Failed to fetch statistics' }) + } + } + + /** + * Get multer upload middleware + */ + getUploadMiddleware() { + return upload.single('file') + } +} + +export default new TrainingDatasetController() diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.model.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.model.ts new file mode 100644 index 0000000..a9ab41e --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.model.ts @@ -0,0 +1,87 @@ +import { Schema, Document, Types, model } from 'mongoose' + +export interface ITrainingDataset extends Document { + userId: Types.ObjectId + name: string + description?: string + fileName: string + fileType: 'csv' | 'json' | 'markdown' | 'text' + fileSize: number // in bytes + sampleCount: number + status: 'pending' | 'processing' | 'completed' | 'failed' + errorMessage?: string + importedAt?: Date + isActive: boolean + metadata?: { + headers?: string[] + delimiter?: string + encoding?: string + } + createdAt: Date + updatedAt: Date +} + +const TrainingDatasetSchema = new Schema( + { + userId: { + 
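
Every handler here reads `req.user?.id`, which only typechecks if something augments Express's `Request` type. A minimal sketch of that augmentation follows, assuming the project's auth middleware attaches a `{ id: string }` payload (the real shape depends on that middleware):

```ts
// express.d.ts: module augmentation so `req.user?.id` typechecks.
// The payload shape is an assumption; align it with the actual auth middleware.
declare global {
  namespace Express {
    interface Request {
      user?: { id: string }
    }
  }
}

export {}
```
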
type: Schema.Types.ObjectId, + ref: 'User', + required: true, + }, + name: { + type: String, + required: true, + maxlength: 255, + }, + description: { + type: String, + maxlength: 1000, + }, + fileName: { + type: String, + required: true, + index: true, + }, + fileType: { + type: String, + enum: ['csv', 'json', 'markdown', 'text'], + required: true, + }, + fileSize: { + type: Number, + required: true, + }, + sampleCount: { + type: Number, + default: 0, + }, + status: { + type: String, + enum: ['pending', 'processing', 'completed', 'failed'], + default: 'pending', + index: true, + }, + errorMessage: String, + importedAt: Date, + isActive: { + type: Boolean, + default: true, + index: true, + }, + metadata: { + headers: [String], + delimiter: String, + encoding: String, + }, + }, + { + timestamps: true, + } +) + +// Compound indices for common queries +TrainingDatasetSchema.index({ userId: 1, status: 1 }) +TrainingDatasetSchema.index({ userId: 1, isActive: 1 }) +TrainingDatasetSchema.index({ userId: 1, createdAt: -1 }) + +export const TrainingDataset = model('TrainingDataset', TrainingDatasetSchema) diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.routes.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.routes.ts new file mode 100644 index 0000000..24f97bf --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.routes.ts @@ -0,0 +1,52 @@ +import { Router } from 'express' +import TrainingDatasetController from './TrainingDataset.controller' + +const router = Router() + +/** + * Training Dataset Routes + * All routes require authentication + */ + +/** + * POST /api/v1/training-datasets/upload + * Upload a training dataset file + * Supports: CSV, JSON, Markdown, Text files + * Max file size: 100MB + */ +router.post( + '/upload', + TrainingDatasetController.getUploadMiddleware(), + TrainingDatasetController.uploadDataset.bind(TrainingDatasetController) +) + +/** + * GET /api/v1/training-datasets + * Get all datasets for the user + * Query params: + * - skip: number (default: 0) + * - limit: number (default: 20) + */ +router.get('/', TrainingDatasetController.getDatasets.bind(TrainingDatasetController)) + +/** + * GET /api/v1/training-datasets/stats + * Get statistics about datasets + */ +router.get('/stats', TrainingDatasetController.getStatistics.bind(TrainingDatasetController)) + +/** + * GET /api/v1/training-datasets/:id + * Get a single dataset + */ +router.get('/:id', TrainingDatasetController.getDataset.bind(TrainingDatasetController)) + +/** + * DELETE /api/v1/training-datasets/:id + * Delete a dataset + * Query params: + * - deleteSamples: boolean (default: false) - whether to delete associated samples + */ +router.delete('/:id', TrainingDatasetController.deleteDataset.bind(TrainingDatasetController)) + +export default router diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.service.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.service.ts new file mode 100644 index 0000000..f8678d7 --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingDataset.service.ts @@ -0,0 +1,218 @@ +import { Types } from 'mongoose' +import { TrainingDataset, ITrainingDataset } from './TrainingDataset.model' +import { TrainingSample } from './TrainingSample.model' +import TrainingSampleFileParser, { ParsedSample } from './TrainingSample.utils' +import EmbeddingUtils from './TrainingSample.embedding' +import * as fs from 'fs' + +class TrainingDatasetService { + /** + * Create a new dataset record 
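
The route files state that all routes require authentication, but no auth middleware is attached inside them, so that contract has to be enforced where the routers are mounted. A hedged sketch, with `requireAuth` and the import paths standing in for whatever the project actually uses:

```ts
import express from 'express'
import trainingSampleRoutes from './api/v1/TrainingSample/TrainingSample.routes'
import trainingDatasetRoutes from './api/v1/TrainingSample/TrainingDataset.routes'
import { requireAuth } from './middleware/auth' // hypothetical auth middleware

const app = express()
app.use(express.json())

// Auth runs before either router, making good on the "all routes require
// authentication" comment in the route files.
app.use('/api/v1/training-samples', requireAuth, trainingSampleRoutes)
app.use('/api/v1/training-datasets', requireAuth, trainingDatasetRoutes)
```
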
+ */ + async createDataset( + userId: string, + fileName: string, + fileType: 'csv' | 'json' | 'markdown' | 'text', + fileSize: number, + name: string, + description?: string + ): Promise { + const dataset = await TrainingDataset.create({ + userId: new Types.ObjectId(userId), + fileName, + fileType, + fileSize, + name, + description, + status: 'pending', + }) + + return dataset + } + + /** + * Process a dataset file and create training samples + */ + async processDataset(datasetId: string, userId: string, filePath: string): Promise { + const dataset = await TrainingDataset.findOne({ + _id: datasetId, + userId: new Types.ObjectId(userId), + }) + + if (!dataset) { + throw new Error('Dataset not found') + } + + try { + // Update status to processing + await TrainingDataset.updateOne({ _id: datasetId }, { status: 'processing' }) + + // Parse file + const parsedSamples = await TrainingSampleFileParser.parseFile(filePath) + + // Validate samples + const { valid: validSamples, errors } = TrainingSampleFileParser.validateSamples(parsedSamples) + + if (validSamples.length === 0) { + throw new Error(`No valid samples found. Errors: ${errors.join('; ')}`) + } + + // Generate embeddings and create samples + const createdSamples = [] + for (const sample of validSamples) { + try { + const embeddingText = `${sample.question} ${sample.answerTemplate.answer}` + const { embedding } = await EmbeddingUtils.generateEmbedding(embeddingText) + + const trainingSample = await TrainingSample.create({ + userId: new Types.ObjectId(userId), + ...sample, + embedding, + }) + + createdSamples.push(trainingSample) + } catch (error) { + console.error('Error creating sample:', sample.question, error) + // Continue with next sample + } + } + + // Update dataset with results + await TrainingDataset.updateOne( + { _id: datasetId }, + { + status: 'completed', + sampleCount: createdSamples.length, + importedAt: new Date(), + errorMessage: errors.length > 0 ? 
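
`processDataset` embeds and inserts strictly one sample at a time, so a 10k-row file pays 10k sequential embedding round trips, and the created samples never record which dataset they came from even though the sample model has a `datasetId` field. A sketch of the same loop with bounded concurrency that also stamps `datasetId`; the batch size is an assumption to tune against the embedding API's rate limits:

```ts
const BATCH_SIZE = 8 // assumption: tune to the embedding API's rate limits
const createdSamples: ITrainingSample[] = [] // ITrainingSample from './TrainingSample.types'

for (let i = 0; i < validSamples.length; i += BATCH_SIZE) {
  const batch = validSamples.slice(i, i + BATCH_SIZE)
  const settled = await Promise.allSettled(
    batch.map(async sample => {
      const embeddingText = `${sample.question} ${sample.answerTemplate.answer}`
      const { embedding } = await EmbeddingUtils.generateEmbedding(embeddingText)
      return TrainingSample.create({
        userId: new Types.ObjectId(userId),
        datasetId: dataset._id, // link each sample back to its dataset
        ...sample,
        embedding,
      })
    })
  )
  for (const result of settled) {
    if (result.status === 'fulfilled') createdSamples.push(result.value)
    else console.error('Error creating sample:', result.reason)
  }
}
```
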
errors.slice(0, 5).join('; ') : undefined, + } + ) + + // Clean up temporary file + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath) + } + + return createdSamples.length + } catch (error: any) { + console.error('Error processing dataset:', error) + + await TrainingDataset.updateOne( + { _id: datasetId }, + { + status: 'failed', + errorMessage: error.message, + } + ) + + throw error + } + } + + /** + * Get all datasets for a user + */ + async getDatasets( + userId: string, + skip: number = 0, + limit: number = 20 + ): Promise<{ datasets: ITrainingDataset[]; total: number }> { + const [datasets, total] = await Promise.all([ + TrainingDataset.find({ userId: new Types.ObjectId(userId), isActive: true }) + .skip(skip) + .limit(limit) + .sort({ createdAt: -1 }) + .exec(), + TrainingDataset.countDocuments({ + userId: new Types.ObjectId(userId), + isActive: true, + }), + ]) + + return { datasets, total } + } + + /** + * Get dataset by ID + */ + async getDataset(datasetId: string, userId: string): Promise { + return await TrainingDataset.findOne({ + _id: datasetId, + userId: new Types.ObjectId(userId), + }) + } + + /** + * Delete dataset (and optionally associated samples) + */ + async deleteDataset(datasetId: string, userId: string, deletesSamples: boolean = false): Promise { + const dataset = await this.getDataset(datasetId, userId) + + if (!dataset) { + return false + } + + if (deletesSamples) { + // Delete all samples associated with this dataset + await TrainingSample.deleteMany({ + sourceType: 'dataset', + userId: new Types.ObjectId(userId), + }) + } + + await TrainingDataset.updateOne({ _id: datasetId }, { isActive: false }) + + return true + } + + /** + * Get dataset statistics + */ + async getStatistics(userId: string): Promise<{ + totalDatasets: number + completedDatasets: number + failedDatasets: number + totalSamples: number + byFileType: Record + }> { + const userIdObj = new Types.ObjectId(userId) + + const [totalDatasets, completedDatasets, failedDatasets, totalSamples, byFileType] = await Promise.all([ + TrainingDataset.countDocuments({ userId: userIdObj, isActive: true }), + TrainingDataset.countDocuments({ + userId: userIdObj, + status: 'completed', + isActive: true, + }), + TrainingDataset.countDocuments({ + userId: userIdObj, + status: 'failed', + isActive: true, + }), + TrainingSample.countDocuments({ + userId: userIdObj, + sourceType: 'dataset', + isActive: true, + }), + TrainingDataset.aggregate([ + { $match: { userId: userIdObj, isActive: true } }, + { $group: { _id: '$fileType', count: { $sum: 1 } } }, + ]), + ]) + + const byFileTypeObj: Record = {} + byFileType.forEach((item: any) => { + byFileTypeObj[item._id] = item.count + }) + + return { + totalDatasets, + completedDatasets, + failedDatasets, + totalSamples, + byFileType: byFileTypeObj, + } + } +} + +export default new TrainingDatasetService() diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.controller.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.controller.ts new file mode 100644 index 0000000..fd19402 --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.controller.ts @@ -0,0 +1,261 @@ +import { Request, Response, NextFunction } from 'express' +import TrainingSampleService from './TrainingSample.service' +import { createTrainingSampleSchema, updateTrainingSampleSchema, vectorSearchSchema } from './TrainingSample.validator' + +class TrainingSampleController { + /** + * POST /api/v1/training-samples + * Create a new training sample + */ + async 
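
A behavioral bug worth flagging in `deleteDataset` above: the `deleteMany` filter is `{ sourceType: 'dataset', userId }`, so opting in with `deleteSamples=true` wipes every dataset-imported sample the user owns, not just the rows of the dataset being deleted. Scoping by `datasetId` fixes it, provided `processDataset` stamps that field when creating samples (see the earlier sketch):

```ts
if (deletesSamples) {
  // Purge only the samples imported from THIS dataset, not every
  // dataset-sourced sample the user owns.
  await TrainingSample.deleteMany({
    datasetId: new Types.ObjectId(datasetId),
    userId: new Types.ObjectId(userId),
  })
}
```
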
createTrainingSample(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + // Validate request body + const validatedData = createTrainingSampleSchema.parse(req.body) + + const sample = await TrainingSampleService.createSample(userId, validatedData) + + res.status(201).json({ + success: true, + data: sample, + message: 'Training sample created successfully', + }) + } catch (error: any) { + if (error.name === 'ZodError') { + res.status(400).json({ + error: 'Validation error', + details: error.errors, + }) + } else { + console.error('Error creating training sample:', error) + res.status(500).json({ error: 'Failed to create training sample' }) + } + } + } + + /** + * GET /api/v1/training-samples + * Get all training samples with filtering and pagination + */ + async getSamples(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + const { + type, + tags, + sourceType, + isActive = true, + language, + skip = 0, + limit = 20, + } = req.query + + const filters = { + type: type ? (Array.isArray(type) ? type : [type]) : undefined, + tags: tags ? (Array.isArray(tags) ? tags : [tags]) : undefined, + sourceType: sourceType as 'manual' | 'dataset' | undefined, + isActive: isActive === 'true' ? true : isActive === 'false' ? false : undefined, + language: language as string | undefined, + } + + const result = await TrainingSampleService.getSamplesByUser( + userId, + filters, + parseInt(skip as string) || 0, + parseInt(limit as string) || 20 + ) + + res.status(200).json({ + success: true, + data: result.samples, + pagination: { + skip: parseInt(skip as string) || 0, + limit: parseInt(limit as string) || 20, + total: result.total, + }, + }) + } catch (error) { + console.error('Error fetching training samples:', error) + res.status(500).json({ error: 'Failed to fetch training samples' }) + } + } + + /** + * GET /api/v1/training-samples/:id + * Get a single training sample by ID + */ + async getSampleById(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + const { id } = req.params + + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + const sample = await TrainingSampleService.getSampleById(id, userId) + + if (!sample) { + res.status(404).json({ error: 'Training sample not found' }) + return + } + + res.status(200).json({ + success: true, + data: sample, + }) + } catch (error) { + console.error('Error fetching training sample:', error) + res.status(500).json({ error: 'Failed to fetch training sample' }) + } + } + + /** + * PUT /api/v1/training-samples/:id + * Update a training sample + */ + async updateSample(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + const { id } = req.params + + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + // Validate request body + const validatedData = updateTrainingSampleSchema.parse(req.body) + + const updatedSample = await TrainingSampleService.updateSample(id, userId, validatedData) + + if (!updatedSample) { + res.status(404).json({ error: 'Training sample not found' }) + return + } + + res.status(200).json({ + success: true, + data: updatedSample, + message: 'Training sample updated successfully', + }) + } catch (error: any) { + if (error.name === 'ZodError') { 
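
A subtle trap in the `getSamples` query handling above: the destructuring default for `isActive` is the boolean `true`, but the subsequent comparisons are against the strings `'true'` and `'false'`, so when the client omits the parameter the filter silently resolves to `undefined` and inactive samples come back despite the stated default. A small normalizer makes the intent explicit:

```ts
// Normalizes an Express query value to a boolean, with a fallback for absence.
const parseBooleanParam = (value: unknown, fallback: boolean): boolean => {
  if (value === 'true') return true
  if (value === 'false') return false
  return fallback
}

// In getSamples:
const filters = {
  // ...other filters unchanged...
  isActive: parseBooleanParam(req.query.isActive, true),
}
```
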
+ res.status(400).json({ + error: 'Validation error', + details: error.errors, + }) + } else { + console.error('Error updating training sample:', error) + res.status(500).json({ error: 'Failed to update training sample' }) + } + } + } + + /** + * DELETE /api/v1/training-samples/:id + * Delete a training sample (soft delete) + */ + async deleteSample(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + const { id } = req.params + + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + const success = await TrainingSampleService.deleteSample(id, userId) + + if (!success) { + res.status(404).json({ error: 'Training sample not found' }) + return + } + + res.status(200).json({ + success: true, + message: 'Training sample deleted successfully', + }) + } catch (error) { + console.error('Error deleting training sample:', error) + res.status(500).json({ error: 'Failed to delete training sample' }) + } + } + + /** + * POST /api/v1/training-samples/search + * Vector semantic search + */ + async vectorSearch(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + // Validate request body + const validatedData = vectorSearchSchema.parse(req.body) + + const result = await TrainingSampleService.vectorSearch(userId, validatedData) + + res.status(200).json({ + success: true, + data: result.samples, + metadata: { + totalResults: result.totalResults, + searchTime: result.searchTime, + query: validatedData.query, + }, + }) + } catch (error: any) { + if (error.name === 'ZodError') { + res.status(400).json({ + error: 'Validation error', + details: error.errors, + }) + } else { + console.error('Error performing vector search:', error) + res.status(500).json({ error: 'Failed to perform vector search' }) + } + } + } + + /** + * GET /api/v1/training-samples/stats + * Get statistics about training samples + */ + async getStatistics(req: Request, res: Response, next: NextFunction): Promise { + try { + const userId = req.user?.id + if (!userId) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + + const stats = await TrainingSampleService.getStatistics(userId) + + res.status(200).json({ + success: true, + data: stats, + }) + } catch (error) { + console.error('Error fetching statistics:', error) + res.status(500).json({ error: 'Failed to fetch statistics' }) + } + } +} + +export default new TrainingSampleController() diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.embedding.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.embedding.ts new file mode 100644 index 0000000..d1e174c --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.embedding.ts @@ -0,0 +1,69 @@ +import { GoogleGenerativeAI } from '@google/generative-ai' +import { IEmbeddingResponse } from './TrainingSample.types' + +class EmbeddingUtils { + private genAI: GoogleGenerativeAI + private embeddingModel = 'models/embedding-001' + + constructor() { + const apiKey = process.env.GOOGLE_API_KEY + if (!apiKey) { + throw new Error('GOOGLE_API_KEY environment variable is required for embeddings') + } + this.genAI = new GoogleGenerativeAI(apiKey) + } + + async generateEmbedding(text: string): Promise { + try { + const model = this.genAI.getGenerativeModel({ model: this.embeddingModel }) + + const result = await model.embedContent(text) + const embedding = result.embedding + + if (!embedding 
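
The embedding call is the flakiest dependency in this module (rate limits, transient 5xx responses), and both `createSample` and `processDataset` fail outright when a single call throws. A retry wrapper with exponential backoff is a cheap hedge; the retry count and base delay are assumptions. Relatedly, `generateEmbeddingBatch` below fires all texts through one unbounded `Promise.all`; the chunking pattern from the dataset sketch earlier applies there too.

```ts
// Retries an async operation with exponential backoff (500ms, 1s, 2s, ...).
async function withRetry<T>(fn: () => Promise<T>, retries = 3, baseDelayMs = 500): Promise<T> {
  let lastError: unknown
  for (let attempt = 0; attempt <= retries; attempt++) {
    try {
      return await fn()
    } catch (error) {
      lastError = error
      if (attempt < retries) {
        await new Promise(resolve => setTimeout(resolve, baseDelayMs * 2 ** attempt))
      }
    }
  }
  throw lastError
}

// Usage:
// const { embedding } = await withRetry(() => EmbeddingUtils.generateEmbedding(text))
```
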
|| !embedding.values) { + throw new Error('Failed to generate embedding - no embedding values returned') + } + + return { + embedding: embedding.values, + modelUsed: this.embeddingModel, + } + } catch (error) { + console.error('Error generating embedding:', error) + throw new Error(`Failed to generate embedding: ${(error as Error).message}`) + } + } + + async generateEmbeddingBatch(texts: string[]): Promise { + const results = await Promise.all(texts.map(text => this.generateEmbedding(text))) + return results + } + + // Calculate cosine similarity between two vectors + static cosineSimilarity(a: number[], b: number[]): number { + if (a.length !== b.length) { + throw new Error('Vectors must have the same length') + } + + let dotProduct = 0 + let magnitudeA = 0 + let magnitudeB = 0 + + for (let i = 0; i < a.length; i++) { + dotProduct += a[i] * b[i] + magnitudeA += a[i] * a[i] + magnitudeB += b[i] * b[i] + } + + magnitudeA = Math.sqrt(magnitudeA) + magnitudeB = Math.sqrt(magnitudeB) + + if (magnitudeA === 0 || magnitudeB === 0) { + return 0 + } + + return dotProduct / (magnitudeA * magnitudeB) + } +} + +export default new EmbeddingUtils() diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.model.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.model.ts new file mode 100644 index 0000000..914361d --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.model.ts @@ -0,0 +1,133 @@ +import { Schema, model, Types } from 'mongoose' +import { ITrainingSample, ISection, IAnswerTemplate } from './TrainingSample.types' + +// Section schema for structured answers +const SectionSchema = new Schema( + { + title: { + type: String, + required: true, + trim: true, + }, + content: { + type: String, + required: true, + trim: true, + }, + }, + { _id: false } +) + +// Answer template schema +const AnswerTemplateSchema = new Schema( + { + greeting: { + type: String, + trim: true, + }, + answer: { + type: String, + required: true, + trim: true, + }, + sections: { + type: [SectionSchema], + default: [], + }, + suggestions: { + type: [String], + default: [], + }, + }, + { _id: false } +) + +// Main Training Sample schema +const TrainingSampleSchema = new Schema( + { + userId: { + type: Types.ObjectId, + ref: 'User', + required: true, + index: true, + }, + question: { + type: String, + required: true, + trim: true, + index: true, + }, + type: { + type: String, + enum: ['qa', 'snippet', 'doc', 'faq', 'other'], + default: 'qa', + index: true, + }, + answerTemplate: { + type: AnswerTemplateSchema, + required: true, + }, + codeSnippet: { + type: String, + default: null, + }, + // Vector embedding for semantic search + embedding: { + type: [Number], + required: true, + // Note: For MongoDB Atlas Vector Search, add vector index in MongoDB + // db.createIndex({ "embedding": "vector" }) + }, + filePath: { + type: String, + default: null, + }, + fileMimeType: { + type: String, + default: null, + }, + fileSizeInBytes: { + type: Number, + default: null, + }, + sourceType: { + type: String, + enum: ['manual', 'dataset'], + default: 'manual', + index: true, + }, + datasetId: { + type: Types.ObjectId, + ref: 'TrainingDataset', + default: null, + }, + tags: { + type: [String], + default: [], + index: true, + }, + language: { + type: String, + default: 'en', + trim: true, + index: true, + }, + isActive: { + type: Boolean, + default: true, + index: true, + }, + }, + { + timestamps: true, + versionKey: false, + } +) + +// Compound indices for common queries 
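
The schema comment on the `embedding` field gestures at MongoDB Atlas Vector Search, but `db.createIndex({ "embedding": "vector" })` is not how those indexes are created; they are defined as search indexes through Atlas (UI, Admin API, or `createSearchIndex` on recent drivers/mongosh). A plausible definition for this collection follows; `numDimensions: 768` assumes `embedding-001`'s output size (verify against the deployed model), and the filter fields mirror what the service's `vectorSearch` filters on:

```ts
// Sketch of an Atlas Vector Search index for the trainingsamples collection.
// All names here are assumptions; create it via the Atlas UI/Admin API or
// collection.createSearchIndex() on a recent driver.
const trainingSamplesVectorIndex = {
  name: 'training_samples_vector',
  type: 'vectorSearch',
  definition: {
    fields: [
      { type: 'vector', path: 'embedding', numDimensions: 768, similarity: 'cosine' },
      { type: 'filter', path: 'userId' },
      { type: 'filter', path: 'isActive' },
      { type: 'filter', path: 'type' },
      { type: 'filter', path: 'tags' },
      { type: 'filter', path: 'sourceType' },
      { type: 'filter', path: 'language' },
    ],
  },
}
```
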
+TrainingSampleSchema.index({ userId: 1, isActive: 1 }) +TrainingSampleSchema.index({ userId: 1, type: 1 }) +TrainingSampleSchema.index({ userId: 1, sourceType: 1 }) +TrainingSampleSchema.index({ tags: 1, userId: 1 }) + +export const TrainingSample = model('TrainingSample', TrainingSampleSchema) diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.routes.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.routes.ts new file mode 100644 index 0000000..4ee3e18 --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.routes.ts @@ -0,0 +1,79 @@ +import { Router } from 'express' +import TrainingSampleController from './TrainingSample.controller' + +const router = Router() + +/** + * Training Sample Routes + * All routes require authentication + */ + +/** + * POST /api/v1/training-samples + * Create a new training sample + * Body: { + * question: string, + * answerTemplate: { answer, format, structure }, + * type: 'qa' | 'snippet' | 'doc' | 'faq' | 'other', + * sourceType: 'manual' | 'dataset', + * tags: string[], + * language: string + * } + */ +router.post('/', TrainingSampleController.createTrainingSample.bind(TrainingSampleController)) + +/** + * GET /api/v1/training-samples + * Get all training samples with filtering + * Query params: + * - type: string[] (comma-separated) + * - tags: string[] (comma-separated) + * - sourceType: 'manual' | 'dataset' + * - isActive: boolean + * - language: string + * - skip: number (default: 0) + * - limit: number (default: 20) + */ +router.get('/', TrainingSampleController.getSamples.bind(TrainingSampleController)) + +/** + * GET /api/v1/training-samples/stats + * Get statistics about training samples + */ +router.get('/stats', TrainingSampleController.getStatistics.bind(TrainingSampleController)) + +/** + * GET /api/v1/training-samples/:id + * Get a single training sample + */ +router.get('/:id', TrainingSampleController.getSampleById.bind(TrainingSampleController)) + +/** + * PUT /api/v1/training-samples/:id + * Update a training sample + */ +router.put('/:id', TrainingSampleController.updateSample.bind(TrainingSampleController)) + +/** + * DELETE /api/v1/training-samples/:id + * Delete a training sample (soft delete) + */ +router.delete('/:id', TrainingSampleController.deleteSample.bind(TrainingSampleController)) + +/** + * POST /api/v1/training-samples/search + * Vector semantic search + * Body: { + * query: string, + * topK: number (default: 5), + * filters: { + * type?: string[], + * tags?: string[], + * sourceType?: 'manual' | 'dataset', + * language?: string + * } + * } + */ +router.post('/search', TrainingSampleController.vectorSearch.bind(TrainingSampleController)) + +export default router diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.service.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.service.ts new file mode 100644 index 0000000..19a66ca --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.service.ts @@ -0,0 +1,229 @@ +import { Types } from 'mongoose' +import { TrainingSample } from './TrainingSample.model' +import { ITrainingSample, IVectorSearchRequest, IVectorSearchResult } from './TrainingSample.types' +import EmbeddingUtils from './TrainingSample.embedding' + +class TrainingSampleService { + /** + * Create a new training sample with embedding generation + */ + async createSample(userId: string, data: any): Promise { + try { + // Generate embedding from question and answer + const embeddingText = `${data.question} 
${data.answerTemplate.answer}` + const { embedding } = await EmbeddingUtils.generateEmbedding(embeddingText) + + const sample = await TrainingSample.create({ + userId: new Types.ObjectId(userId), + ...data, + embedding, + }) + + return sample + } catch (error) { + console.error('Error creating training sample:', error) + throw error + } + } + + /** + * Get all training samples for a user with filters + */ + async getSamplesByUser( + userId: string, + filters: { + type?: string[] + tags?: string[] + sourceType?: 'manual' | 'dataset' + isActive?: boolean + language?: string + } = {}, + skip: number = 0, + limit: number = 20 + ): Promise<{ samples: ITrainingSample[]; total: number }> { + const query: any = { + userId: new Types.ObjectId(userId), + } + + if (filters.type && filters.type.length > 0) { + query.type = { $in: filters.type } + } + if (filters.tags && filters.tags.length > 0) { + query.tags = { $in: filters.tags } + } + if (filters.sourceType) { + query.sourceType = filters.sourceType + } + if (filters.isActive !== undefined) { + query.isActive = filters.isActive + } + if (filters.language) { + query.language = filters.language + } + + const [samples, total] = await Promise.all([ + TrainingSample.find(query).skip(skip).limit(limit).exec(), + TrainingSample.countDocuments(query), + ]) + + return { samples, total } + } + + /** + * Get a single training sample by ID + */ + async getSampleById(sampleId: string, userId: string): Promise { + return await TrainingSample.findOne({ + _id: new Types.ObjectId(sampleId), + userId: new Types.ObjectId(userId), + }) + } + + /** + * Update a training sample and regenerate embedding if content changed + */ + async updateSample(sampleId: string, userId: string, data: any): Promise { + const sample = await this.getSampleById(sampleId, userId) + + if (!sample) { + throw new Error('Training sample not found') + } + + // Regenerate embedding if question or answer changed + let embedding = sample.embedding + if (data.question || data.answerTemplate) { + const question = data.question || sample.question + const answer = data.answerTemplate?.answer || sample.answerTemplate.answer + const embeddingText = `${question} ${answer}` + const result = await EmbeddingUtils.generateEmbedding(embeddingText) + embedding = result.embedding + } + + const updated = await TrainingSample.findByIdAndUpdate( + sampleId, + { + ...data, + embedding, + }, + { new: true } + ) + + return updated + } + + /** + * Soft delete a training sample + */ + async deleteSample(sampleId: string, userId: string): Promise { + const result = await TrainingSample.findOneAndUpdate( + { + _id: new Types.ObjectId(sampleId), + userId: new Types.ObjectId(userId), + }, + { isActive: false }, + { new: true } + ) + + return !!result + } + + /** + * Vector semantic search with cosine similarity + */ + async vectorSearch(userId: string, searchRequest: IVectorSearchRequest): Promise { + const startTime = Date.now() + + try { + // Generate embedding for the query + const { embedding: queryEmbedding } = await EmbeddingUtils.generateEmbedding(searchRequest.query) + + // Build filter query + const filterQuery: any = { + userId: new Types.ObjectId(userId), + isActive: true, + } + + if (searchRequest.filters) { + if (searchRequest.filters.type && searchRequest.filters.type.length > 0) { + filterQuery.type = { $in: searchRequest.filters.type } + } + if (searchRequest.filters.tags && searchRequest.filters.tags.length > 0) { + filterQuery.tags = { $in: searchRequest.filters.tags } + } + if 
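
Unlike `TrainingDatasetService.getDatasets`, the `getSamplesByUser` query above applies `skip`/`limit` with no sort, so page boundaries are unspecified and rows can repeat or disappear between pages. Mirroring the dataset service keeps pagination deterministic:

```ts
const [samples, total] = await Promise.all([
  TrainingSample.find(query)
    .sort({ createdAt: -1 }) // stable ordering before skip/limit
    .skip(skip)
    .limit(limit)
    .exec(),
  TrainingSample.countDocuments(query),
])
```
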
(searchRequest.filters.sourceType) { + filterQuery.sourceType = searchRequest.filters.sourceType + } + if (searchRequest.filters.language) { + filterQuery.language = searchRequest.filters.language + } + } + + // Get all matching samples (note: for large datasets, consider MongoDB Atlas Vector Search) + const samples = await TrainingSample.find(filterQuery).exec() + + // Calculate similarity scores + const scoredSamples = samples + .map(sample => ({ + sample, + score: EmbeddingUtils.cosineSimilarity(queryEmbedding, sample.embedding), + })) + .sort((a, b) => b.score - a.score) + .slice(0, searchRequest.topK || 5) + + const searchTime = Date.now() - startTime + + return { + samples: scoredSamples.map(s => s.sample), + totalResults: scoredSamples.length, + searchTime, + } + } catch (error) { + console.error('Error performing vector search:', error) + throw error + } + } + + /** + * Get statistics for training samples + */ + async getStatistics(userId: string): Promise<{ + total: number + active: number + byType: Record + byLanguage: Record + }> { + const userIdObj = new Types.ObjectId(userId) + + const [total, active, byType, byLanguage] = await Promise.all([ + TrainingSample.countDocuments({ userId: userIdObj }), + TrainingSample.countDocuments({ userId: userIdObj, isActive: true }), + TrainingSample.aggregate([ + { $match: { userId: userIdObj } }, + { $group: { _id: '$type', count: { $sum: 1 } } }, + ]), + TrainingSample.aggregate([ + { $match: { userId: userIdObj } }, + { $group: { _id: '$language', count: { $sum: 1 } } }, + ]), + ]) + + const byTypeObj: Record = {} + byType.forEach((item: any) => { + byTypeObj[item._id] = item.count + }) + + const byLanguageObj: Record = {} + byLanguage.forEach((item: any) => { + byLanguageObj[item._id] = item.count + }) + + return { + total, + active, + byType: byTypeObj, + byLanguage: byLanguageObj, + } + } +} + +export default new TrainingSampleService() diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.types.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.types.ts new file mode 100644 index 0000000..b419999 --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.types.ts @@ -0,0 +1,75 @@ +import { Document, Types } from 'mongoose' + +export interface ISection { + title: string + content: string +} + +export interface IAnswerTemplate { + greeting?: string + answer: string + sections: ISection[] + suggestions: string[] +} + +export interface IAgent { + provider: string + model: string + isPaid: boolean +} + +export interface ITrainingSample extends Document { + userId: Types.ObjectId + question: string + type: 'qa' | 'snippet' | 'doc' | 'faq' | 'other' + answerTemplate: IAnswerTemplate + codeSnippet?: string + embedding: number[] + filePath?: string + fileMimeType?: string + fileSizeInBytes?: number + sourceType: 'manual' | 'dataset' + datasetId?: Types.ObjectId + tags: string[] + language: string + isActive: boolean + createdAt: Date + updatedAt: Date +} + +export interface ITrainingDataset extends Document { + userId: Types.ObjectId + fileName: string + filePath: string + fileMimeType: string + fileSizeInBytes: number + totalSamples: number + processedSamples: number + status: 'pending' | 'processing' | 'completed' | 'failed' + errorMessage?: string + createdAt: Date + updatedAt: Date +} + +export interface IEmbeddingResponse { + embedding: number[] + modelUsed: string +} + +export interface IVectorSearchRequest { + query: string + topK?: number + filters?: { + type?: string[] + tags?: 
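
As the comment in `vectorSearch` above concedes, loading every matching document and ranking in Node is O(collection size) per query. On Atlas, the same lookup can be pushed into a `$vectorSearch` stage, assuming the vector index sketched earlier exists (the index name is a placeholder):

```ts
const topK = searchRequest.topK || 5

// Requires MongoDB Atlas with a vector index on `embedding`; the name
// 'training_samples_vector' is an assumption.
const results = await TrainingSample.aggregate([
  {
    $vectorSearch: {
      index: 'training_samples_vector',
      path: 'embedding',
      queryVector: queryEmbedding,
      numCandidates: topK * 20, // oversample, then let Atlas rank
      limit: topK,
      filter: { userId: new Types.ObjectId(userId), isActive: true },
    },
  },
  { $addFields: { score: { $meta: 'vectorSearchScore' } } },
])
```
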
string[] + sourceType?: 'manual' | 'dataset' + isActive?: boolean + language?: string + } +} + +export interface IVectorSearchResult { + samples: ITrainingSample[] + totalResults: number + searchTime: number +} diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.utils.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.utils.ts new file mode 100644 index 0000000..dd9a66f --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.utils.ts @@ -0,0 +1,246 @@ +import * as fs from 'fs' +import * as path from 'path' +import * as readline from 'readline' + +export interface ParsedSample { + question: string + answerTemplate: { + answer: string + format?: string + structure?: string[] + } + type: 'qa' | 'snippet' | 'doc' | 'faq' | 'other' + tags?: string[] + language?: string + sourceType: 'dataset' +} + +/** + * Utility class for parsing training data files + */ +export class TrainingSampleFileParser { + /** + * Parse CSV file for training samples + * Expected format: question,answer,type,tags,language + */ + static async parseCSV(filePath: string): Promise { + const samples: ParsedSample[] = [] + + return new Promise((resolve, reject) => { + const fileStream = fs.createReadStream(filePath) + const rl = readline.createInterface({ + input: fileStream, + crlfDelay: Infinity, + }) + + let isFirstLine = true + + rl.on('line', (line: string) => { + // Skip header row + if (isFirstLine) { + isFirstLine = false + return + } + + try { + const [question, answer, type, tags, language] = line.split(',').map(s => s.trim()) + + if (!question || !answer) { + console.warn('Skipping invalid CSV row:', line) + return + } + + const sample: ParsedSample = { + question, + answerTemplate: { + answer, + format: 'text', + }, + type: (type || 'qa') as any, + tags: tags ? 
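
A known limitation of `parseCSV` above: `line.split(',')` mangles any question or answer that itself contains a comma, which ordinary prose often does. The robust fix is a real CSV library (e.g. `csv-parse`); if the extra dependency is unwanted, a minimal quote-aware splitter covers the common case. The sketch assumes RFC 4180-style double quoting and no embedded newlines (which the `readline` loop above cannot see anyway):

```ts
// Splits one CSV line, honoring double-quoted fields and "" escapes.
function splitCsvLine(line: string): string[] {
  const fields: string[] = []
  let current = ''
  let inQuotes = false

  for (let i = 0; i < line.length; i++) {
    const ch = line[i]
    if (inQuotes) {
      if (ch === '"' && line[i + 1] === '"') {
        current += '"'
        i++ // skip the escaped quote
      } else if (ch === '"') {
        inQuotes = false
      } else {
        current += ch
      }
    } else if (ch === '"') {
      inQuotes = true
    } else if (ch === ',') {
      fields.push(current)
      current = ''
    } else {
      current += ch
    }
  }
  fields.push(current)
  return fields
}

// Drop-in for the naive split above:
// const [question, answer, type, tags, language] = splitCsvLine(line).map(s => s.trim())
```
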
tags.split(';').map(t => t.trim()) : [], + language: language || 'en', + sourceType: 'dataset', + } + + samples.push(sample) + } catch (error) { + console.warn('Error parsing CSV line:', line, error) + } + }) + + rl.on('close', () => { + resolve(samples) + }) + + rl.on('error', reject) + }) + } + + /** + * Parse JSON file for training samples + * Expected format: Array of objects with question, answer, type, tags, language + */ + static async parseJSON(filePath: string): Promise { + const content = fs.readFileSync(filePath, 'utf-8') + const data = JSON.parse(content) + + if (!Array.isArray(data)) { + throw new Error('JSON file must contain an array of samples') + } + + return data.map((item: any) => { + if (!item.question || !item.answerTemplate?.answer) { + throw new Error('Each sample must have question and answerTemplate.answer') + } + + return { + question: item.question, + answerTemplate: { + answer: item.answerTemplate.answer, + format: item.answerTemplate.format || 'text', + structure: item.answerTemplate.structure || [], + }, + type: item.type || 'qa', + tags: item.tags || [], + language: item.language || 'en', + sourceType: 'dataset', + } as ParsedSample + }) + } + + /** + * Parse Markdown file for training samples + * Format: ## Question\nAnswer text\nTags: tag1, tag2\n---\n + */ + static async parseMarkdown(filePath: string): Promise { + const content = fs.readFileSync(filePath, 'utf-8') + const sections = content.split('---').map(s => s.trim()).filter(s => s) + + return sections.map((section, index) => { + const lines = section.split('\n').filter(l => l.trim()) + + if (lines.length < 2) { + throw new Error(`Invalid markdown section ${index}: must have at least question and answer`) + } + + // Extract question (first ## header) + const questionMatch = lines[0].match(/^#+\s+(.+)$/) + const question = questionMatch ? 
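
For reference, here is the on-disk shape `parseMarkdown` expects and what it returns, as a small self-contained check (the fixture content and temp path are illustrative):

```ts
import * as fs from 'fs'
import * as os from 'os'
import * as path from 'path'
import TrainingSampleFileParser from './TrainingSample.utils'

const fixture = [
  '## How do I reset my password?',
  'Open Settings > Account and choose "Reset password".',
  'Tags: account, auth',
  'Language: en',
  '---',
  '## How do I export my data?',
  'Use the Export button on the dashboard.',
  'Tags: data',
].join('\n')

const tmpFile = path.join(os.tmpdir(), 'parse-markdown-fixture.md')
fs.writeFileSync(tmpFile, fixture)

TrainingSampleFileParser.parseMarkdown(tmpFile).then(samples => {
  console.log(samples.length) // 2
  console.log(samples[0].question) // 'How do I reset my password?'
  console.log(samples[0].tags) // ['account', 'auth']
  console.log(samples[0].type) // 'doc': parseMarkdown hardcodes this
})
```
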
questionMatch[1] : lines[0] + + // Extract answer (everything except question and metadata) + let answer = '' + let tags: string[] = [] + let language = 'en' + + for (let i = 1; i < lines.length; i++) { + const line = lines[i] + if (line.startsWith('Tags:')) { + tags = line + .replace('Tags:', '') + .split(',') + .map(t => t.trim()) + } else if (line.startsWith('Language:')) { + language = line.replace('Language:', '').trim() + } else if (!line.startsWith('---')) { + answer += line + '\n' + } + } + + return { + question, + answerTemplate: { + answer: answer.trim(), + format: 'markdown', + }, + type: 'doc', + tags, + language, + sourceType: 'dataset', + } as ParsedSample + }) + } + + /** + * Parse Text file for training samples + * Simple format: pairs of questions and answers separated by newlines + */ + static async parseText(filePath: string): Promise { + const content = fs.readFileSync(filePath, 'utf-8') + const lines = content.split('\n').filter(l => l.trim()) + + const samples: ParsedSample[] = [] + + for (let i = 0; i < lines.length; i += 2) { + if (i + 1 < lines.length) { + samples.push({ + question: lines[i], + answerTemplate: { + answer: lines[i + 1], + format: 'text', + }, + type: 'qa', + tags: [], + language: 'en', + sourceType: 'dataset', + }) + } + } + + return samples + } + + /** + * Auto-detect and parse file based on extension + */ + static async parseFile(filePath: string): Promise { + const ext = path.extname(filePath).toLowerCase() + + switch (ext) { + case '.csv': + return this.parseCSV(filePath) + case '.json': + return this.parseJSON(filePath) + case '.md': + case '.markdown': + return this.parseMarkdown(filePath) + case '.txt': + return this.parseText(filePath) + default: + throw new Error(`Unsupported file format: ${ext}`) + } + } + + /** + * Validate parsed samples + */ + static validateSamples(samples: ParsedSample[]): { valid: ParsedSample[]; errors: string[] } { + const valid: ParsedSample[] = [] + const errors: string[] = [] + + samples.forEach((sample, index) => { + const errs: string[] = [] + + if (!sample.question || sample.question.trim().length < 5) { + errs.push(`Question too short (min 5 chars)`) + } + + if (!sample.answerTemplate.answer || sample.answerTemplate.answer.trim().length < 5) { + errs.push(`Answer too short (min 5 chars)`) + } + + if (!['qa', 'snippet', 'doc', 'faq', 'other'].includes(sample.type)) { + errs.push(`Invalid type: ${sample.type}`) + } + + if (errs.length === 0) { + valid.push(sample) + } else { + errors.push(`Sample ${index}: ${errs.join('; ')}`) + } + }) + + return { valid, errors } + } +} + +export default TrainingSampleFileParser diff --git a/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.validator.ts b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.validator.ts new file mode 100644 index 0000000..b0dd51d --- /dev/null +++ b/LocalMind-Backend/src/api/v1/TrainingSample/TrainingSample.validator.ts @@ -0,0 +1,36 @@ +import { z } from 'zod' + +const SectionSchema = z.object({ + title: z.string().min(1, 'Section title is required').max(200, 'Title too long'), + content: z.string().min(1, 'Section content is required'), +}) + +const AnswerTemplateSchema = z.object({ + greeting: z.string().max(500, 'Greeting too long').optional(), + answer: z.string().min(10, 'Answer must be at least 10 characters').max(5000, 'Answer too long'), + sections: z.array(SectionSchema).default([]), + suggestions: z.array(z.string().min(1).max(200)).default([]), +}) + +export const createTrainingSampleSchema = z.object({ + question: 
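
`parseFile` plus `validateSamples` together amount to a dry run of `processDataset` with no writes, which is useful for a hypothetical preview endpoint or a pre-upload CLI check; a minimal sketch:

```ts
import TrainingSampleFileParser from './TrainingSample.utils'

// Parses and validates a file, reporting what an import would keep.
async function previewDataset(filePath: string) {
  const parsed = await TrainingSampleFileParser.parseFile(filePath)
  const { valid, errors } = TrainingSampleFileParser.validateSamples(parsed)

  console.log(`parsed=${parsed.length} valid=${valid.length} rejected=${errors.length}`)
  errors.slice(0, 5).forEach(e => console.warn(e))

  return valid
}
```
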
z.string().min(5, 'Question must be at least 5 characters').max(1000),
+ type: z.enum(['qa', 'snippet', 'doc', 'faq', 'other']).default('qa'),
+ answerTemplate: AnswerTemplateSchema,
+ codeSnippet: z.string().max(10000).optional(),
+ tags: z.array(z.string().min(1).max(50)).default([]),
+ // .max() must precede .default(): ZodDefault does not expose string validators
+ language: z.string().max(10).default('en'),
+})
+
+export const updateTrainingSampleSchema = createTrainingSampleSchema.partial()
+
+export const vectorSearchSchema = z.object({
+ query: z.string().min(1, 'Query is required'),
+ topK: z.number().int().min(1).max(100).default(5),
+ filters: z.object({
+ type: z.array(z.string()).optional(),
+ tags: z.array(z.string()).optional(),
+ sourceType: z.enum(['manual', 'dataset']).optional(),
+ isActive: z.boolean().optional(),
+ language: z.string().optional(),
+ }).optional(),
+})
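
Finally, tying the validators to the routes: client requests would look roughly like this. The base URL, auth scheme, and all field values are illustrative:

```ts
async function demo() {
  const BASE = 'http://localhost:3000/api/v1' // placeholder host
  const headers = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${process.env.API_TOKEN}`, // placeholder auth scheme
  }

  // POST /training-samples: body shape follows createTrainingSampleSchema.
  await fetch(`${BASE}/training-samples`, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      question: 'How do I rotate an API key?',
      type: 'faq',
      answerTemplate: {
        answer: 'Revoke the old key under Settings > API keys, then generate a new one.',
        sections: [],
        suggestions: ['Where are API keys stored?'],
      },
      tags: ['api', 'security'],
      language: 'en',
    }),
  })

  // POST /training-samples/search: body shape follows vectorSearchSchema.
  await fetch(`${BASE}/training-samples/search`, {
    method: 'POST',
    headers,
    body: JSON.stringify({ query: 'rotate api key', topK: 3, filters: { type: ['faq'] } }),
  })
}
```
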