From 758f0303709768251aad900396a17c590d6f0cdf Mon Sep 17 00:00:00 2001
From: Kiro Agent <244629292+kiro-agent@users.noreply.github.com>
Date: Wed, 8 Apr 2026 09:22:55 +0000
Subject: [PATCH 1/6] feat: extend domain model, GraphQL schema, and add AI
 exercise generation

- Add 'AI_TRAINING' to TrainingMode union type in Training.ts
- Add AIExercise interface with prompt, options, correctOptionIndex,
  exerciseType, and sourceWord fields
- Add optional aiExercises field to TrainingExecution interface
- Add AI_TRAINING to TrainingMode enum in GraphQL schema
- Add AIExercise type to GraphQL schema
- Add aiExercises field to TrainingExecution GraphQL type
- Add generateExercises method to AIService with rate limiting,
  Bedrock integration, and response validation
- Extract parseAndValidateExercises as a separate testable method

Co-authored-by: Johannes Koch <github@lockhead.net>
---
 backend/src/gql-schemas/schema.graphql |  10 ++
 backend/src/model/domain/Training.ts   |  11 +-
 backend/src/services/ai-service.ts     | 171 +++++++++++++++++++++++++
 3 files changed, 191 insertions(+), 1 deletion(-)

diff --git a/backend/src/gql-schemas/schema.graphql b/backend/src/gql-schemas/schema.graphql
index c7377a4..01e6684 100644
--- a/backend/src/gql-schemas/schema.graphql
+++ b/backend/src/gql-schemas/schema.graphql
@@ -27,6 +27,7 @@ enum PaymentProvider @aws_cognito_user_pools {
 enum TrainingMode @aws_cognito_user_pools {
   TEXT_INPUT
   MULTIPLE_CHOICE
+  AI_TRAINING
 }
 
 enum TrainingDirection @aws_cognito_user_pools {
@@ -106,6 +107,14 @@ type MultipleChoiceOption @aws_cognito_user_pools {
   options: [String!]!
 }
 
+type AIExercise @aws_cognito_user_pools {
+  prompt: String!
+  options: [String!]!
+  correctOptionIndex: Int!
+  exerciseType: String!
+  sourceWord: String!
+}
+
 type TrainingExecution @aws_cognito_user_pools {
   id: ID!
   trainingId: ID!
@@ -116,6 +125,7 @@ type TrainingExecution @aws_cognito_user_pools {
   results: [TrainingResult!]!
   multipleChoiceOptions: [MultipleChoiceOption!]
   words: [TrainingWord!]
+  aiExercises: [AIExercise!]
   correctCount: Int!
   incorrectCount: Int!
 }
diff --git a/backend/src/model/domain/Training.ts b/backend/src/model/domain/Training.ts
index aef8481..47228a7 100644
--- a/backend/src/model/domain/Training.ts
+++ b/backend/src/model/domain/Training.ts
@@ -2,7 +2,7 @@
  * Domain models for Training entity
  */
 
-export type TrainingMode = 'TEXT_INPUT' | 'MULTIPLE_CHOICE';
+export type TrainingMode = 'TEXT_INPUT' | 'MULTIPLE_CHOICE' | 'AI_TRAINING';
 export type TrainingDirection = 'WORD_TO_TRANSLATION' | 'TRANSLATION_TO_WORD';
 
 export interface TrainingWord {
@@ -41,6 +41,14 @@ export interface MultipleChoiceOption {
   correctOptionIndex: number;
 }
 
+export interface AIExercise {
+  prompt: string;
+  options: string[];
+  correctOptionIndex: number;
+  exerciseType: string;
+  sourceWord: string;
+}
+
 export interface TrainingExecution {
   id: string;
   trainingId: string;
@@ -51,6 +59,7 @@ export interface TrainingExecution {
   results: TrainingResult[];
   multipleChoiceOptions?: MultipleChoiceOption[];
   words?: TrainingWord[];
+  aiExercises?: AIExercise[];
   correctCount: number;
   incorrectCount: number;
 }
diff --git a/backend/src/services/ai-service.ts b/backend/src/services/ai-service.ts
index e71b646..72e0f7e 100644
--- a/backend/src/services/ai-service.ts
+++ b/backend/src/services/ai-service.ts
@@ -1,4 +1,5 @@
 import { BedrockRuntimeClient, InvokeModelCommand } from '@aws-sdk/client-bedrock-runtime';
+import type { AIExercise } from '../model/domain/Training';
 
 /**
  * Rate limiting tracker for AI service
@@ -147,6 +148,176 @@ export class AIService {
     return fallbackResponse.completion || fallbackResponse.text || '';
   }
 
+  /**
+   * Parse the model's reply into exercises, dropping (and logging) malformed entries.
+   *
+   * Leading/trailing markdown code fences are stripped before parsing. An entry is kept only if
+   * prompt, exerciseType and sourceWord are non-empty strings, options is an array of 3-5 strings,
+   * and correctOptionIndex is an integer index into options (matching the GraphQL Int!/[String!]!).
+   * @param responseText Raw text returned by the model.
+   * @returns The valid exercises in input order; may be empty.
+   * @throws Error if the text is not valid JSON or the JSON is not an array.
+   */
+  parseAndValidateExercises(responseText: string): AIExercise[] {
+    const stripped = responseText.replace(/^```(?:json)?\s*/i, '').replace(/\s*```\s*$/, '').trim();
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(stripped);
+    } catch {
+      throw new Error('Failed to parse exercises response as JSON');
+    }
+    if (!Array.isArray(parsed)) {
+      throw new Error('Expected JSON array of exercises');
+    }
+
+    const isNonEmptyString = (value: unknown): boolean => typeof value === 'string' && value.length > 0;
+    const validExercises: AIExercise[] = [];
+
+    for (const exercise of parsed) {
+      const isValid =
+        exercise &&
+        isNonEmptyString(exercise.prompt) &&
+        Array.isArray(exercise.options) &&
+        exercise.options.length >= 3 &&
+        exercise.options.length <= 5 &&
+        exercise.options.every((option: unknown) => typeof option === 'string') &&
+        Number.isInteger(exercise.correctOptionIndex) &&
+        exercise.correctOptionIndex >= 0 &&
+        exercise.correctOptionIndex < exercise.options.length &&
+        isNonEmptyString(exercise.exerciseType) &&
+        isNonEmptyString(exercise.sourceWord);
+
+      if (isValid) {
+        validExercises.push({
+          prompt: exercise.prompt,
+          options: exercise.options,
+          correctOptionIndex: exercise.correctOptionIndex,
+          exerciseType: exercise.exerciseType,
+          sourceWord: exercise.sourceWord,
+        });
+      } else {
+        console.warn('Invalid exercise filtered out:', JSON.stringify(exercise));
+      }
+    }
+
+    return validExercises;
+  }
+
+  /**
+   * Ask the Bedrock model for multiple-choice exercises covering the given words.
+   *
+   * @param words Vocabulary entries to build exercises from; must not be empty.
+   * @param sourceLanguage First language named in the prompt ("studying X to Y").
+   * @param targetLanguage Second language named in the prompt.
+   * @param userId Used for the rate-limit check and for usage logging.
+   * @returns The exercises that passed validation; never an empty array.
+   * @throws Error if words is empty, the rate limit is hit, or generation/parsing fails.
+   */
+  async generateExercises(
+    words: {
+      word: string;
+      translation?: string;
+      definition?: string;
+      partOfSpeech?: string;
+      exampleSentence?: string;
+    }[],
+    sourceLanguage: string,
+    targetLanguage: string,
+    userId: string,
+  ): Promise<AIExercise[]> {
+    // Validate first: an empty request should not reach the rate limiter (which presumably records the call).
+    if (!words || words.length === 0) {
+      throw new Error('Words array cannot be empty');
+    }
+
+    if (!this.checkRateLimit(userId)) {
+      throw new Error('Rate limit exceeded. Please wait before making more AI requests.');
+    }
+
+    try {
+      const wordDescriptions = words
+        .map((w, i) => {
+          const parts = [`${i + 1}. word: "${w.word}"`];
+          if (w.translation) parts.push(`translation: "${w.translation}"`);
+          if (w.definition) parts.push(`definition: "${w.definition}"`);
+          if (w.partOfSpeech) parts.push(`partOfSpeech: "${w.partOfSpeech}"`);
+          if (w.exampleSentence) parts.push(`exampleSentence: "${w.exampleSentence}"`);
+          return parts.join(', ');
+        })
+        .join('\n');
+
+      const prompt = `Generate vocabulary exercises for language learners studying ${sourceLanguage} to ${targetLanguage}.
+
+Here are the words to create exercises for:
+${wordDescriptions}
+
+Create a JSON array of exercises with varied types including: verb_conjugation, preposition, fill_in_the_blank, sentence_completion.
+
+Each exercise must have:
+- "prompt": a question or instruction for the learner
+- "options": an array of 3 to 5 answer choices
+- "correctOptionIndex": the zero-based index of the correct answer in the options array
+- "exerciseType": one of verb_conjugation, preposition, fill_in_the_blank, sentence_completion
+- "sourceWord": the vocabulary word this exercise is based on
+
+Return ONLY a valid JSON array, no markdown, no code blocks, no extra text.`;
+
+      const requestBody = this.buildRequestBody(prompt, 2000);
+      const response = await this.bedrockClient.send(
+        new InvokeModelCommand({
+          modelId: this.modelId,
+          contentType: 'application/json',
+          accept: 'application/json',
+          body: JSON.stringify(requestBody),
+        }),
+      );
+
+      const responseBody = JSON.parse(new TextDecoder().decode(response.body));
+      const responseText = this.extractResponseText(responseBody);
+      if (!responseText) {
+        throw new Error('No content returned from Bedrock');
+      }
+
+      const validExercises = this.parseAndValidateExercises(responseText);
+      if (validExercises.length === 0) {
+        throw new Error('No valid exercises could be generated');
+      }
+
+      // Log usage with a rough token estimate (about 4 characters per token)
+      this.logUsage(userId, 'generateExercises', responseText.length / 4);
+
+      return validExercises;
+    } catch (error) {
+      console.error('Error generating AI exercises with Bedrock:', error);
+      if (error instanceof Error) {
+        const { message, name } = error;
+        // Errors raised by our own checks are already user-facing: rethrow them untouched.
+        const ownErrors = ['Rate limit', 'No valid exercises', 'Failed to parse', 'Expected JSON array'];
+        if (ownErrors.some((marker) => message.includes(marker))) {
+          throw error;
+        }
+        // Match on the error name too: a ThrottlingException message need not contain "throttling".
+        if (name === 'ThrottlingException' || message.includes('throttling')) {
+          throw new Error('Bedrock service is currently throttling requests. Please try again later.');
+        }
+        if (message.includes('is not authorized to perform') || message.includes('AccessDeniedException')) {
+          throw new Error(
+            'Bedrock model access is not enabled. Please enable model access in the AWS Bedrock console.',
+          );
+        }
+        if (
+          message.includes('Could not resolve the foundation model') ||
+          message.includes('ValidationException') ||
+          name === 'ValidationException'
+        ) {
+          throw new Error(`Bedrock model '${this.modelId}' is not available in this region. Please contact support.`);
+        }
+      }
+
+      throw new Error(`Failed to generate AI exercises: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+
   /**
    * Enhance content using AI
    */

From 9b4a148876e7f6c3dc9e15bf9be0455d8627aeeb Mon Sep 17 00:00:00 2001
From: Kiro Agent <244629292+kiro-agent@users.noreply.github.com>
Date: Wed, 8 Apr 2026 09:26:41 +0000
Subject: [PATCH 2/6] feat: add AI_TRAINING mode support in TrainingService for
 create, start, and answer submission

Co-authored-by: Johannes Koch <github@lockhead.net>
---
 backend/src/services/training-service.ts | 163 +++++++++++++++++++++++
 1 file changed, 163 insertions(+)

diff --git a/backend/src/services/training-service.ts b/backend/src/services/training-service.ts
index 66a2c22..5436dc2 100644
--- a/backend/src/services/training-service.ts
+++ b/backend/src/services/training-service.ts
@@ -10,6 +10,7 @@ import type {
   TrainingResult,
   MultipleChoiceOption,
 } from '../model/domain/Training';
+import { getAIService } from './ai-service';
 
 /**
  * Service for managing vocabulary trainings
@@ -276,6 +277,67 @@ export class TrainingService {
         // Select up to randomizedWordCount words
         const selectedWords = collectedWords.slice(0, training.randomizedWordCount ?? 10);
 
+        if (training.mode === 'AI_TRAINING') {
+          if (selectedWords.length < 1) {
+            return { success: false, error: 'No words available from the selected vocabulary lists' };
+          }
+
+          // Fetch vocabulary lists for full word details and language info
+          const vocabRepoAI = VocabularyListRepository.getInstance();
+          let sourceLanguage = 'English';
+          let targetLanguage = 'English';
+          const enrichedWords: {
+            word: string;
+            translation?: string;
+            definition?: string;
+            partOfSpeech?: string;
+            exampleSentence?: string;
+          }[] = [];
+
+          for (const listId of training.vocabularyListIds) {
+            const list = await vocabRepoAI.getById(listId);
+            if (!list) continue;
+            if (list.sourceLanguage) sourceLanguage = list.sourceLanguage;
+            if (list.targetLanguage) targetLanguage = list.targetLanguage;
+
+            for (const selectedWord of selectedWords) {
+              if (selectedWord.vocabularyListId === listId) {
+                const fullWord = list.words.find((w) => w.word === selectedWord.word);
+                enrichedWords.push({
+                  word: selectedWord.word,
+                  translation: selectedWord.translation,
+                  definition: fullWord?.definition,
+                  partOfSpeech: fullWord?.partOfSpeech,
+                  exampleSentence: fullWord?.exampleSentence,
+                });
+              }
+            }
+          }
+
+          try {
+            const aiService = getAIService();
+            const aiExercises = await aiService.generateExercises(enrichedWords, sourceLanguage, targetLanguage, userId);
+
+            const execution: TrainingExecution = {
+              id: crypto.randomUUID(),
+              trainingId,
+              userId,
+              startedAt: new Date().toISOString(),
+              results: [],
+              words: selectedWords,
+              aiExercises,
+              correctCount: 0,
+              incorrectCount: 0,
+            };
+
+            await trainingRepo.createExecution(execution);
+            return { success: true, execution };
+          } catch (aiError) {
+            const errorMessage = aiError instanceof Error ? aiError.message : 'Unknown error';
+            return { success: false, error: 'Failed to generate AI exercises: ' + errorMessage };
+          }
+        }
+
         if (training.mode === 'MULTIPLE_CHOICE' && selectedWords.length < 3) {
           return { success: false, error: 'Multiple-choice requires at least 3 words' };
         }
@@ -303,6 +365,66 @@ export class TrainingService {
       }
 
       // Static path: existing behavior unchanged
+      if (training.mode === 'AI_TRAINING') {
+        if (training.words.length < 1) {
+          return { success: false, error: 'No words available from the selected vocabulary lists' };
+        }
+
+        // Fetch vocabulary lists for full word details and language info
+        const vocabRepoAI = VocabularyListRepository.getInstance();
+        let sourceLanguage = 'English';
+        let targetLanguage = 'English';
+        const enrichedWords: {
+          word: string;
+          translation?: string;
+          definition?: string;
+          partOfSpeech?: string;
+          exampleSentence?: string;
+        }[] = [];
+
+        for (const listId of training.vocabularyListIds) {
+          const list = await vocabRepoAI.getById(listId);
+          if (!list) continue;
+          if (list.sourceLanguage) sourceLanguage = list.sourceLanguage;
+          if (list.targetLanguage) targetLanguage = list.targetLanguage;
+
+          for (const trainingWord of training.words) {
+            if (trainingWord.vocabularyListId === listId) {
+              const fullWord = list.words.find((w) => w.word === trainingWord.word);
+              enrichedWords.push({
+                word: trainingWord.word,
+                translation: trainingWord.translation,
+                definition: fullWord?.definition,
+                partOfSpeech: fullWord?.partOfSpeech,
+                exampleSentence: fullWord?.exampleSentence,
+              });
+            }
+          }
+        }
+
+        try {
+          const aiService = getAIService();
+          const aiExercises = await aiService.generateExercises(enrichedWords, sourceLanguage, targetLanguage, userId);
+
+          const execution: TrainingExecution = {
+            id: crypto.randomUUID(),
+            trainingId,
+            userId,
+            startedAt: new Date().toISOString(),
+            results: [],
+            aiExercises,
+            correctCount: 0,
+            incorrectCount: 0,
+          };
+
+          await trainingRepo.createExecution(execution);
+          return { success: true, execution };
+        } catch (aiError) {
+          const errorMessage = aiError instanceof Error ? aiError.message : 'Unknown error';
+          return { success: false, error: 'Failed to generate AI exercises: ' + errorMessage };
+        }
+      }
+
       if (training.mode === 'MULTIPLE_CHOICE' && training.words.length < 3) {
         return { success: false, error: 'Multiple-choice requires at least 3 words' };
       }
@@ -377,6 +499,47 @@ export class TrainingService {
         return { success: false, error: 'Training not found' };
       }
 
+      // AI_TRAINING answer submission path
+      if (training.mode === 'AI_TRAINING') {
+        const aiExercises = execution.aiExercises;
+        if (!aiExercises || wordIndex < 0 || wordIndex >= aiExercises.length) {
+          return { success: false, error: 'Invalid word index' };
+        }
+
+        const exercise = aiExercises[wordIndex];
+        const selectedIndex = parseInt(answer, 10);
+        const correct = selectedIndex === exercise.correctOptionIndex;
+
+        const result: TrainingResult = {
+          wordIndex,
+          word: exercise.prompt,
+          expectedAnswer: exercise.options[exercise.correctOptionIndex],
+          userAnswer: answer,
+          correct,
+        };
+
+        execution.results.push(result);
+        if (correct) {
+          execution.correctCount++;
+        } else {
+          execution.incorrectCount++;
+        }
+
+        // Completion check: all AI exercises answered
+        if (execution.results.length === aiExercises.length) {
+          execution.completedAt = new Date().toISOString();
+        }
+
+        await trainingRepo.updateExecution(executionId, {
+          results: execution.results,
+          correctCount: execution.correctCount,
+          incorrectCount: execution.incorrectCount,
+          completedAt: execution.completedAt,
+        });
+
+        return { success: true, result, completed: !!execution.completedAt, execution };
+      }
+
       // Dual-path word resolution: randomized uses execution.words, static uses training.words
       const wordList = training.isRandomized ? execution.words! : training.words;
       const word = wordList[wordIndex];

From 7ac9053a7bfacdd031525a099dc0941c54ccff96 Mon Sep 17 00:00:00 2001
From: Kiro Agent <244629292+kiro-agent@users.noreply.github.com>
Date: Wed, 8 Apr 2026 09:32:36 +0000
Subject: [PATCH 3/6] feat: add AI service unit tests and property-based tests

- ai-service.test.ts: unit tests for parseAndValidateExercises (valid, invalid,
  markdown fences, invalid JSON, all-invalid) and generateExercises (Bedrock
  failure, rate limit, empty words, logging, invalid JSON, all-invalid)
- ai-service.property.test.ts: property tests for prompt construction (all words
  and languages included), parsing validation (only valid exercises returned),
  and round-trip serialization (JSON.stringify/parse equivalence)

Co-authored-by: Johannes Koch <github@lockhead.net>
---
 backend/test/ai-service.property.test.ts | 219 +++++++++++++++++
 backend/test/ai-service.test.ts          | 298 +++++++++++++++++++++++
 2 files changed, 517 insertions(+)
 create mode 100644 backend/test/ai-service.property.test.ts
 create mode 100644 backend/test/ai-service.test.ts

diff --git a/backend/test/ai-service.property.test.ts b/backend/test/ai-service.property.test.ts
new file mode 100644
index 0000000..4d79def
--- /dev/null
+++ b/backend/test/ai-service.property.test.ts
@@ -0,0 +1,219 @@
+import { describe, test, expect, beforeEach } from 'vitest';
+import * as fc from 'fast-check';
+import { AIService } from '../src/services/ai-service';
+import { mockClient } from 'aws-sdk-client-mock';
+import { BedrockRuntimeClient, InvokeModelCommand } from '@aws-sdk/client-bedrock-runtime';
+import type { AIExercise } from '../src/model/domain/Training';
+
+/**
+ * Property-Based Tests for AI Service
+ */
+
+const bedrockMock = mockClient(BedrockRuntimeClient);
+
+// Shared arbitrary for valid exercises. The position of the correct answer is derived from a
+// random seed, so every valid index gets exercised rather than only index 0.
+const validExerciseArb: fc.Arbitrary<AIExercise> = fc
+  .record({
+    prompt: fc.string({ minLength: 1, maxLength: 100 }),
+    options: fc.array(fc.string({ minLength: 1, maxLength: 30 }), { minLength: 3, maxLength: 5 }),
+    exerciseType: fc.constantFrom('verb_conjugation', 'preposition', 'fill_in_the_blank', 'sentence_completion'),
+    sourceWord: fc.string({ minLength: 1, maxLength: 30 }),
+    indexSeed: fc.nat(),
+  })
+  // Drop the seed again so the result has exactly the AIExercise fields.
+  .map(({ indexSeed, ...rec }) => ({ ...rec, correctOptionIndex: indexSeed % rec.options.length }));
+
+// Arbitrary for invalid exercises (missing or malformed fields)
+const invalidExerciseArb = fc.oneof(
+  // Missing prompt
+  fc.record({
+    options: fc.array(fc.string({ minLength: 1, maxLength: 20 }), { minLength: 3, maxLength: 5 }),
+    correctOptionIndex: fc.constant(0),
+    exerciseType: fc.constant('fill_in_the_blank'),
+    sourceWord: fc.string({ minLength: 1, maxLength: 20 }),
+  }),
+  // Too few options
+  fc.record({
+    prompt: fc.string({ minLength: 1, maxLength: 50 }),
+    options: fc.array(fc.string({ minLength: 1, maxLength: 20 }), { minLength: 1, maxLength: 2 }),
+    correctOptionIndex: fc.constant(0),
+    exerciseType: fc.constant('fill_in_the_blank'),
+    sourceWord: fc.string({ minLength: 1, maxLength: 20 }),
+  }),
+  // Empty sourceWord
+  fc.record({
+    prompt: fc.string({ minLength: 1, maxLength: 50 }),
+    options: fc.array(fc.string({ minLength: 1, maxLength: 20 }), { minLength: 3, maxLength: 5 }),
+    correctOptionIndex: fc.constant(0),
+    exerciseType: fc.constant('fill_in_the_blank'),
+    sourceWord: fc.constant(''),
+  }),
+  // correctOptionIndex out of bounds
+  fc.record({
+    prompt: fc.string({ minLength: 1, maxLength: 50 }),
+    options: fc.array(fc.string({ minLength: 1, maxLength: 20 }), { minLength: 3, maxLength: 3 }),
+    correctOptionIndex: fc.constant(99),
+    exerciseType: fc.constant('fill_in_the_blank'),
+    sourceWord: fc.string({ minLength: 1, maxLength: 20 }),
+  }),
+);
+
+/**
+ * Wrap `text` in a Titan-style payload ({ results: [{ outputText }] }) and return it
+ * as the UTF-8 encoded `body` of a fake InvokeModel response.
+ */
+function createBedrockResponse(text: string) {
+  const payload = JSON.stringify({ results: [{ outputText: text }] });
+  return { body: new TextEncoder().encode(payload) };
+}
+
+describe('AI Service Property Tests', () => {
+  beforeEach(() => {
+    bedrockMock.reset();
+  });
+
+  /**
+   * Property 2: Prompt construction
+   * For random words and languages, the prompt sent to Bedrock includes all word fields and both language names.
+   */
+  test('Property 2: Prompt construction includes all words and languages', { timeout: 60000 }, async () => {
+    await fc.assert(
+      fc.asyncProperty(
+        fc.array(
+          fc.record({
+            word: fc.string({ minLength: 1, maxLength: 20 }).filter((s) => !s.includes('"')),
+            translation: fc.string({ minLength: 1, maxLength: 20 }).filter((s) => !s.includes('"')),
+          }),
+          { minLength: 1, maxLength: 5 },
+        ),
+        fc.string({ minLength: 1, maxLength: 20 }).filter((s) => s.trim().length > 0),
+        fc.string({ minLength: 1, maxLength: 20 }).filter((s) => s.trim().length > 0),
+        async (words, sourceLanguage, targetLanguage) => {
+          bedrockMock.reset();
+
+          let capturedBody: string | undefined;
+
+          bedrockMock.on(InvokeModelCommand).callsFake((input) => {
+            capturedBody = typeof input.body === 'string' ? input.body : new TextDecoder().decode(input.body);
+
+            // Return a valid exercises response
+            const exercises = words.map((w) => ({
+              prompt: `What is the translation of ${w.word}?`,
+              options: ['opt_a', 'opt_b', 'opt_c'],
+              correctOptionIndex: 0,
+              exerciseType: 'fill_in_the_blank',
+              sourceWord: w.word,
+            }));
+
+            return createBedrockResponse(JSON.stringify(exercises));
+          });
+
+          const service = AIService.getInstance();
+          const userId = `user-prop2-${Date.now()}-${Math.random()}`;
+
+          await service.generateExercises(words, sourceLanguage, targetLanguage, userId);
+
+          expect(capturedBody).toBeDefined();
+
+          // The body contains the prompt within the request body structure
+          // For Titan format: { inputText: prompt, ... }
+          const parsedBody = JSON.parse(capturedBody!);
+          const prompt: string = parsedBody.inputText;
+
+          // Verify prompt contains both language names
+          expect(prompt).toContain(sourceLanguage);
+          expect(prompt).toContain(targetLanguage);
+
+          // Verify prompt contains all word fields
+          for (const w of words) {
+            expect(prompt).toContain(w.word);
+            expect(prompt).toContain(w.translation);
+          }
+        },
+      ),
+      { numRuns: 100 },
+    );
+  });
+
+  /**
+   * Property 3: Parsing validation
+   * For any array mixing valid and invalid exercises, parseAndValidateExercises returns only valid ones.
+   */
+  test(
+    'Property 3: Parsing returns only valid exercises from mixed input',
+    { timeout: 60000 },
+    async () => {
+      await fc.assert(
+        fc.property(
+          fc.array(validExerciseArb, { minLength: 0, maxLength: 5 }),
+          fc.array(invalidExerciseArb, { minLength: 0, maxLength: 5 }),
+          (validExercises, invalidExercises) => {
+            const mixed = [...validExercises, ...invalidExercises];
+            // Interleave valid and invalid entries deterministically by sorting on their JSON text
+            const shuffled = mixed.sort((a, b) => JSON.stringify(a).localeCompare(JSON.stringify(b)));
+
+            const service = AIService.getInstance();
+            const result = service.parseAndValidateExercises(JSON.stringify(shuffled));
+
+            // All returned exercises must be valid
+            for (const exercise of result) {
+              expect(typeof exercise.prompt).toBe('string');
+              expect(exercise.prompt.length).toBeGreaterThan(0);
+              expect(Array.isArray(exercise.options)).toBe(true);
+              expect(exercise.options.length).toBeGreaterThanOrEqual(3);
+              expect(exercise.options.length).toBeLessThanOrEqual(5);
+              expect(typeof exercise.correctOptionIndex).toBe('number');
+              expect(exercise.correctOptionIndex).toBeGreaterThanOrEqual(0);
+              expect(exercise.correctOptionIndex).toBeLessThan(exercise.options.length);
+              expect(typeof exercise.exerciseType).toBe('string');
+              expect(exercise.exerciseType.length).toBeGreaterThan(0);
+              expect(typeof exercise.sourceWord).toBe('string');
+              expect(exercise.sourceWord.length).toBeGreaterThan(0);
+            }
+
+            // Result count should be <= total input count
+            expect(result.length).toBeLessThanOrEqual(shuffled.length);
+
+            // Result count should be >= valid exercise count (all valid ones should pass)
+            // Note: some "invalid" exercises might accidentally be valid, so we just check
+            // that valid exercises pass through
+            for (const valid of validExercises) {
+              const found = result.some(
+                (r) =>
+                  r.prompt === valid.prompt &&
+                  r.sourceWord === valid.sourceWord &&
+                  r.exerciseType === valid.exerciseType,
+              );
+              expect(found).toBe(true);
+            }
+          },
+        ),
+        { numRuns: 100 },
+      );
+    },
+  );
+
+  /**
+   * Property 4: Round-trip serialization
+   * For any valid AIExercise, JSON.stringify then JSON.parse is equivalent.
+   */
+  test('Property 4: Round-trip serialization preserves exercises', { timeout: 60000 }, async () => {
+    await fc.assert(
+      fc.property(validExerciseArb, (exercise) => {
+        const serialized = JSON.stringify(exercise);
+        const deserialized = JSON.parse(serialized) as AIExercise;
+
+        expect(deserialized.prompt).toBe(exercise.prompt);
+        expect(deserialized.options).toEqual(exercise.options);
+        expect(deserialized.correctOptionIndex).toBe(exercise.correctOptionIndex);
+        expect(deserialized.exerciseType).toBe(exercise.exerciseType);
+        expect(deserialized.sourceWord).toBe(exercise.sourceWord);
+
+        // Deep equality
+        expect(deserialized).toEqual(exercise);
+      }),
+      { numRuns: 100 },
+    );
+  });
+});
diff --git a/backend/test/ai-service.test.ts b/backend/test/ai-service.test.ts
new file mode 100644
index 0000000..abd9818
--- /dev/null
+++ b/backend/test/ai-service.test.ts
@@ -0,0 +1,298 @@
+import { describe, test, expect, beforeEach, vi } from 'vitest';
+import { AIService } from '../src/services/ai-service';
+import { mockClient } from 'aws-sdk-client-mock';
+import { BedrockRuntimeClient, InvokeModelCommand } from '@aws-sdk/client-bedrock-runtime';
+
+/**
+ * Unit Tests for AI Service
+ */
+
+const bedrockMock = mockClient(BedrockRuntimeClient);
+
+/**
+ * Helper: wrap `text` as the UTF-8 encoded JSON body of a Titan-format Bedrock response
+ */
+function createBedrockResponse(text: string) {
+  const payload = { results: [{ outputText: text }] };
+  const body = new TextEncoder().encode(JSON.stringify(payload));
+  return { body };
+}
+
+/**
+ * Helper: serialize `count` well-formed exercises (three options each, the first marked correct)
+ */
+function validExercisesJson(count: number = 2): string {
+  const buildExercise = (n: number) => ({
+    prompt: `What is the translation of word${n}?`,
+    options: [`option_a_${n}`, `option_b_${n}`, `option_c_${n}`],
+    correctOptionIndex: 0,
+    exerciseType: 'fill_in_the_blank',
+    sourceWord: `word${n}`,
+  });
+  const exercises: ReturnType<typeof buildExercise>[] = [];
+  // Plain counter loop: one exercise for each index 0 .. count - 1
+  for (let n = 0; n < count; n += 1) exercises.push(buildExercise(n));
+  return JSON.stringify(exercises);
+}
+
+describe('AI Service Unit Tests', () => {
+  beforeEach(() => {
+    bedrockMock.reset();
+  });
+
+  describe('parseAndValidateExercises', () => {
+    test('valid exercises returned correctly', () => {
+      // Two well-formed exercises: a three-option fill-in and a four-option conjugation
+      const payload = JSON.stringify([
+        {
+          prompt: 'What is the translation?',
+          options: ['hola', 'adios', 'gracias'],
+          correctOptionIndex: 0,
+          exerciseType: 'fill_in_the_blank',
+          sourceWord: 'hello',
+        },
+        {
+          prompt: 'Choose the correct verb form',
+          options: ['corro', 'corres', 'corre', 'corremos'],
+          correctOptionIndex: 2,
+          exerciseType: 'verb_conjugation',
+          sourceWord: 'run',
+        },
+      ]);
+
+      const parsed = AIService.getInstance().parseAndValidateExercises(payload);
+
+      expect(parsed).toHaveLength(2);
+      const [fillIn, conjugation] = parsed;
+      expect(fillIn.prompt).toBe('What is the translation?');
+      expect(fillIn.options).toEqual(['hola', 'adios', 'gracias']);
+      expect(fillIn.correctOptionIndex).toBe(0);
+      expect(fillIn.exerciseType).toBe('fill_in_the_blank');
+      expect(fillIn.sourceWord).toBe('hello');
+      expect(conjugation.prompt).toBe('Choose the correct verb form');
+      expect(conjugation.exerciseType).toBe('verb_conjugation');
+    });
+
+    test('invalid exercises filtered out', () => {
+      const parser = AIService.getInstance();
+
+      const mixedPayload = JSON.stringify([
+        // Kept: well-formed
+        {
+          prompt: 'What is the translation?',
+          options: ['hola', 'adios', 'gracias'],
+          correctOptionIndex: 0,
+          exerciseType: 'fill_in_the_blank',
+          sourceWord: 'hello',
+        },
+        // Dropped: no prompt
+        {
+          options: ['a', 'b', 'c'],
+          correctOptionIndex: 0,
+          exerciseType: 'fill_in_the_blank',
+          sourceWord: 'test',
+        },
+        // Dropped: correctOptionIndex 5 is past the last of the three options
+        {
+          prompt: 'Question?',
+          options: ['a', 'b', 'c'],
+          correctOptionIndex: 5,
+          exerciseType: 'fill_in_the_blank',
+          sourceWord: 'test',
+        },
+        // Kept: well-formed
+        {
+          prompt: 'Another question',
+          options: ['x', 'y', 'z'],
+          correctOptionIndex: 1,
+          exerciseType: 'preposition',
+          sourceWord: 'word2',
+        },
+      ]);
+
+      const survivors = parser.parseAndValidateExercises(mixedPayload);
+
+      expect(survivors).toHaveLength(2);
+      expect(survivors[0].sourceWord).toBe('hello');
+      expect(survivors[1].sourceWord).toBe('word2');
+    });
+
+    test('handles markdown fences', () => {
+      const parser = AIService.getInstance();
+
+      const fencedReply = `\`\`\`json
+[
+  {
+    "prompt": "Translate hello",
+    "options": ["hola", "adios", "gracias"],
+    "correctOptionIndex": 0,
+    "exerciseType": "fill_in_the_blank",
+    "sourceWord": "hello"
+  }
+]
+\`\`\``;
+
+      const exercises = parser.parseAndValidateExercises(fencedReply);
+
+      expect(exercises).toHaveLength(1);
+      expect(exercises[0].sourceWord).toBe('hello');
+    });
+
+    test('throws on invalid JSON', () => {
+      const parser = AIService.getInstance();
+      const parseGarbage = () => parser.parseAndValidateExercises('not valid json {');
+
+      // The input is prose followed by an unclosed brace, so it is not parseable JSON
+      expect(parseGarbage).toThrow('Failed to parse exercises response as JSON');
+    });
+
+    test('returns empty for all-invalid', () => {
+      const parser = AIService.getInstance();
+
+      const allBroken = JSON.stringify([
+        // No prompt
+        {
+          options: ['a', 'b', 'c'],
+          correctOptionIndex: 0,
+          exerciseType: 'fill_in_the_blank',
+          sourceWord: 'test',
+        },
+        // Only two options
+        {
+          prompt: 'Question?',
+          options: ['a', 'b'],
+          correctOptionIndex: 0,
+          exerciseType: 'fill_in_the_blank',
+          sourceWord: 'test',
+        },
+        // sourceWord is the empty string
+        {
+          prompt: 'Question?',
+          options: ['a', 'b', 'c'],
+          correctOptionIndex: 0,
+          exerciseType: 'fill_in_the_blank',
+          sourceWord: '',
+        },
+      ]);
+
+      const survivors = parser.parseAndValidateExercises(allBroken);
+
+      expect(survivors).toHaveLength(0);
+    });
+  });
+
+  describe('generateExercises', () => {
+    test('Bedrock failure returns error', async () => {
+      // Every InvokeModel call made through the mocked client rejects
+      const outage = new Error('Bedrock service unavailable');
+      bedrockMock.on(InvokeModelCommand).rejects(outage);
+
+      const pending = AIService.getInstance().generateExercises(
+        [{ word: 'hello', translation: 'hola' }],
+        'English',
+        'Spanish',
+        'user-bedrock-fail',
+      );
+
+      await expect(pending).rejects.toThrow('Failed to generate AI exercises');
+    });
+
+    test('rate limit error when exceeded', async () => {
+      bedrockMock.on(InvokeModelCommand).resolves(createBedrockResponse(validExercisesJson()));
+
+      // Timestamped id, presumably so this user's quota is not shared with other tests or runs
+      const userId = `user-rate-limit-${Date.now()}`;
+      const words = [{ word: 'hello', translation: 'hola' }];
+      const service = AIService.getInstance();
+      const requestExercises = () => service.generateExercises(words, 'English', 'Spanish', userId);
+
+      // Spend the whole allowance first: the limit is 10 calls per window,
+      // so each of these is expected to succeed
+      for (let call = 1; call <= 10; call += 1) {
+        await requestExercises();
+      }
+
+      // Call number 11 should be rejected by the rate limiter
+      await expect(requestExercises()).rejects.toThrow('Rate limit exceeded');
+    });
+
+    test('throws when words empty', async () => {
+      const service = AIService.getInstance();
+      const attempt = service.generateExercises([], 'English', 'Spanish', 'user-empty-words');
+
+      // This test registers no Bedrock stub; the expected failure is the empty-input error
+      await expect(attempt).rejects.toThrow('Words array cannot be empty');
+    });
+
+    test('logs usage', async () => {
+      const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
+      // try/finally: restore console.log even if an assertion fails, so the mock cannot leak into later tests
+      try {
+        bedrockMock.on(InvokeModelCommand).resolves(createBedrockResponse(validExercisesJson()));
+
+        const service = AIService.getInstance();
+        const userId = `user-logs-${Date.now()}`;
+        await service.generateExercises([{ word: 'hello', translation: 'hola' }], 'English', 'Spanish', userId);
+
+        // Look for a console.log call whose first argument is JSON for this user's usage; skip non-JSON output
+        const usageLog = consoleSpy.mock.calls.find((call) => {
+          try {
+            const parsed = JSON.parse(call[0] as string);
+            return parsed.operation === 'generateExercises' && parsed.userId === userId;
+          } catch {
+            return false;
+          }
+        });
+
+        expect(usageLog).toBeDefined();
+        const parsedLog = JSON.parse(usageLog![0] as string);
+        expect(parsedLog.userId).toBe(userId);
+        expect(parsedLog.operation).toBe('generateExercises');
+        expect(parsedLog.tokenCount).toBeGreaterThan(0);
+        expect(parsedLog.timestamp).toBeDefined();
+      } finally {
+        consoleSpy.mockRestore();
+      }
+    });
+
+    test('invalid JSON from Bedrock', async () => {
+      // The mocked model replies with plain prose instead of a JSON array
+      const proseReply = createBedrockResponse('this is not json at all');
+      bedrockMock.on(InvokeModelCommand).resolves(proseReply);
+
+      const userId = `user-invalid-json-${Date.now()}`;
+      const words = [{ word: 'hello', translation: 'hola' }];
+      const attempt = AIService.getInstance().generateExercises(words, 'English', 'Spanish', userId);
+      await expect(attempt).rejects.toThrow('Failed to parse exercises response as JSON');
+    });
+
+    test('all exercises invalid returns error', async () => {
+      const unusableReply = JSON.stringify([
+        // No prompt
+        {
+          options: ['a', 'b', 'c'],
+          correctOptionIndex: 0,
+          exerciseType: 'fill_in_the_blank',
+          sourceWord: 'test',
+        },
+        // Only two options
+        {
+          prompt: 'Question?',
+          options: ['a', 'b'],
+          correctOptionIndex: 0,
+          exerciseType: 'fill_in_the_blank',
+          sourceWord: 'test',
+        },
+      ]);
+
+      bedrockMock.on(InvokeModelCommand).resolves(createBedrockResponse(unusableReply));
+
+      const userId = `user-all-invalid-${Date.now()}`;
+      const aiService = AIService.getInstance();
+
+      await expect(
+        aiService.generateExercises([{ word: 'hello', translation: 'hola' }], 'English', 'Spanish', userId),
+      ).rejects.toThrow('No valid exercises could be generated');
+    });
+  });
+});

From 0e7a2c481b5b11034344595df08cca3e125d7aab Mon Sep 17 00:00:00 2001
From: Kiro Agent <244629292+kiro-agent@users.noreply.github.com>
Date: Wed, 8 Apr 2026 09:35:22 +0000
Subject: [PATCH 4/6] feat: add AI training service unit tests for start,
 submit, and completion

Co-authored-by: Johannes Koch <github@lockhead.net>
---
 backend/test/ai-training-service.test.ts | 475 +++++++++++++++++++++++
 1 file changed, 475 insertions(+)
 create mode 100644 backend/test/ai-training-service.test.ts

diff --git a/backend/test/ai-training-service.test.ts b/backend/test/ai-training-service.test.ts
new file mode 100644
index 0000000..e85d2dc
--- /dev/null
+++ b/backend/test/ai-training-service.test.ts
@@ -0,0 +1,475 @@
+import { describe, test, expect, beforeEach } from 'vitest';
+import { TrainingService } from '../src/services/training-service';
+import { mockClient } from 'aws-sdk-client-mock';
+import { DynamoDBDocumentClient, GetCommand, PutCommand, UpdateCommand } from '@aws-sdk/lib-dynamodb';
+import { BedrockRuntimeClient, InvokeModelCommand } from '@aws-sdk/client-bedrock-runtime';
+import type { Training, TrainingExecution } from '../src/model/domain/Training';
+
+const ddbMock = mockClient(DynamoDBDocumentClient);
+const bedrockMock = mockClient(BedrockRuntimeClient);
+
+const VOCAB_TABLE = 'train-with-joe-vocabulary-lists-sandbox';
+const TRAINING_TABLE = 'train-with-joe-trainings-sandbox';
+
+/**
+ * Helper: build a Bedrock response body whose output text is a JSON array holding one
+ * three-option fill-in-the-blank exercise per word (the first option is marked correct)
+ */
+function createExercisesResponse(words: { word: string }[]) {
+  const outputText = JSON.stringify(
+    words.map(({ word }, position) => ({
+      prompt: `Fill in the blank: ___ is the word for ${word}`,
+      options: [`option_a_${position}`, `option_b_${position}`, `option_c_${position}`],
+      correctOptionIndex: 0,
+      exerciseType: 'fill_in_the_blank',
+      sourceWord: word,
+    })),
+  );
+  // Wrap the serialized exercises in the { results: [{ outputText }] } envelope
+  const envelope = JSON.stringify({ results: [{ outputText }] });
+
+  return { body: new TextEncoder().encode(envelope) };
+}
+
+describe('AI Training Service Unit Tests', () => {
+  beforeEach(() => {
+    ddbMock.reset();
+    bedrockMock.reset();
+  });
+
+  describe('startTraining - AI_TRAINING static path', () => {
+    test('should return error when starting AI training with 0 words', async () => {
+      const userId = 'user-123';
+      const trainingId = 'training#ai-empty';
+
+      // AI training without the randomized flags and with an empty word list
+      const wordlessTraining: Training = {
+        id: trainingId,
+        userId,
+        name: 'Empty AI Training',
+        mode: 'AI_TRAINING',
+        direction: 'WORD_TO_TRANSLATION',
+        vocabularyListIds: ['list-1'],
+        words: [],
+        createdAt: '2024-01-01T00:00:00.000Z',
+        updatedAt: '2024-01-01T00:00:00.000Z',
+      };
+
+      // Only the lookup of this training in the trainings table finds an item
+      ddbMock.on(GetCommand).callsFake((request) => {
+        const isTrainingLookup = request.TableName === TRAINING_TABLE && request.Key.id === trainingId;
+        return { Item: isTrainingLookup ? { ...wordlessTraining } : undefined };
+      });
+
+      const trainingService = TrainingService.getInstance();
+      const outcome = await trainingService.startTraining(trainingId, userId);
+
+      expect(outcome.success).toBe(false);
+      expect(outcome.error).toContain('No words available');
+    });
+
+    test('should succeed with valid words and return aiExercises', async () => {
+      const userId = 'user-123';
+      const trainingId = 'training#ai-valid';
+      const vocabListId = 'list-1';
+
+      const training: Training = {
+        id: trainingId,
+        userId,
+        name: 'AI Training',
+        mode: 'AI_TRAINING',
+        direction: 'WORD_TO_TRANSLATION',
+        vocabularyListIds: [vocabListId],
+        words: [
+          { word: 'hello', translation: 'hola', vocabularyListId: vocabListId },
+          { word: 'goodbye', translation: 'adios', vocabularyListId: vocabListId },
+        ],
+        createdAt: '2024-01-01T00:00:00.000Z',
+        updatedAt: '2024-01-01T00:00:00.000Z',
+      };
+
+      const buildVocabularyList = () => ({
+        id: vocabListId,
+        userId,
+        sourceLanguage: 'English',
+        targetLanguage: 'Spanish',
+        words: [
+          { word: 'hello', translation: 'hola', definition: 'a greeting' },
+          { word: 'goodbye', translation: 'adios', definition: 'a farewell' },
+        ],
+        createdAt: '2024-01-01T00:00:00.000Z',
+        updatedAt: '2024-01-01T00:00:00.000Z',
+      });
+
+      ddbMock.on(GetCommand).callsFake((request) => {
+        if (request.TableName === TRAINING_TABLE && request.Key.id === trainingId) {
+          return { Item: { ...training } };
+        }
+        if (request.TableName === VOCAB_TABLE && request.Key.id === vocabListId) {
+          return { Item: buildVocabularyList() };
+        }
+        return { Item: undefined };
+      });
+
+      ddbMock.on(PutCommand).resolves({});
+
+      bedrockMock.on(InvokeModelCommand).resolves(
+        createExercisesResponse([{ word: 'hello' }, { word: 'goodbye' }]),
+      );
+
+      const trainingService = TrainingService.getInstance();
+      const outcome = await trainingService.startTraining(trainingId, userId);
+
+      expect(outcome.success).toBe(true);
+      expect(outcome.execution).toBeDefined();
+      expect(outcome.execution?.aiExercises).toBeDefined();
+      expect(outcome.execution?.aiExercises).toHaveLength(2);
+      expect(outcome.execution?.multipleChoiceOptions).toBeUndefined();
+    });
+  });
+
+  describe('TEXT_INPUT training backward compatibility', () => {
+    test('should not invoke Bedrock for TEXT_INPUT training', async () => {
+      const userId = 'user-123';
+      const trainingId = 'training#text-input';
+
+      const textTraining: Training = {
+        id: trainingId,
+        userId,
+        name: 'Text Input Training',
+        mode: 'TEXT_INPUT',
+        direction: 'WORD_TO_TRANSLATION',
+        vocabularyListIds: ['list-1'],
+        words: [
+          { word: 'cat', translation: 'gato', vocabularyListId: 'list-1' },
+          { word: 'dog', translation: 'perro', vocabularyListId: 'list-1' },
+          { word: 'bird', translation: 'pajaro', vocabularyListId: 'list-1' },
+        ],
+        createdAt: '2024-01-01T00:00:00.000Z',
+        updatedAt: '2024-01-01T00:00:00.000Z',
+      };
+
+      ddbMock.on(PutCommand).resolves({});
+
+      ddbMock.on(GetCommand).callsFake((request) => {
+        if (request.TableName !== TRAINING_TABLE || request.Key.id !== trainingId) {
+          return { Item: undefined };
+        }
+        return { Item: { ...textTraining } };
+      });
+
+      const trainingService = TrainingService.getInstance();
+      const outcome = await trainingService.startTraining(trainingId, userId);
+
+      expect(outcome.success).toBe(true);
+      expect(outcome.execution).toBeDefined();
+      expect(outcome.execution?.aiExercises).toBeUndefined();
+      expect(bedrockMock.commandCalls(InvokeModelCommand)).toHaveLength(0);
+    });
+  });
+
+  describe('AI answer submission', () => {
+    test('should mark answer correct when selected index matches correctOptionIndex', async () => {
+      const userId = 'user-123';
+      const executionId = 'execution#ai-1';
+      const trainingId = 'training#ai-1';
+
+      const training: Training = {
+        id: trainingId,
+        userId,
+        name: 'AI Training',
+        mode: 'AI_TRAINING',
+        direction: 'WORD_TO_TRANSLATION',
+        vocabularyListIds: ['list-1'],
+        words: [
+          { word: 'hello', translation: 'hola', vocabularyListId: 'list-1' },
+          { word: 'goodbye', translation: 'adios', vocabularyListId: 'list-1' },
+        ],
+        createdAt: '2024-01-01T00:00:00.000Z',
+        updatedAt: '2024-01-01T00:00:00.000Z',
+      };
+
+      const execution: TrainingExecution = {
+        id: executionId,
+        trainingId,
+        userId,
+        startedAt: '2024-01-01T10:00:00.000Z',
+        results: [],
+        aiExercises: [
+          {
+            prompt: 'Fill in the blank: ___ is the word for hello',
+            options: ['hola', 'adios', 'gato'],
+            correctOptionIndex: 0,
+            exerciseType: 'fill_in_the_blank',
+            sourceWord: 'hello',
+          },
+          {
+            prompt: 'Fill in the blank: ___ is the word for goodbye',
+            options: ['adios', 'hola', 'perro'],
+            correctOptionIndex: 0,
+            exerciseType: 'fill_in_the_blank',
+            sourceWord: 'goodbye',
+          },
+        ],
+        correctCount: 0,
+        incorrectCount: 0,
+      };
+
+      ddbMock.on(GetCommand).callsFake((request) => {
+        switch (request.Key.id) {
+          case executionId:
+            return { Item: { ...execution } };
+          case trainingId:
+            return { Item: { ...training } };
+          default:
+            return { Item: undefined };
+        }
+      });
+
+      ddbMock.on(UpdateCommand).resolves({
+        Attributes: {
+          ...execution,
+          results: [
+            {
+              wordIndex: 0,
+              word: 'Fill in the blank: ___ is the word for hello',
+              expectedAnswer: 'hola',
+              userAnswer: '0',
+              correct: true,
+            },
+          ],
+          correctCount: 1,
+        },
+      });
+
+      const outcome = await TrainingService.getInstance().submitAnswer(executionId, userId, 0, '0');
+
+      expect(outcome.success).toBe(true);
+      expect(outcome.result).toBeDefined();
+      expect(outcome.result?.correct).toBe(true);
+    });
+
+    test('should mark answer incorrect when selected index does not match correctOptionIndex', async () => {
+      const userId = 'user-123';
+      const executionId = 'execution#ai-2';
+      const trainingId = 'training#ai-2';
+
+      const training: Training = {
+        id: trainingId,
+        userId,
+        name: 'AI Training',
+        mode: 'AI_TRAINING',
+        direction: 'WORD_TO_TRANSLATION',
+        vocabularyListIds: ['list-1'],
+        words: [{ word: 'hello', translation: 'hola', vocabularyListId: 'list-1' }],
+        createdAt: '2024-01-01T00:00:00.000Z',
+        updatedAt: '2024-01-01T00:00:00.000Z',
+      };
+
+      const execution: TrainingExecution = {
+        id: executionId,
+        trainingId,
+        userId,
+        startedAt: '2024-01-01T10:00:00.000Z',
+        results: [],
+        aiExercises: [
+          {
+            prompt: 'Fill in the blank: ___ is the word for hello',
+            options: ['hola', 'adios', 'gato'],
+            correctOptionIndex: 0,
+            exerciseType: 'fill_in_the_blank',
+            sourceWord: 'hello',
+          },
+        ],
+        correctCount: 0,
+        incorrectCount: 0,
+      };
+
+      ddbMock.on(GetCommand).callsFake((request) => {
+        switch (request.Key.id) {
+          case executionId:
+            return { Item: { ...execution } };
+          case trainingId:
+            return { Item: { ...training } };
+          default:
+            return { Item: undefined };
+        }
+      });
+
+      ddbMock.on(UpdateCommand).resolves({
+        Attributes: {
+          ...execution,
+          results: [
+            {
+              wordIndex: 0,
+              word: 'Fill in the blank: ___ is the word for hello',
+              expectedAnswer: 'hola',
+              userAnswer: '2',
+              correct: false,
+            },
+          ],
+          incorrectCount: 1,
+        },
+      });
+
+      const outcome = await TrainingService.getInstance().submitAnswer(executionId, userId, 0, '2');
+
+      expect(outcome.success).toBe(true);
+      expect(outcome.result).toBeDefined();
+      expect(outcome.result?.correct).toBe(false);
+    });
+  });
+
+  describe('AI training completion', () => {
+    test('should mark training completed after all exercises are answered', async () => {
+      const userId = 'user-123';
+      const executionId = 'execution#ai-complete';
+      const trainingId = 'training#ai-complete';
+
+      const training: Training = {
+        id: trainingId,
+        userId,
+        name: 'AI Training',
+        mode: 'AI_TRAINING',
+        direction: 'WORD_TO_TRANSLATION',
+        vocabularyListIds: ['list-1'],
+        words: [
+          { word: 'hello', translation: 'hola', vocabularyListId: 'list-1' },
+          { word: 'goodbye', translation: 'adios', vocabularyListId: 'list-1' },
+        ],
+        createdAt: '2024-01-01T00:00:00.000Z',
+        updatedAt: '2024-01-01T00:00:00.000Z',
+      };
+
+      const execution: TrainingExecution = {
+        id: executionId,
+        trainingId,
+        userId,
+        startedAt: '2024-01-01T10:00:00.000Z',
+        results: [
+          {
+            wordIndex: 0,
+            word: 'Fill in the blank: ___ is the word for hello',
+            expectedAnswer: 'hola',
+            userAnswer: '0',
+            correct: true,
+          },
+        ],
+        aiExercises: [
+          {
+            prompt: 'Fill in the blank: ___ is the word for hello',
+            options: ['hola', 'adios', 'gato'],
+            correctOptionIndex: 0,
+            exerciseType: 'fill_in_the_blank',
+            sourceWord: 'hello',
+          },
+          {
+            prompt: 'Fill in the blank: ___ is the word for goodbye',
+            options: ['adios', 'hola', 'perro'],
+            correctOptionIndex: 0,
+            exerciseType: 'fill_in_the_blank',
+            sourceWord: 'goodbye',
+          },
+        ],
+        correctCount: 1,
+        incorrectCount: 0,
+      };
+
+      ddbMock.on(GetCommand).callsFake((request) => {
+        switch (request.Key.id) {
+          case executionId:
+            return { Item: { ...execution } };
+          case trainingId:
+            return { Item: { ...training } };
+          default:
+            return { Item: undefined };
+        }
+      });
+
+      ddbMock.on(UpdateCommand).resolves({
+        Attributes: {
+          ...execution,
+          results: [
+            ...execution.results,
+            {
+              wordIndex: 1,
+              word: 'Fill in the blank: ___ is the word for goodbye',
+              expectedAnswer: 'adios',
+              userAnswer: '0',
+              correct: true,
+            },
+          ],
+          correctCount: 2,
+          completedAt: '2024-01-01T10:05:00.000Z',
+        },
+      });
+
+      const outcome = await TrainingService.getInstance().submitAnswer(executionId, userId, 1, '0');
+
+      expect(outcome.success).toBe(true);
+      expect(outcome.completed).toBe(true);
+    });
+  });
+
+  describe('startTraining - AI_TRAINING randomized path', () => {
+    test('should succeed with randomized AI training and return aiExercises with words', async () => {
+      const userId = 'user-123';
+      const trainingId = 'training#ai-rand';
+      const vocabListId = 'list-rand-1';
+
+      const training: Training = {
+        id: trainingId,
+        userId,
+        name: 'Randomized AI Training',
+        mode: 'AI_TRAINING',
+        direction: 'WORD_TO_TRANSLATION',
+        vocabularyListIds: [vocabListId],
+        words: [],
+        isRandomized: true,
+        randomizedWordCount: 10,
+        createdAt: '2024-01-01T00:00:00.000Z',
+        updatedAt: '2024-01-01T00:00:00.000Z',
+      };
+
+      const buildVocabularyList = () => ({
+        id: vocabListId,
+        userId,
+        sourceLanguage: 'English',
+        targetLanguage: 'French',
+        words: [
+          { word: 'house', translation: 'maison', definition: 'a building' },
+          { word: 'car', translation: 'voiture', definition: 'a vehicle' },
+          { word: 'tree', translation: 'arbre', definition: 'a plant' },
+        ],
+        createdAt: '2024-01-01T00:00:00.000Z',
+        updatedAt: '2024-01-01T00:00:00.000Z',
+      });
+
+      ddbMock.on(GetCommand).callsFake((request) => {
+        if (request.TableName === TRAINING_TABLE && request.Key.id === trainingId) {
+          return { Item: { ...training } };
+        }
+        if (request.TableName === VOCAB_TABLE && request.Key.id === vocabListId) {
+          return { Item: buildVocabularyList() };
+        }
+        return { Item: undefined };
+      });
+
+      ddbMock.on(PutCommand).resolves({});
+
+      bedrockMock.on(InvokeModelCommand).resolves(
+        createExercisesResponse([{ word: 'house' }, { word: 'car' }, { word: 'tree' }]),
+      );
+
+      const trainingService = TrainingService.getInstance();
+      const outcome = await trainingService.startTraining(trainingId, userId);
+
+      expect(outcome.success).toBe(true);
+      expect(outcome.execution).toBeDefined();
+      expect(outcome.execution?.aiExercises).toBeDefined();
+      expect(outcome.execution?.aiExercises?.length).toBeGreaterThan(0);
+      expect(outcome.execution?.words).toBeDefined();
+      expect(outcome.execution?.words?.length).toBeGreaterThan(0);
+    });
+  });
+});

From e64112d909d1448ce42f59d831f01a5916b3fb6d Mon Sep 17 00:00:00 2001
From: Kiro Agent <244629292+kiro-agent@users.noreply.github.com>
Date: Wed, 8 Apr 2026 09:38:36 +0000
Subject: [PATCH 5/6] feat: add AI training property-based tests for create,
 start, submit, and backward compatibility

Co-authored-by: Johannes Koch <github@lockhead.net>
---
 .../test/ai-training-service.property.test.ts | 385 ++++++++++++++++++
 1 file changed, 385 insertions(+)
 create mode 100644 backend/test/ai-training-service.property.test.ts

diff --git a/backend/test/ai-training-service.property.test.ts b/backend/test/ai-training-service.property.test.ts
new file mode 100644
index 0000000..00d0c88
--- /dev/null
+++ b/backend/test/ai-training-service.property.test.ts
@@ -0,0 +1,385 @@
+import { describe, test, expect, beforeEach } from 'vitest';
+import * as crypto from 'crypto';
+import * as fc from 'fast-check';
+import { TrainingService } from '../src/services/training-service';
+import { mockClient } from 'aws-sdk-client-mock';
+import { DynamoDBDocumentClient, GetCommand, PutCommand, UpdateCommand } from '@aws-sdk/lib-dynamodb';
+import { BedrockRuntimeClient, InvokeModelCommand } from '@aws-sdk/client-bedrock-runtime';
+import type { Training, TrainingExecution, AIExercise } from '../src/model/domain/Training';
+
+const ddbMock = mockClient(DynamoDBDocumentClient);
+const bedrockMock = mockClient(BedrockRuntimeClient);
+const VOCAB_TABLE = 'train-with-joe-vocabulary-lists-sandbox';
+const TRAINING_TABLE = 'train-with-joe-trainings-sandbox';
+
+// Builds a mocked InvokeModel result: one exercise per word (correct option always index 0), JSON-encoded under results[0].outputText
+function buildBedrockExercisesResponse(words: { word: string }[]): { body: Uint8Array } {
+  const exercises = words.map((w, i) => ({
+    prompt: `Exercise for ${w.word}: Fill in the blank`,
+    options: [`opt_a_${i}`, `opt_b_${i}`, `opt_c_${i}`],
+    correctOptionIndex: 0,
+    exerciseType: i % 2 === 0 ? 'fill_in_the_blank' : 'verb_conjugation',
+    sourceWord: w.word,
+  }));
+  return {
+    body: new TextEncoder().encode(JSON.stringify({ results: [{ outputText: JSON.stringify(exercises) }] })),
+  };
+}
+
+const vocabularyWordArb = fc.record({
+  word: fc.string({ minLength: 1, maxLength: 20 }),
+  translation: fc.string({ minLength: 1, maxLength: 20 }),
+  definition: fc.string({ minLength: 1, maxLength: 30 }),
+});
+
+describe('AI Training Service Property Tests', () => {
+  beforeEach(() => {
+    ddbMock.reset();
+    bedrockMock.reset();
+  });
+
+  /** For any user and 1-5 vocabulary lists stored under that user, createTraining with 'AI_TRAINING' must succeed and echo mode, userId and list ids.
+   * Feature: ai-training-mode, Property 1: AI training creation stores mode correctly
+   */
+  test(
+    'Feature: ai-training-mode, Property 1: AI training creation stores mode correctly',
+    { timeout: 60000 },
+    async () => {
+      await fc.assert(
+        fc.asyncProperty(
+          fc.uuid(),
+          fc.array(
+            fc.record({
+              id: fc.uuid(),
+              words: fc.array(vocabularyWordArb, { minLength: 1, maxLength: 10 }),
+            }),
+            { minLength: 1, maxLength: 5 },
+          ),
+          async (userId, vocabLists) => {
+            ddbMock.reset();
+            bedrockMock.reset();
+
+            // Serve each generated list from the vocabulary table; every other lookup yields an empty result
+            ddbMock.on(GetCommand).callsFake((input) => {
+              if (input.TableName === VOCAB_TABLE) {
+                const list = vocabLists.find((l) => l.id === input.Key.id);
+                if (list) {
+                  return {
+                    Item: {
+                      id: list.id,
+                      userId,
+                      words: list.words,
+                      createdAt: '2024-01-01T00:00:00.000Z',
+                      updatedAt: '2024-01-01T00:00:00.000Z',
+                    },
+                  };
+                }
+              }
+              return {};
+            });
+
+            ddbMock.on(PutCommand).resolves({});
+
+            const service = TrainingService.getInstance();
+            const listIds = vocabLists.map((l) => l.id);
+            const result = await service.createTraining(userId, listIds, 'AI_TRAINING');
+
+            expect(result.success).toBe(true);
+            expect(result.training).toBeDefined();
+            expect(result.training!.mode).toBe('AI_TRAINING');
+            expect(result.training!.userId).toBe(userId);
+            expect(result.training!.vocabularyListIds).toEqual(listIds);
+          },
+        ),
+        { numRuns: 100 },
+      );
+    },
+  );
+
+  /** Checks that startTraining on a static AI training returns a non-empty aiExercises list whose sourceWords all belong to the training.
+   * Feature: ai-training-mode, Property 5: AI training start produces one exercise per selected word
+   */
+  test(
+    'Feature: ai-training-mode, Property 5: AI training start produces one exercise per selected word',
+    { timeout: 60000 },
+    async () => {
+      await fc.assert(
+        fc.asyncProperty(
+          fc.uuid(),
+          fc.array(
+            fc.record({
+              id: fc.uuid(),
+              words: fc.array(vocabularyWordArb, { minLength: 1, maxLength: 8 }),
+            }),
+            { minLength: 1, maxLength: 4 },
+          ),
+          async (userId, vocabLists) => {
+            ddbMock.reset();
+            bedrockMock.reset();
+
+            const trainingId = crypto.randomUUID();
+            const listIds = vocabLists.map((l) => l.id);
+
+            // Expected training words: every list word with a non-empty translation, tagged with its list id
+            const allWords = vocabLists.flatMap((l) =>
+              l.words
+                .filter((w) => w.translation && w.translation.length > 0)
+                .map((w) => ({
+                  word: w.word,
+                  translation: w.translation,
+                  vocabularyListId: l.id,
+                })),
+            );
+
+            // Defensive guard; unreachable here because vocabularyWordArb translations have minLength 1
+            if (allWords.length === 0) return;
+
+            const training: Training = {
+              id: trainingId,
+              userId,
+              name: 'AI Static Training',
+              mode: 'AI_TRAINING',
+              direction: 'WORD_TO_TRANSLATION',
+              vocabularyListIds: listIds,
+              words: allWords,
+              createdAt: '2024-01-01T00:00:00.000Z',
+              updatedAt: '2024-01-01T00:00:00.000Z',
+            };
+
+            // Mock GetCommand: route by TableName to the training record or the matching vocabulary list
+            ddbMock.on(GetCommand).callsFake((input) => {
+              if (input.TableName === TRAINING_TABLE) {
+                if (input.Key.id === trainingId) {
+                  return { Item: { ...training } };
+                }
+                return { Item: undefined };
+              }
+              if (input.TableName === VOCAB_TABLE) {
+                const list = vocabLists.find((l) => l.id === input.Key.id);
+                if (list) {
+                  return {
+                    Item: {
+                      id: list.id,
+                      userId,
+                      sourceLanguage: 'English',
+                      targetLanguage: 'Spanish',
+                      words: list.words,
+                      createdAt: '2024-01-01T00:00:00.000Z',
+                      updatedAt: '2024-01-01T00:00:00.000Z',
+                    },
+                  };
+                }
+                return { Item: undefined };
+              }
+              return {};
+            });
+
+            // Mock InvokeModelCommand to return one exercise per training word on every call
+            bedrockMock.on(InvokeModelCommand).callsFake(() => {
+              return buildBedrockExercisesResponse(allWords.map((w) => ({ word: w.word })));
+            });
+
+            ddbMock.on(PutCommand).resolves({});
+
+            const service = TrainingService.getInstance();
+            const result = await service.startTraining(trainingId, userId);
+
+            expect(result.success).toBe(true);
+            expect(result.execution).toBeDefined();
+            expect(result.execution!.aiExercises).toBeDefined();
+            expect(result.execution!.aiExercises!.length).toBeGreaterThan(0);
+
+            // Every exercise's sourceWord must be a training word (the exercise count itself is only checked to be > 0)
+            const trainingWordSet = new Set(allWords.map((w) => w.word));
+            for (const exercise of result.execution!.aiExercises!) {
+              expect(trainingWordSet.has(exercise.sourceWord)).toBe(true);
+            }
+          },
+        ),
+        { numRuns: 100 },
+      );
+    },
+  );
+
+  /** Submits one answer per exercise (alternating right/wrong) and checks per-answer correctness plus completion after the last one.
+   * Feature: ai-training-mode, Property 6: AI answer submission and completion
+   */
+  test(
+    'Feature: ai-training-mode, Property 6: AI answer submission and completion',
+    { timeout: 60000 },
+    async () => {
+      const aiExerciseArb = fc.record({
+        prompt: fc.string({ minLength: 1, maxLength: 40 }),
+        options: fc.array(fc.string({ minLength: 1, maxLength: 20 }), { minLength: 3, maxLength: 5 }),
+        correctOptionIndex: fc.nat({ max: 4 }),
+        exerciseType: fc.constantFrom('fill_in_the_blank', 'verb_conjugation', 'preposition', 'sentence_completion'),
+        sourceWord: fc.string({ minLength: 1, maxLength: 20 }),
+      });
+
+      await fc.assert(
+        fc.asyncProperty(
+          fc.uuid(),
+          fc.array(aiExerciseArb, { minLength: 2, maxLength: 5 }),
+          async (userId, exercises) => {
+            ddbMock.reset();
+            bedrockMock.reset();
+
+            // Fold the generated index (0-4) into the bounds of this exercise's options array (3-5 entries)
+            const validExercises: AIExercise[] = exercises.map((e) => ({
+              ...e,
+              correctOptionIndex: e.correctOptionIndex % e.options.length,
+            }));
+
+            const trainingId = `training#${crypto.randomUUID()}`;
+            const executionId = `execution#${crypto.randomUUID()}`;
+
+            const training: Training = {
+              id: trainingId,
+              userId,
+              name: 'AI Training',
+              mode: 'AI_TRAINING',
+              direction: 'WORD_TO_TRANSLATION',
+              vocabularyListIds: ['list-1'],
+              words: validExercises.map((e) => ({
+                word: e.sourceWord,
+                translation: 'translation',
+                vocabularyListId: 'list-1',
+              })),
+              createdAt: '2024-01-01T00:00:00.000Z',
+              updatedAt: '2024-01-01T00:00:00.000Z',
+            };
+
+            // In-memory execution record: read by the GetCommand mock, mutated by the UpdateCommand mock
+            const executionState: TrainingExecution = {
+              id: executionId,
+              trainingId,
+              userId,
+              startedAt: '2024-01-01T10:00:00.000Z',
+              results: [],
+              aiExercises: validExercises,
+              correctCount: 0,
+              incorrectCount: 0,
+            };
+
+            ddbMock.on(GetCommand).callsFake((input) => {
+              if (input.Key.id === executionId) {
+                return { Item: { ...executionState, results: [...executionState.results] } };
+              }
+              if (input.Key.id === trainingId) {
+                return { Item: { ...training } };
+              }
+              return {};
+            });
+
+            ddbMock.on(UpdateCommand).callsFake((input) => {
+              // Copy the recognised expression values back into executionState so later reads see them
+              if (input.ExpressionAttributeValues) {
+                if (input.ExpressionAttributeValues[':results']) {
+                  executionState.results = input.ExpressionAttributeValues[':results'];
+                }
+                if (input.ExpressionAttributeValues[':correctCount'] !== undefined) {
+                  executionState.correctCount = input.ExpressionAttributeValues[':correctCount'];
+                }
+                if (input.ExpressionAttributeValues[':incorrectCount'] !== undefined) {
+                  executionState.incorrectCount = input.ExpressionAttributeValues[':incorrectCount'];
+                }
+                if (input.ExpressionAttributeValues[':completedAt']) {
+                  executionState.completedAt = input.ExpressionAttributeValues[':completedAt'];
+                }
+              }
+              return { Attributes: { ...executionState } };
+            });
+
+            const service = TrainingService.getInstance();
+
+            let lastResult;
+            for (let i = 0; i < validExercises.length; i++) {
+              // Even positions answer correctly; odd positions pick the next option (always different, as there are >= 3 options)
+              const answerIndex = i % 2 === 0 ? validExercises[i].correctOptionIndex : (validExercises[i].correctOptionIndex + 1) % validExercises[i].options.length;
+              lastResult = await service.submitAnswer(executionId, userId, i, String(answerIndex));
+
+              expect(lastResult.success).toBe(true);
+              expect(lastResult.result).toBeDefined();
+
+              // Verify correctness: correct if submitted index === correctOptionIndex
+              const expectedCorrect = answerIndex === validExercises[i].correctOptionIndex;
+              expect(lastResult.result!.correct).toBe(expectedCorrect);
+            }
+
+            // After all answers are submitted
+            expect(lastResult!.completed).toBe(true);
+            expect(executionState.correctCount + executionState.incorrectCount).toBe(validExercises.length);
+          },
+        ),
+        { numRuns: 100 },
+      );
+    },
+  );
+
+  /** Starting a TEXT_INPUT or MULTIPLE_CHOICE training must succeed without invoking Bedrock and without aiExercises on the execution.
+   * Feature: ai-training-mode, Property 8: Non-AI trainings backward compatibility
+   */
+  test(
+    'Feature: ai-training-mode, Property 8: Non-AI trainings backward compatibility',
+    { timeout: 60000 },
+    async () => {
+      await fc.assert(
+        fc.asyncProperty(
+          fc.uuid(),
+          fc.constantFrom('TEXT_INPUT' as const, 'MULTIPLE_CHOICE' as const),
+          fc.array(
+            fc.record({
+              word: fc.string({ minLength: 1, maxLength: 20 }),
+              translation: fc.string({ minLength: 1, maxLength: 20 }),
+              vocabularyListId: fc.uuid(),
+            }),
+            { minLength: 3, maxLength: 10 },
+          ),
+          async (userId, mode, words) => {
+            ddbMock.reset();
+            bedrockMock.reset();
+
+            const trainingId = crypto.randomUUID();
+
+            const training: Training = {
+              id: trainingId,
+              userId,
+              name: 'Non-AI Training',
+              mode,
+              direction: 'WORD_TO_TRANSLATION',
+              vocabularyListIds: [...new Set(words.map((w) => w.vocabularyListId))],
+              words,
+              createdAt: '2024-01-01T00:00:00.000Z',
+              updatedAt: '2024-01-01T00:00:00.000Z',
+            };
+
+            ddbMock.on(GetCommand).callsFake((input) => {
+              if (input.TableName === TRAINING_TABLE) {
+                if (input.Key.id === trainingId) {
+                  return { Item: { ...training } };
+                }
+                return { Item: undefined };
+              }
+              return {};
+            });
+
+            ddbMock.on(PutCommand).resolves({});
+
+            const service = TrainingService.getInstance();
+            const result = await service.startTraining(trainingId, userId);
+
+            expect(result.success).toBe(true);
+            expect(result.execution).toBeDefined();
+
+            // Bedrock must not be invoked for non-AI trainings (bedrockMock was reset at the start of this run)
+            expect(bedrockMock.commandCalls(InvokeModelCommand).length).toBe(0);
+
+            // The execution must not carry an aiExercises field
+            expect(result.execution!.aiExercises).toBeUndefined();
+          },
+        ),
+        { numRuns: 100 },
+      );
+    },
+  );
+});

From 15801476344637d66a261fc8ead7eb536a221944 Mon Sep 17 00:00:00 2001
From: Kiro Agent <244629292+kiro-agent@users.noreply.github.com>
Date: Wed, 8 Apr 2026 09:42:56 +0000
Subject: [PATCH 6/6] feat: add AI training mode to Flutter frontend with
 exercise widget and mode selection

Co-authored-by: Johannes Koch <github@lockhead.net>
---
 .kiro/specs/ai-training-mode/tasks.md         |  56 +++++-----
 .../src/lib/providers/training_provider.dart  |   2 +
 .../lib/screens/training_creation_screen.dart |  11 +-
 .../screens/training_execution_screen.dart    |  62 ++++++++---
 .../src/lib/widgets/ai_exercise_widget.dart   | 103 ++++++++++++++++++
 5 files changed, 190 insertions(+), 44 deletions(-)
 create mode 100644 frontend/src/lib/widgets/ai_exercise_widget.dart

diff --git a/.kiro/specs/ai-training-mode/tasks.md b/.kiro/specs/ai-training-mode/tasks.md
index e6d15ba..833829b 100644
--- a/.kiro/specs/ai-training-mode/tasks.md
+++ b/.kiro/specs/ai-training-mode/tasks.md
@@ -6,21 +6,21 @@ Extend the existing training system with an AI-powered training mode (`AI_TRAINI
 
 ## Tasks
 
-- [ ] 1. Extend domain model and GraphQL schema
-  - [ ] 1.1 Add `AI_TRAINING` to `TrainingMode` type and add `AIExercise` interface in `backend/src/model/domain/Training.ts`
+- [x] 1. Extend domain model and GraphQL schema
+  - [x] 1.1 Add `AI_TRAINING` to `TrainingMode` type and add `AIExercise` interface in `backend/src/model/domain/Training.ts`
     - Extend `TrainingMode` to `'TEXT_INPUT' | 'MULTIPLE_CHOICE' | 'AI_TRAINING'`
     - Add `AIExercise` interface with `prompt`, `options`, `correctOptionIndex`, `exerciseType`, `sourceWord`
     - Add optional `aiExercises?: AIExercise[]` field to `TrainingExecution`
     - _Requirements: 1.2, 3.3, 3.4_
 
-  - [ ] 1.2 Update GraphQL schema in `backend/src/gql-schemas/schema.graphql`
+  - [x] 1.2 Update GraphQL schema in `backend/src/gql-schemas/schema.graphql`
     - Add `AI_TRAINING` to `TrainingMode` enum
     - Add `AIExercise` type with `prompt: String!`, `options: [String!]!`, `correctOptionIndex: Int!`, `exerciseType: String!`, `sourceWord: String!`
     - Add `aiExercises: [AIExercise!]` to `TrainingExecution` type
     - _Requirements: 1.1, 3.1, 3.2, 10.3_
 
-- [ ] 2. Implement AI exercise generation in AIService
-  - [ ] 2.1 Add `generateExercises` method to `backend/src/services/ai-service.ts`
+- [x] 2. Implement AI exercise generation in AIService
+  - [x] 2.1 Add `generateExercises` method to `backend/src/services/ai-service.ts`
     - Accept `words` array (with word, translation, definition, partOfSpeech, exampleSentence), `sourceLanguage`, `targetLanguage`, `userId`
     - Check rate limit via existing `checkRateLimit`
     - Build structured Bedrock prompt including all word fields and both languages
@@ -31,20 +31,20 @@ Extend the existing training system with an AI-powered training mode (`AI_TRAINI
     - Return `AIExercise[]`
     - _Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 6.1, 6.2, 6.3, 7.1, 7.2, 7.3, 7.4_
 
-  - [ ]* 2.2 Write property test: AI exercise prompt construction includes all context (Property 2)
+  - [x]* 2.2 Write property test: AI exercise prompt construction includes all context (Property 2)
     - **Property 2: AI exercise prompt construction includes all context**
     - **Validates: Requirements 2.2, 2.5**
 
-  - [ ]* 2.3 Write property test: AI exercise parsing and validation (Property 3)
+  - [x]* 2.3 Write property test: AI exercise parsing and validation (Property 3)
     - **Property 3: AI exercise parsing and validation**
     - **Validates: Requirements 2.3, 2.6, 7.1, 7.3**
 
-  - [ ]* 2.4 Write property test: AI exercise round-trip serialization (Property 4)
+  - [x]* 2.4 Write property test: AI exercise round-trip serialization (Property 4)
     - **Property 4: AI exercise round-trip serialization**
     - **Validates: Requirements 7.5**
 
 
-  - [ ]* 2.5 Write unit tests for AI exercise generation edge cases
+  - [x]* 2.5 Write unit tests for AI exercise generation edge cases
     - Test `generateExercises` returns error when Bedrock fails (Req 2.7)
     - Test `generateExercises` returns rate limit error when limit exceeded (Req 6.1, 6.2)
     - Test `generateExercises` logs usage with userId, operation, tokenCount (Req 6.3)
@@ -52,17 +52,17 @@ Extend the existing training system with an AI-powered training mode (`AI_TRAINI
     - Test all exercises invalid returns error (Req 7.4)
     - _Requirements: 2.7, 6.1, 6.2, 6.3, 7.2, 7.4_
 
-- [ ] 3. Checkpoint - Ensure all tests pass
+- [x] 3. Checkpoint - Ensure all tests pass
   - Ensure all tests pass, ask the user if questions arise.
 
-- [ ] 4. Implement AI training creation and start in TrainingService
-  - [ ] 4.1 Modify `createTraining` in `backend/src/services/training-service.ts` to accept `AI_TRAINING` mode
+- [x] 4. Implement AI training creation and start in TrainingService
+  - [x] 4.1 Modify `createTraining` in `backend/src/services/training-service.ts` to accept `AI_TRAINING` mode
     - For static AI trainings: word selection works identically to existing modes (fetch from vocab lists, shuffle, slice)
     - For randomized AI trainings: store `words: []` as with existing randomized behavior
     - No special handling needed beyond accepting the new mode value
     - _Requirements: 1.3, 5.3_
 
-  - [ ] 4.2 Modify `startTraining` in `backend/src/services/training-service.ts` to add AI_TRAINING branch
+  - [x] 4.2 Modify `startTraining` in `backend/src/services/training-service.ts` to add AI_TRAINING branch
     - For AI_TRAINING mode: select words (from `training.words` for static, dynamically for randomized)
     - Fetch vocabulary lists to get full word details (definition, partOfSpeech, exampleSentence) and language info (sourceLanguage, targetLanguage)
     - Call `AIService.generateExercises(words, sourceLanguage, targetLanguage, userId)`
@@ -70,54 +70,54 @@ Extend the existing training system with an AI-powered training mode (`AI_TRAINI
     - Error if fewer than 1 word available
     - _Requirements: 2.1, 4.1, 4.2, 4.3, 4.6, 5.1, 5.2_
 
-  - [ ] 4.3 Modify `submitAnswer` in `backend/src/services/training-service.ts` to add AI_TRAINING branch
+  - [x] 4.3 Modify `submitAnswer` in `backend/src/services/training-service.ts` to add AI_TRAINING branch
     - Look up `execution.aiExercises[wordIndex]`
     - Compare `parseInt(answer)` against `exercise.correctOptionIndex`
     - Build `TrainingResult` with prompt as `word`, correct option as `expectedAnswer`
     - Completion check: `results.length === aiExercises.length`
     - _Requirements: 4.4, 4.5_
 
-  - [ ]* 4.4 Write property test: AI training creation stores mode correctly (Property 1)
+  - [x]* 4.4 Write property test: AI training creation stores mode correctly (Property 1)
     - **Property 1: AI training creation stores mode correctly**
     - **Validates: Requirements 1.3**
 
-  - [ ]* 4.5 Write property test: AI training start produces one exercise per selected word (Property 5)
+  - [x]* 4.5 Write property test: AI training start produces one exercise per selected word (Property 5)
     - **Property 5: AI training start produces one exercise per selected word**
     - **Validates: Requirements 2.1, 4.1, 4.3**
 
-  - [ ]* 4.6 Write property test: AI answer submission and completion (Property 6)
+  - [x]* 4.6 Write property test: AI answer submission and completion (Property 6)
     - **Property 6: AI answer submission and completion**
     - **Validates: Requirements 4.4, 4.5**
 
-  - [ ]* 4.7 Write property test: Randomized AI training dynamic selection and generation (Property 7)
+  - [x]* 4.7 Write property test: Randomized AI training dynamic selection and generation (Property 7)
     - **Property 7: Randomized AI training dynamic selection and generation**
     - **Validates: Requirements 5.1, 5.2**
 
-  - [ ]* 4.8 Write property test: Non-AI trainings backward compatibility (Property 8)
+  - [x]* 4.8 Write property test: Non-AI trainings backward compatibility (Property 8)
     - **Property 8: Non-AI trainings backward compatibility**
     - **Validates: Requirements 10.1**
 
-  - [ ]* 4.9 Write unit tests for AI training service edge cases
+  - [x]* 4.9 Write unit tests for AI training service edge cases
     - Test start AI training with 0 available words returns error (Req 4.6)
     - Test existing TEXT_INPUT/MULTIPLE_CHOICE trainings unaffected by AI changes (Req 10.1, 10.2)
     - Test AI answer submission with correct and incorrect option indices (Req 4.4)
     - Test AI training completion after all exercises answered (Req 4.5)
     - _Requirements: 4.4, 4.5, 4.6, 10.1, 10.2_
 
-- [ ] 5. Checkpoint - Ensure all backend tests pass
+- [x] 5. Checkpoint - Ensure all backend tests pass
   - Ensure all tests pass, ask the user if questions arise.
 
-- [ ] 6. Update frontend training provider and creation UI
-  - [ ] 6.1 Update `startTraining` GraphQL mutation in `frontend/src/lib/providers/training_provider.dart` to fetch `aiExercises { prompt options correctOptionIndex exerciseType sourceWord }` on the execution response
+- [x] 6. Update frontend training provider and creation UI
+  - [x] 6.1 Update `startTraining` GraphQL mutation in `frontend/src/lib/providers/training_provider.dart` to fetch `aiExercises { prompt options correctOptionIndex exerciseType sourceWord }` on the execution response
     - _Requirements: 3.2, 9.1_
 
-  - [ ] 6.2 Add `AI_TRAINING` as a third `ChoiceChip` option in the mode selector on `frontend/src/lib/screens/training_creation_screen.dart`
+  - [x] 6.2 Add `AI_TRAINING` as a third `ChoiceChip` option in the mode selector on `frontend/src/lib/screens/training_creation_screen.dart`
     - Allow vocabulary list selection, word count, and randomized mode same as other modes
     - Send mode as `AI_TRAINING` in the `createTraining` mutation
     - _Requirements: 8.1, 8.2, 8.3_
 
-- [ ] 7. Create AI exercise display widget and update execution screen
-  - [ ] 7.1 Create `frontend/src/lib/widgets/ai_exercise_widget.dart`
+- [x] 7. Create AI exercise display widget and update execution screen
+  - [x] 7.1 Create `frontend/src/lib/widgets/ai_exercise_widget.dart`
     - Display exercise type label (e.g., "Verb Conjugation")
     - Display prompt sentence
     - Display answer options as tappable buttons
@@ -125,13 +125,13 @@ Extend the existing training system with an AI-powered training mode (`AI_TRAINI
     - Show correct/incorrect feedback with the correct answer highlighted
     - _Requirements: 9.1, 9.2, 9.3_
 
-  - [ ] 7.2 Update `frontend/src/lib/screens/training_execution_screen.dart` to handle AI_TRAINING mode
+  - [x] 7.2 Update `frontend/src/lib/screens/training_execution_screen.dart` to handle AI_TRAINING mode
     - When mode is AI_TRAINING, render `AIExerciseWidget` instead of text input or multiple choice
     - Show progress indicator (current exercise / total)
     - Navigate to results screen when all exercises completed
     - _Requirements: 9.1, 9.4_
 
-- [ ] 8. Final checkpoint - Ensure all tests pass
+- [x] 8. Final checkpoint - Ensure all tests pass
   - Ensure all tests pass, ask the user if questions arise.
 
 ## Notes
diff --git a/frontend/src/lib/providers/training_provider.dart b/frontend/src/lib/providers/training_provider.dart
index 553ad40..93f42d6 100644
--- a/frontend/src/lib/providers/training_provider.dart
+++ b/frontend/src/lib/providers/training_provider.dart
@@ -309,6 +309,7 @@ class TrainingProvider extends ChangeNotifier {
               results { wordIndex word expectedAnswer userAnswer correct }
               multipleChoiceOptions { wordIndex options }
               words { word translation vocabularyListId unit }
+              aiExercises { prompt options correctOptionIndex exerciseType sourceWord }
             }
             error
           }
@@ -359,6 +360,7 @@ class TrainingProvider extends ChangeNotifier {
               id trainingId userId startedAt completedAt correctCount incorrectCount
               results { wordIndex word expectedAnswer userAnswer correct }
               multipleChoiceOptions { wordIndex options }
+              aiExercises { prompt options correctOptionIndex exerciseType sourceWord }
             }
             error
           }
diff --git a/frontend/src/lib/screens/training_creation_screen.dart b/frontend/src/lib/screens/training_creation_screen.dart
index 4fec373..ec9a0d2 100644
--- a/frontend/src/lib/screens/training_creation_screen.dart
+++ b/frontend/src/lib/screens/training_creation_screen.dart
@@ -186,7 +186,9 @@ class _TrainingCreationScreenState extends State<TrainingCreationScreen> {
                   style: TextStyle(fontSize: 16, fontWeight: FontWeight.bold),
                 ),
                 const SizedBox(height: 8),
-                Row(
+                Wrap(
+                  spacing: 8,
+                  runSpacing: 8,
                   children: [
                     ChoiceChip(
                       label: const Text('Text Input'),
@@ -194,13 +196,18 @@ class _TrainingCreationScreenState extends State<TrainingCreationScreen> {
                       selectedColor: const Color(0xFF2B6CB0).withValues(alpha: 0.2),
                       onSelected: (_) => setState(() => _selectedMode = 'TEXT_INPUT'),
                     ),
-                    const SizedBox(width: 8),
                     ChoiceChip(
                       label: const Text('Multiple Choice'),
                       selected: _selectedMode == 'MULTIPLE_CHOICE',
                       selectedColor: const Color(0xFFF0932B).withValues(alpha: 0.2),
                       onSelected: (_) => setState(() => _selectedMode = 'MULTIPLE_CHOICE'),
                     ),
+                    ChoiceChip(
+                      label: const Text('AI Training'),
+                      selected: _selectedMode == 'AI_TRAINING',
+                      selectedColor: const Color(0xFF6B46C1).withValues(alpha: 0.2),
+                      onSelected: (_) => setState(() => _selectedMode = 'AI_TRAINING'),
+                    ),
                   ],
                 ),
                 const SizedBox(height: 24),
diff --git a/frontend/src/lib/screens/training_execution_screen.dart b/frontend/src/lib/screens/training_execution_screen.dart
index e841631..f32e011 100644
--- a/frontend/src/lib/screens/training_execution_screen.dart
+++ b/frontend/src/lib/screens/training_execution_screen.dart
@@ -6,6 +6,7 @@ import '../providers/training_provider.dart';
 import '../providers/vocabulary_provider.dart';
 import '../services/feedback_sound_service.dart';
 import '../widgets/answer_feedback_animation.dart';
+import '../widgets/ai_exercise_widget.dart';
 
 /// Screen for executing a training session
 class TrainingExecutionScreen extends StatefulWidget {
@@ -31,6 +32,7 @@ class _TrainingExecutionScreenState extends State<TrainingExecutionScreen> {
   bool _soundMuted = FeedbackSoundService().isMuted;
   final TextEditingController _answerController = TextEditingController();
   final Set<int> _flaggedIndices = {};
+  int? _selectedAIOptionIndex;
 
   @override
   void initState() {
@@ -86,6 +88,16 @@ class _TrainingExecutionScreenState extends State<TrainingExecutionScreen> {
     return null;
   }
 
+  List<dynamic> get _aiExercises {
+    return (_execution?['aiExercises'] as List<dynamic>?) ?? [];
+  }
+
+  Future<void> _submitAIAnswer(int optionIndex) async {
+    if (_showFeedback) return;
+    setState(() => _selectedAIOptionIndex = optionIndex);
+    await _submitAnswer(optionIndex.toString());
+  }
+
   Future<void> _submitAnswer(String answer) async {
     if (_showFeedback) return;
 
@@ -121,6 +133,7 @@ class _TrainingExecutionScreenState extends State<TrainingExecutionScreen> {
           _currentWordIndex++;
           _showFeedback = false;
           _lastResult = null;
+          _selectedAIOptionIndex = null;
         });
       }
     });
@@ -174,7 +187,10 @@ class _TrainingExecutionScreenState extends State<TrainingExecutionScreen> {
   Widget build(BuildContext context) {
     final words = _words;
 
-    if (_execution == null || words.isEmpty) {
+    final isAIMode = _currentMode == 'AI_TRAINING';
+    final totalWords = isAIMode ? _aiExercises.length : words.length;
+
+    if (_execution == null || totalWords == 0) {
       return Scaffold(
         appBar: AppBar(
           title: const Text('Training'),
@@ -184,10 +200,9 @@ class _TrainingExecutionScreenState extends State<TrainingExecutionScreen> {
       );
     }
 
-    final totalWords = words.length;
     final progress = totalWords > 0 ? (_currentWordIndex + 1) / totalWords : 0.0;
-    final currentWord = words[_currentWordIndex] as Map<String, dynamic>;
-    final wordText = currentWord['word'] as String? ?? '';
+    final currentWord = isAIMode ? null : (words[_currentWordIndex] as Map<String, dynamic>);
+    final wordText = currentWord?['word'] as String? ?? '';
 
     return Scaffold(
       appBar: AppBar(
@@ -214,19 +229,23 @@ class _TrainingExecutionScreenState extends State<TrainingExecutionScreen> {
             ),
             const SizedBox(height: 32),
 
-            // Current word
-            Text(
-              wordText,
-              textAlign: TextAlign.center,
-              style: Theme.of(context).textTheme.headlineMedium?.copyWith(
-                    fontWeight: FontWeight.bold,
-                  ),
-            ),
-            const SizedBox(height: 32),
+            // Current word (hidden for AI mode - the exercise widget shows the prompt)
+            if (!isAIMode) ...[
+              Text(
+                wordText,
+                textAlign: TextAlign.center,
+                style: Theme.of(context).textTheme.headlineMedium?.copyWith(
+                      fontWeight: FontWeight.bold,
+                    ),
+              ),
+              const SizedBox(height: 32),
+            ],
 
             // Input area
-            if (_showFeedback)
+            if (_showFeedback && !isAIMode)
               _buildFeedback()
+            else if (_currentMode == 'AI_TRAINING')
+              _buildAIExercise()
             else if (_currentMode == 'MULTIPLE_CHOICE')
               _buildMultipleChoice()
             else
@@ -311,6 +330,21 @@ class _TrainingExecutionScreenState extends State<TrainingExecutionScreen> {
     );
   }
 
+  /// Builds the widget for the exercise at `_currentWordIndex`, or an empty
+  /// box when that index is past the end of the exercise list.
+  Widget _buildAIExercise() {
+    final pending = _aiExercises;
+    if (pending.length <= _currentWordIndex) return const SizedBox.shrink();
+
+    return AIExerciseWidget(
+      exercise: pending[_currentWordIndex] as Map<String, dynamic>,
+      onAnswerSelected: _submitAIAnswer,
+      showFeedback: _showFeedback,
+      selectedIndex: _selectedAIOptionIndex,
+      isCorrect: _lastResult?['correct'] as bool?,
+    );
+  }
+
   Widget _buildFeedback() {
     final isCorrect = _lastResult?['correct'] as bool? ?? false;
     final expected = _lastResult?['expectedAnswer'] as String?;
diff --git a/frontend/src/lib/widgets/ai_exercise_widget.dart b/frontend/src/lib/widgets/ai_exercise_widget.dart
new file mode 100644
index 0000000..e4e7fd9
--- /dev/null
+++ b/frontend/src/lib/widgets/ai_exercise_widget.dart
@@ -0,0 +1,103 @@
+import 'package:flutter/material.dart';
+
+/// Shows one AI-generated multiple-choice exercise. While [showFeedback] is
+/// set, the buttons are disabled and the correct / wrong choice is tinted.
+class AIExerciseWidget extends StatelessWidget {
+  final Map<String, dynamic> exercise;
+  final void Function(int) onAnswerSelected;
+  final bool showFeedback;
+  final int? selectedIndex;
+  /// Not read by [build]; feedback is derived from `correctOptionIndex`.
+  final bool? isCorrect;
+
+  const AIExerciseWidget({
+    super.key,
+    required this.exercise,
+    required this.onAnswerSelected,
+    this.showFeedback = false,
+    this.selectedIndex,
+    this.isCorrect,
+  });
+
+  /// Title-cases an identifier: `fill_in_the_blank` -> `Fill In The Blank`.
+  String _formatExerciseType(String type) => type
+      .replaceAll('_', ' ')
+      .split(' ')
+      .map((w) => w.isEmpty ? '' : '${w[0].toUpperCase()}${w.substring(1)}')
+      .join(' ');
+
+  @override
+  Widget build(BuildContext context) {
+    final exerciseType = exercise['exerciseType'] as String? ?? '';
+    final prompt = exercise['prompt'] as String? ?? '';
+    final options = (exercise['options'] as List<dynamic>?) ?? [];
+    final correctIndex = exercise['correctOptionIndex'] as int? ?? 0;
+
+    return Column(
+      crossAxisAlignment: CrossAxisAlignment.stretch,
+      children: [
+        // Exercise type label
+        Container(
+          padding: const EdgeInsets.symmetric(horizontal: 12, vertical: 6),
+          decoration: BoxDecoration(
+            color: const Color(0xFF6B46C1).withValues(alpha: 0.1),
+            borderRadius: BorderRadius.circular(16),
+          ),
+          child: Text(
+            _formatExerciseType(exerciseType),
+            style: const TextStyle(
+              color: Color(0xFF6B46C1),
+              fontWeight: FontWeight.w600,
+              fontSize: 14,
+            ),
+            textAlign: TextAlign.center,
+          ),
+        ),
+        const SizedBox(height: 16),
+
+        // Prompt
+        Text(
+          prompt,
+          style: Theme.of(context).textTheme.titleLarge?.copyWith(
+                fontWeight: FontWeight.w500,
+              ),
+          textAlign: TextAlign.center,
+        ),
+        const SizedBox(height: 24),
+
+        // Options
+        ...List.generate(options.length, (index) {
+          final optionText = options[index] as String? ?? '';
+          final isSelected = selectedIndex == index;
+          final isCorrectOption = index == correctIndex;
+
+          Color? backgroundColor;
+          Color? textColor;
+          if (showFeedback && isCorrectOption) {
+            backgroundColor = Colors.green.withValues(alpha: 0.2);
+            textColor = Colors.green.shade800;
+          } else if (showFeedback && isSelected) {
+            backgroundColor = Colors.red.withValues(alpha: 0.2);
+            textColor = Colors.red.shade800;
+          }
+
+          return Padding(
+            padding: const EdgeInsets.only(bottom: 12),
+            child: ElevatedButton(
+              onPressed: showFeedback ? null : () => onAnswerSelected(index),
+              style: ElevatedButton.styleFrom(
+                padding: const EdgeInsets.all(16),
+                backgroundColor: backgroundColor,
+                foregroundColor: textColor,
+                // Feedback disables the buttons, so tint the disabled state too.
+                disabledBackgroundColor: backgroundColor,
+                disabledForegroundColor: textColor,
+              ),
+              child: Text(optionText),
+            ),
+          );
+        }),
+      ],
+    );
+  }
+}