BasedHardware · beastoin · Jan 22, 2026 · Jan 19, 2026 · Jan 21, 2026 · Jan 21, 2026
diff --git a/app/lib/backend/schema/person.dart b/app/lib/backend/schema/person.dart
@@ -31,6 +31,8 @@ class Person {
   final DateTime createdAt;
   final DateTime updatedAt;
   final List<String>? speechSamples;
+  final List<String>? speechSampleTranscripts;
+  final int speechSamplesVersion;
   final int? colorIdx;
 
   Person({
@@ -39,6 +41,8 @@ class Person {
     required this.createdAt,
     required this.updatedAt,
     this.speechSamples,
+    this.speechSampleTranscripts,
+    this.speechSamplesVersion = 1,
     this.colorIdx,
   });
 
@@ -49,6 +53,10 @@ class Person {
       createdAt: DateTime.parse(json['created_at']).toLocal(),
       updatedAt: DateTime.parse(json['updated_at']).toLocal(),
       speechSamples: json['speech_samples'] != null ? List<String>.from(json['speech_samples']) : [],
+      speechSampleTranscripts: json['speech_sample_transcripts'] != null
+          ? List<String>.from(json['speech_sample_transcripts'])
+          : null,
+      speechSamplesVersion: json['speech_samples_version'] ?? 1,
       colorIdx: json['color_idx'] ?? json['id'].hashCode % speakerColors.length,
     );
   }
@@ -60,6 +68,8 @@ class Person {
       'created_at': createdAt.toUtc().toIso8601String(),
       'updated_at': updatedAt.toUtc().toIso8601String(),
       'speech_samples': speechSamples ?? [],
+      'speech_sample_transcripts': speechSampleTranscripts,
+      'speech_samples_version': speechSamplesVersion,
       'color_idx': colorIdx,
     };
   }

diff --git a/app/lib/pages/settings/people.dart b/app/lib/pages/settings/people.dart
@@ -303,7 +303,28 @@ class _UserPeoplePageState extends State<_UserPeoplePage> {
                                           title:
                                               Text(j == 0 ? context.l10n.speechProfile : context.l10n.sampleNumber(j)),
                                           onTap: () => _confirmDeleteSample(index, person, j, provider),
-                                          subtitle: Text(context.l10n.tapToDelete),
+                                          subtitle: Column(
+                                            crossAxisAlignment: CrossAxisAlignment.start,
+                                            children: [
+                                              if (person.speechSampleTranscripts != null &&
+                                                  j < person.speechSampleTranscripts!.length &&
+                                                  person.speechSampleTranscripts![j].isNotEmpty)
+                                                Padding(
+                                                  padding: const EdgeInsets.only(bottom: 4),
+                                                  child: Text(
+                                                    '"${person.speechSampleTranscripts![j]}"',
+                                                    style: const TextStyle(
+                                                      fontSize: 14,
+                                                      fontStyle: FontStyle.italic,
+                                                    ),
+                                                  ),
+                                                ),
+                                              Text(
+                                                context.l10n.tapToDelete,
+                                                style: const TextStyle(fontSize: 12, color: Colors.grey),
+                                              ),
+                                            ],
+                                          ),
                                         )),
                                   ],
                                 ),

diff --git a/backend/database/users.py b/backend/database/users.py
@@ -100,40 +100,65 @@ def delete_person(uid: str, person_id: str):
     person_ref.delete()
 
 
-def add_person_speech_sample(uid: str, person_id: str, sample_path: str, max_samples: int = 5) -> bool:
+@transactional
+def _add_sample_transaction(transaction, person_ref, sample_path, transcript, max_samples):
+    """
+    Atomically append a speech sample (and optionally its transcript) to a person.
+
+    The person document is read through ``transaction`` and written back with
+    ``transaction.update``, so the limit check and the write see the same state.
+
+    Args:
+        transaction: Firestore transaction the read and the write are bound to
+        person_ref: Document reference of the person to update
+        sample_path: Sample path appended to 'speech_samples'
+        transcript: Transcript for the new sample, or None to leave
+            'speech_sample_transcripts' and 'speech_samples_version' untouched
+        max_samples: Maximum number of samples a person may hold
+
+    Returns:
+        True if the sample was added, False if the person does not exist or
+        already holds max_samples samples
+    """
+    snapshot = person_ref.get(transaction=transaction)
+    if not snapshot.exists:
+        return False
+
+    person_data = snapshot.to_dict()
+    # `or []` instead of a .get() default: the default is only used when the key
+    # is missing, so a field stored as null would otherwise reach len() as None.
+    samples = person_data.get('speech_samples') or []
+
+    if len(samples) >= max_samples:
+        return False
+
+    existing_sample_count = len(samples)
+    samples.append(sample_path)
+    update_data = {
+        'speech_samples': samples,
+        'updated_at': datetime.now(timezone.utc),
+    }
+
+    if transcript is not None:
+        transcripts = person_data.get('speech_sample_transcripts') or []
+        # Keep transcripts index-aligned with the samples that existed before
+        # this call, so the new transcript lands at the new sample's index:
+        # - too short: pad with empty strings (the Dart client parses the
+        #   field as List<String>, so entries must be non-null)
+        # - too long: drop surplus entries that have no matching sample
+        if len(transcripts) < existing_sample_count:
+            transcripts.extend([''] * (existing_sample_count - len(transcripts)))
+        else:
+            del transcripts[existing_sample_count:]
+        transcripts.append(transcript)
+        update_data['speech_sample_transcripts'] = transcripts
+        update_data['speech_samples_version'] = 2
+
+    transaction.update(person_ref, update_data)
+    return True
+
+
+def add_person_speech_sample(
+    uid: str, person_id: str, sample_path: str, transcript: Optional[str] = None, max_samples: int = 5
+) -> bool:
     """
     Append speech sample path to person's speech_samples list.
     Limits to max_samples to prevent unlimited growth.
 
+    Uses Firestore transaction to ensure atomic read-modify-write,
+    preventing array drift from concurrent updates.
+
     Args:
         uid: User ID
         person_id: Person ID
         sample_path: GCS path to the speech sample
+        transcript: Optional transcript text for the sample
+            (when given, it is stored and speech_samples_version is set to 2)
         max_samples: Maximum number of samples to keep (default 5)
 
     Returns:
-        True if sample was added, False if limit reached
+        True if sample was added, False if limit reached or person not found
     """
     person_ref = db.collection('users').document(uid).collection('people').document(person_id)
-    person_doc = person_ref.get()
-
-    if not person_doc.exists:
-        return False
-
-    person_data = person_doc.to_dict()
-    current_samples = person_data.get('speech_samples', [])
-
-    # Check if we've hit the limit
-    if len(current_samples) >= max_samples:
-        return False
-
-    person_ref.update(
-        {
-            'speech_samples': firestore.ArrayUnion([sample_path]),
-            'updated_at': datetime.now(timezone.utc),
-        }
-    )
-    return True
+    # The existence check, the limit check and the write all happen inside
+    # _add_sample_transaction, which reads and updates through this transaction.
+    transaction = db.transaction()
+    return _add_sample_transaction(transaction, person_ref, sample_path, transcript, max_samples)
 
 
 def get_person_speech_samples_count(uid: str, person_id: str) -> int:
@@ -151,24 +176,41 @@ def get_person_speech_samples_count(uid: str, person_id: str) -> int:
 def remove_person_speech_sample(uid: str, person_id: str, sample_path: str) -> bool:
     """
     Remove a speech sample path from person's speech_samples list.
+    Also removes the corresponding transcript at the same index to keep arrays in sync.
 
     Args:
         uid: User ID
         person_id: Person ID
         sample_path: GCS path to remove
 
     Returns:
-        True if removed, False if person not found
+        True if removed, False if person or sample not found
     """
     person_ref = db.collection('users').document(uid).collection('people').document(person_id)
     person_doc = person_ref.get()
 
     if not person_doc.exists:
         return False
 
+    person_data = person_doc.to_dict()
+    samples = person_data.get('speech_samples', [])
+    transcripts = person_data.get('speech_sample_transcripts', [])
+
+    # Find index of sample to remove
+    try:
+        idx = samples.index(sample_path)
+    except ValueError:
+        return False  # Sample not found
+
+    # Remove from both arrays by index
+    samples.pop(idx)
+    if idx < len(transcripts):
+        transcripts.pop(idx)
+
     person_ref.update(
         {
-            'speech_samples': firestore.ArrayRemove([sample_path]),
+            'speech_samples': samples,
+            'speech_sample_transcripts': transcripts,
             'updated_at': datetime.now(timezone.utc),
         }
     )
@@ -223,6 +265,148 @@ def get_person_speaker_embedding(uid: str, person_id: str) -> Optional[list]:
     return person_data.get('speaker_embedding')
 
 
+def set_person_speech_sample_transcript(uid: str, person_id: str, sample_index: int, transcript: str) -> bool:
+    """
+    Update transcript at a specific index in the speech_sample_transcripts array.
+
+    The index is validated against speech_samples. If the transcript list is
+    shorter than the sample list it is padded with empty strings first, so the
+    write cannot fail on a missing slot.
+
+    NOTE: this is a plain read followed by an update, not a transaction.
+
+    Args:
+        uid: User ID
+        person_id: Person ID
+        sample_index: Index of the sample/transcript to update
+        transcript: The transcript text to set
+
+    Returns:
+        True if updated successfully, False if person not found or index out of bounds
+    """
+    person_ref = db.collection('users').document(uid).collection('people').document(person_id)
+    person_doc = person_ref.get()
+
+    if not person_doc.exists:
+        return False
+
+    person_data = person_doc.to_dict()
+    # `or []` instead of a .get() default: the default is only used when the key
+    # is missing, so a field stored as null would otherwise reach len() as None.
+    samples = person_data.get('speech_samples') or []
+    transcripts = person_data.get('speech_sample_transcripts') or []
+
+    # Validate index against the samples, which define the valid slots
+    if not 0 <= sample_index < len(samples):
+        return False
+
+    # Pad the transcript list up to the sample count if it is shorter
+    missing = len(samples) - len(transcripts)
+    if missing > 0:
+        transcripts.extend([''] * missing)
+
+    transcripts[sample_index] = transcript
+
+    person_ref.update(
+        {
+            'speech_sample_transcripts': transcripts,
+            'updated_at': datetime.now(timezone.utc),
+        }
+    )
+    return True
+
+
+def update_person_speech_samples_after_migration(
+    uid: str,
+    person_id: str,
+    samples: list,
+    transcripts: list,
+    version: int,
+    speaker_embedding: Optional[list] = None,
+) -> bool:
+    """
+    Overwrite a person's samples, transcripts, version and embedding in one update.
+
+    All fields are sent in a single update call on the person document, so they
+    change together. Intended for use after the v1 to v2 migration.
+
+    Args:
+        uid: User ID
+        person_id: Person ID
+        samples: Sample paths to store in 'speech_samples'
+        transcripts: Transcript strings to store in 'speech_sample_transcripts'
+            (parallel to samples)
+        version: Value to store in 'speech_samples_version' (typically 2)
+        speaker_embedding: New embedding to store; None deletes the
+            'speaker_embedding' field instead
+
+    Returns:
+        True if the document was updated, False if the person does not exist
+    """
+    doc_ref = db.collection('users').document(uid).collection('people').document(person_id)
+    if not doc_ref.get().exists:
+        return False
+
+    # None means "clear": the field is deleted rather than stored as null
+    embedding_value = firestore.DELETE_FIELD if speaker_embedding is None else speaker_embedding
+
+    doc_ref.update(
+        {
+            'speech_samples': samples,
+            'speech_sample_transcripts': transcripts,
+            'speech_samples_version': version,
+            'updated_at': datetime.now(timezone.utc),
+            'speaker_embedding': embedding_value,
+        }
+    )
+    return True
+
+
+def clear_person_speaker_embedding(uid: str, person_id: str) -> bool:
+    """
+    Delete the 'speaker_embedding' field from a person document.
+    Used when all samples are dropped during migration.
+
+    Args:
+        uid: User ID
+        person_id: Person ID
+
+    Returns:
+        True if the field was cleared, False if the person does not exist
+    """
+    doc_ref = db.collection('users').document(uid).collection('people').document(person_id)
+    if not doc_ref.get().exists:
+        return False
+
+    changes = {
+        'speaker_embedding': firestore.DELETE_FIELD,
+        'updated_at': datetime.now(timezone.utc),
+    }
+    doc_ref.update(changes)
+    return True
+
+
+def update_person_speech_samples_version(uid: str, person_id: str, version: int) -> bool:
+    """
+    Set 'speech_samples_version' on a person document, leaving samples untouched.
+
+    Args:
+        uid: User ID
+        person_id: Person ID
+        version: Version number to store
+
+    Returns:
+        True if the version was written, False if the person does not exist
+    """
+    doc_ref = db.collection('users').document(uid).collection('people').document(person_id)
+    if not doc_ref.get().exists:
+        return False
+
+    changes = {
+        'speech_samples_version': version,
+        'updated_at': datetime.now(timezone.utc),
+    }
+    doc_ref.update(changes)
+    return True
+
+
 def delete_user_data(uid: str):
     user_ref = db.collection('users').document(uid)
     if not user_ref.get().exists:

diff --git a/backend/models/other.py b/backend/models/other.py
@@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import List
+from typing import List, Optional
 
 from pydantic import BaseModel, Field
 
@@ -24,3 +24,5 @@ class Person(BaseModel):
     created_at: datetime
     updated_at: datetime
     speech_samples: List[str] = []
+    speech_sample_transcripts: Optional[List[str]] = None
+    speech_samples_version: int = 1
diff --git a/backend/routers/users.py b/backend/routers/users.py
@@ -255,10 +255,10 @@ def get_all_people(include_speech_samples: bool = True, uid: str = Depends(auth.
     print('get_all_people', include_speech_samples)
     people = get_people(uid)
     if include_speech_samples:
-        # Convert stored GCS paths to signed URLs for each person
-        for person in people:
+        # Convert GCS paths to signed URLs for each person
+        for i, person in enumerate(people):
             stored_paths = person.get('speech_samples', [])
-            person['speech_samples'] = get_speech_sample_signed_urls(stored_paths)
+            people[i]['speech_samples'] = get_speech_sample_signed_urls(stored_paths)
     return people
 
 

diff --git a/backend/test.sh b/backend/test.sh
@@ -4,4 +4,12 @@ set -euo pipefail
 ROOT_DIR="$(cd "$(dirname "$0")" && pwd)"
 cd "$ROOT_DIR"
 
+# Use an ENCRYPTION_SECRET already set in the environment; otherwise fall back to the value below.
+# NOTE(review): confirm this fallback is a throwaway test key and not a real secret.
+export ENCRYPTION_SECRET="${ENCRYPTION_SECRET:-omi_ZwB2ZNqB2HHpMK6wStk7sTpavJiPTFg7gXUHnc4tFABPU6pZ2c2DKgehtfgi4RZv}"
+
 pytest tests/unit/test_transcript_segment.py -v
+pytest tests/unit/test_text_similarity.py -v
+pytest tests/unit/test_speaker_sample.py -v
+pytest tests/unit/test_speaker_sample_migration.py -v
+pytest tests/unit/test_users_add_sample_transaction.py -v