Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
4726f65
extract text_similarity to utils/text_utils.py for testability
beastoin Jan 19, 2026
eb6fc13
docs: add PRD and progress checklist for speech sample transcripts
beastoin Jan 21, 2026
a43d4d4
feat: add centralized speaker sample migration utility
beastoin Jan 21, 2026
cd6a770
docs: mark task 1 complete in progress checklist
beastoin Jan 21, 2026
9f4d3c2
feat: add speech_sample_transcripts and speech_samples_version to Per…
beastoin Jan 21, 2026
bae97ee
feat: add database functions for speech sample transcripts
beastoin Jan 21, 2026
983aa89
feat: refactor speaker identification to use centralized verification
beastoin Jan 21, 2026
32dc783
feat: add lazy migration to people API endpoints
beastoin Jan 21, 2026
380d4dc
feat: add JSON parsing for speech sample transcripts in Flutter Perso…
beastoin Jan 21, 2026
750786b
feat: display speech sample transcripts in People settings UI
beastoin Jan 21, 2026
0d800ae
test: verify backend tests pass (49 tests)
beastoin Jan 21, 2026
3d1b4aa
test: verify Flutter tests (pre-existing environment issue)
beastoin Jan 21, 2026
3f3f772
fix: address PR review - move migration to extraction, add locks, use…
beastoin Jan 22, 2026
4142816
fix: run migration before sample count check in extract_speaker_samples
beastoin Jan 22, 2026
7f18879
refactor: split speaker_sample_migration into two modules
beastoin Jan 22, 2026
eb57491
refactor: move GCS helpers to storage.py, add speech-profile wrappers
beastoin Jan 22, 2026
09830b4
fix: prevent migration from deleting samples on transient failures
beastoin Jan 22, 2026
fdc372d
fix: defer blob deletions and use empty strings for transcript padding
beastoin Jan 22, 2026
066c672
test: add unit tests for speaker sample verification and migration
beastoin Jan 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions app/lib/backend/schema/person.dart
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class Person {
final DateTime createdAt;
final DateTime updatedAt;
final List<String>? speechSamples;
final List<String>? speechSampleTranscripts;
final int speechSamplesVersion;
final int? colorIdx;

Person({
Expand All @@ -39,6 +41,8 @@ class Person {
required this.createdAt,
required this.updatedAt,
this.speechSamples,
this.speechSampleTranscripts,
this.speechSamplesVersion = 1,
this.colorIdx,
});

Expand All @@ -49,6 +53,10 @@ class Person {
createdAt: DateTime.parse(json['created_at']).toLocal(),
updatedAt: DateTime.parse(json['updated_at']).toLocal(),
speechSamples: json['speech_samples'] != null ? List<String>.from(json['speech_samples']) : [],
speechSampleTranscripts: json['speech_sample_transcripts'] != null
? List<String>.from(json['speech_sample_transcripts'])
: null,
speechSamplesVersion: json['speech_samples_version'] ?? 1,
colorIdx: json['color_idx'] ?? json['id'].hashCode % speakerColors.length,
);
}
Expand All @@ -60,6 +68,8 @@ class Person {
'created_at': createdAt.toUtc().toIso8601String(),
'updated_at': updatedAt.toUtc().toIso8601String(),
'speech_samples': speechSamples ?? [],
'speech_sample_transcripts': speechSampleTranscripts,
'speech_samples_version': speechSamplesVersion,
'color_idx': colorIdx,
};
}
Expand Down
23 changes: 22 additions & 1 deletion app/lib/pages/settings/people.dart
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,28 @@ class _UserPeoplePageState extends State<_UserPeoplePage> {
title:
Text(j == 0 ? context.l10n.speechProfile : context.l10n.sampleNumber(j)),
onTap: () => _confirmDeleteSample(index, person, j, provider),
subtitle: Text(context.l10n.tapToDelete),
subtitle: Column(
crossAxisAlignment: CrossAxisAlignment.start,
children: [
if (person.speechSampleTranscripts != null &&
j < person.speechSampleTranscripts!.length &&
person.speechSampleTranscripts![j].isNotEmpty)
Padding(
padding: const EdgeInsets.only(bottom: 4),
child: Text(
'"${person.speechSampleTranscripts![j]}"',
style: const TextStyle(
fontSize: 14,
fontStyle: FontStyle.italic,
),
),
),
Text(
context.l10n.tapToDelete,
style: const TextStyle(fontSize: 12, color: Colors.grey),
),
],
),
)),
],
),
Expand Down
230 changes: 207 additions & 23 deletions backend/database/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,40 +100,65 @@ def delete_person(uid: str, person_id: str):
person_ref.delete()


def add_person_speech_sample(uid: str, person_id: str, sample_path: str, max_samples: int = 5) -> bool:
@transactional
def _add_sample_transaction(transaction, person_ref, sample_path, transcript, max_samples):
    """Atomically append a speech sample, and optionally its transcript, to a person.

    The person document is read through ``transaction`` and the modified arrays
    are written back with ``transaction.update``, so the read-modify-write of
    both arrays happens inside one Firestore transaction.

    Args:
        transaction: Firestore transaction supplied by the caller.
        person_ref: Document reference of the person to update.
        sample_path: Sample path to append to ``speech_samples``.
        transcript: Transcript text for the new sample, or None to leave the
            transcript array and the samples version untouched.
        max_samples: Maximum number of samples a person may hold.

    Returns:
        True if the sample was added, False if the person does not exist or
        already holds ``max_samples`` samples.
    """
    snapshot = person_ref.get(transaction=transaction)
    if not snapshot.exists:
        return False

    person_data = snapshot.to_dict()
    # `or []` instead of a .get() default: the default only applies when the key
    # is absent, so a field stored as an explicit null would come back as None
    # and break len()/append() below.
    samples = person_data.get('speech_samples') or []

    if len(samples) >= max_samples:
        return False

    samples.append(sample_path)
    update_data = {
        'speech_samples': samples,
        'updated_at': datetime.now(timezone.utc),
    }

    if transcript is not None:
        transcripts = person_data.get('speech_sample_transcripts') or []
        # Keep the transcript array index-aligned with the samples: when earlier
        # samples have no transcript yet, pad with empty strings (not None, the
        # Dart client parses this field as a list of non-null strings).
        existing_sample_count = len(samples) - 1  # samples already includes the new one
        missing = existing_sample_count - len(transcripts)
        if missing > 0:
            transcripts.extend([''] * missing)
        transcripts.append(transcript)
        update_data['speech_sample_transcripts'] = transcripts
        update_data['speech_samples_version'] = 2

    transaction.update(person_ref, update_data)
    return True


def add_person_speech_sample(
    uid: str, person_id: str, sample_path: str, transcript: Optional[str] = None, max_samples: int = 5
) -> bool:
    """
    Append speech sample path to person's speech_samples list.
    Limits to max_samples to prevent unlimited growth.

    Uses Firestore transaction to ensure atomic read-modify-write,
    preventing array drift from concurrent updates.

    Args:
        uid: User ID
        person_id: Person ID
        sample_path: GCS path to the speech sample
        transcript: Optional transcript text for the sample
        max_samples: Maximum number of samples to keep (default 5)

    Returns:
        True if sample was added, False if limit reached or person not found
    """
    person_ref = db.collection('users').document(uid).collection('people').document(person_id)
    # The existence check, the limit check and the write all live in the
    # transactional helper. Doing a separate read/update here first would bypass
    # the transaction and drop the transcript.
    transaction = db.transaction()
    return _add_sample_transaction(transaction, person_ref, sample_path, transcript, max_samples)


def get_person_speech_samples_count(uid: str, person_id: str) -> int:
Expand All @@ -151,24 +176,41 @@ def get_person_speech_samples_count(uid: str, person_id: str) -> int:
def remove_person_speech_sample(uid: str, person_id: str, sample_path: str) -> bool:
"""
Remove a speech sample path from person's speech_samples list.
Also removes the corresponding transcript at the same index to keep arrays in sync.

Args:
uid: User ID
person_id: Person ID
sample_path: GCS path to remove

Returns:
True if removed, False if person not found
True if removed, False if person or sample not found
"""
person_ref = db.collection('users').document(uid).collection('people').document(person_id)
person_doc = person_ref.get()

if not person_doc.exists:
return False

person_data = person_doc.to_dict()
samples = person_data.get('speech_samples', [])
transcripts = person_data.get('speech_sample_transcripts', [])

# Find index of sample to remove
try:
idx = samples.index(sample_path)
except ValueError:
return False # Sample not found

# Remove from both arrays by index
samples.pop(idx)
if idx < len(transcripts):
transcripts.pop(idx)

person_ref.update(
{
'speech_samples': firestore.ArrayRemove([sample_path]),
'speech_samples': samples,
'speech_sample_transcripts': transcripts,
'updated_at': datetime.now(timezone.utc),
}
)
Expand Down Expand Up @@ -223,6 +265,148 @@ def get_person_speaker_embedding(uid: str, person_id: str) -> Optional[list]:
return person_data.get('speaker_embedding')


def set_person_speech_sample_transcript(uid: str, person_id: str, sample_index: int, transcript: str) -> bool:
    """
    Update transcript at a specific index in the speech_sample_transcripts array.

    The transcript array is padded with empty strings up to the number of
    samples before the value is written, so the index is always addressable.

    Args:
        uid: User ID
        person_id: Person ID
        sample_index: Index of the sample/transcript to update
        transcript: The transcript text to set

    Returns:
        True if updated successfully, False if person not found or index out of bounds
    """
    person_ref = db.collection('users').document(uid).collection('people').document(person_id)
    person_doc = person_ref.get()

    if not person_doc.exists:
        return False

    person_data = person_doc.to_dict()
    # `or []` instead of a .get() default: a field stored as an explicit null
    # would otherwise come back as None and break len() below.
    samples = person_data.get('speech_samples') or []
    transcripts = person_data.get('speech_sample_transcripts') or []

    # Validate index against the samples array (the source of truth for length)
    if not 0 <= sample_index < len(samples):
        return False

    # Pad the transcripts array so it is at least as long as the samples array
    missing = len(samples) - len(transcripts)
    if missing > 0:
        transcripts.extend([''] * missing)

    transcripts[sample_index] = transcript

    person_ref.update(
        {
            'speech_sample_transcripts': transcripts,
            'updated_at': datetime.now(timezone.utc),
        }
    )
    return True


def update_person_speech_samples_after_migration(
    uid: str,
    person_id: str,
    samples: list,
    transcripts: list,
    version: int,
    speaker_embedding: Optional[list] = None,
) -> bool:
    """
    Overwrite a person's samples, transcripts, version and speaker embedding
    in a single update call.

    Args:
        uid: User ID
        person_id: Person ID
        samples: Sample paths to store in speech_samples
        transcripts: Transcript strings to store in speech_sample_transcripts
        version: Value to store in speech_samples_version
        speaker_embedding: New embedding to store; None deletes the field

    Returns:
        True if the update was issued, False if the person document does not exist
    """
    target = db.collection('users').document(uid).collection('people').document(person_id)
    existing = target.get()
    if not existing.exists:
        return False

    changes = {
        'speech_samples': samples,
        'speech_sample_transcripts': transcripts,
        'speech_samples_version': version,
        'updated_at': datetime.now(timezone.utc),
    }
    # A missing embedding removes the stored field instead of writing a null.
    changes['speaker_embedding'] = (
        firestore.DELETE_FIELD if speaker_embedding is None else speaker_embedding
    )

    target.update(changes)
    return True


def clear_person_speaker_embedding(uid: str, person_id: str) -> bool:
    """
    Delete the speaker_embedding field from a person document and refresh
    its updated_at timestamp.

    Args:
        uid: User ID
        person_id: Person ID

    Returns:
        True if the update was issued, False if the person document does not exist
    """
    target = db.collection('users').document(uid).collection('people').document(person_id)
    existing = target.get()
    if not existing.exists:
        return False

    changes = {
        'speaker_embedding': firestore.DELETE_FIELD,
        'updated_at': datetime.now(timezone.utc),
    }
    target.update(changes)
    return True


def update_person_speech_samples_version(uid: str, person_id: str, version: int) -> bool:
    """
    Write only the speech_samples_version field (plus updated_at) on a person.

    Args:
        uid: User ID
        person_id: Person ID
        version: Value to store in speech_samples_version

    Returns:
        True if the update was issued, False if the person document does not exist
    """
    target = db.collection('users').document(uid).collection('people').document(person_id)
    existing = target.get()
    if not existing.exists:
        return False

    changes = {
        'speech_samples_version': version,
        'updated_at': datetime.now(timezone.utc),
    }
    target.update(changes)
    return True


def delete_user_data(uid: str):
user_ref = db.collection('users').document(uid)
if not user_ref.get().exists:
Expand Down
4 changes: 3 additions & 1 deletion backend/models/other.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime
from typing import List
from typing import List, Optional

from pydantic import BaseModel, Field

Expand All @@ -24,3 +24,5 @@ class Person(BaseModel):
created_at: datetime
updated_at: datetime
speech_samples: List[str] = []
speech_sample_transcripts: Optional[List[str]] = None
speech_samples_version: int = 1
6 changes: 3 additions & 3 deletions backend/routers/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,10 @@ def get_all_people(include_speech_samples: bool = True, uid: str = Depends(auth.
print('get_all_people', include_speech_samples)
people = get_people(uid)
if include_speech_samples:
# Convert stored GCS paths to signed URLs for each person
for person in people:
# Convert GCS paths to signed URLs for each person
for i, person in enumerate(people):
stored_paths = person.get('speech_samples', [])
person['speech_samples'] = get_speech_sample_signed_urls(stored_paths)
people[i]['speech_samples'] = get_speech_sample_signed_urls(stored_paths)
return people


Expand Down
6 changes: 6 additions & 0 deletions backend/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,10 @@ set -euo pipefail
# Run from the directory that contains this script so the relative test
# paths below resolve regardless of the caller's working directory.
ROOT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$ROOT_DIR"

# NOTE(review): hard-coded value, presumably a test-only secret; confirm it is
# not used outside the unit tests.
export ENCRYPTION_SECRET="omi_ZwB2ZNqB2HHpMK6wStk7sTpavJiPTFg7gXUHnc4tFABPU6pZ2c2DKgehtfgi4RZv"

# Each file is run in its own pytest invocation, in this order; with errexit
# enabled the first failing invocation stops the script.
for test_file in \
  tests/unit/test_transcript_segment.py \
  tests/unit/test_text_similarity.py \
  tests/unit/test_speaker_sample.py \
  tests/unit/test_speaker_sample_migration.py \
  tests/unit/test_users_add_sample_transaction.py
do
  pytest "$test_file" -v
done
Loading