diff --git a/db/scripts/create_seed_from_prod_clone.sh b/db/scripts/create_seed_from_prod_clone.sh index 3a291310..b5dd86f5 100644 --- a/db/scripts/create_seed_from_prod_clone.sh +++ b/db/scripts/create_seed_from_prod_clone.sh @@ -136,6 +136,10 @@ echo "==> [5/9] Copying translation data for target languages..." psql -U postgres -d "$STAGING_DB" < { const { language: spaLanguage } = await languageFactory.build({ members: [], }); + const llmImportModel = await getDb() + .selectFrom("machine_gloss_model") + .where("code", "=", "llm_import") + .select("id") + .executeTakeFirstOrThrow(); const existingGloss = { word_id: "0100100101", language_id: engLanguage.id, + model_id: llmImportModel.id, gloss: "Gloss in another language", }; @@ -85,6 +91,7 @@ describe("updateAllForLanguage", () => { { word_id: "0100100101", language_id: spaLanguage.id, + model_id: llmImportModel.id, gloss: "Existing gloss to be removed", }, ]) @@ -107,6 +114,7 @@ describe("updateAllForLanguage", () => { await machineGlossRepository.updateAllForLanguage({ languageId: spaLanguage.id, + modelCode: "llm_import", stream: Readable.from(newGlosses), }); @@ -122,6 +130,7 @@ describe("updateAllForLanguage", () => { word_id: g.wordId, gloss: g.gloss, language_id: spaLanguage.id, + model_id: llmImportModel.id, })), ]); }); diff --git a/src/modules/translation/data-access/machineGlossRepository.ts b/src/modules/translation/data-access/machineGlossRepository.ts index 0652ea17..9bd88b86 100644 --- a/src/modules/translation/data-access/machineGlossRepository.ts +++ b/src/modules/translation/data-access/machineGlossRepository.ts @@ -9,11 +9,19 @@ interface StreamedMachineGloss { export const machineGlossRepository = { async updateAllForLanguage({ languageId, + modelCode, stream, }: { languageId: string; + modelCode: string; stream: Readable; }): Promise { + const model = await getDb() + .selectFrom("machine_gloss_model") + .select("id") + .where("code", "=", modelCode) + .executeTakeFirstOrThrow(); + await 
getDb() .deleteFrom("machine_gloss") .where("language_id", "=", languageId) @@ -25,6 +33,7 @@ export const machineGlossRepository = { fields: { word_id: (record) => record.wordId, language_id: () => languageId, + model_id: () => model.id.toString(), gloss: (record) => record.gloss, }, }); diff --git a/src/modules/translation/db/migrations/machine-gloss-model.data.sql b/src/modules/translation/db/migrations/machine-gloss-model.data.sql new file mode 100644 index 00000000..9cd9d874 --- /dev/null +++ b/src/modules/translation/db/migrations/machine-gloss-model.data.sql @@ -0,0 +1,17 @@ +begin; + +insert into machine_gloss_model (code) +values ('google'), ('llm_import') +on conflict (code) do nothing; + +update machine_gloss mg +set model_id = ( + select mgm.id + from machine_gloss_model mgm + where mgm.code = case when l.code = 'hin' then 'llm_import' else 'google' end +) +from language l +where mg.language_id = l.id + and mg.model_id is null; + +commit; diff --git a/src/modules/translation/db/migrations/machine-gloss-model.schema.sql b/src/modules/translation/db/migrations/machine-gloss-model.schema.sql new file mode 100644 index 00000000..73fe0e46 --- /dev/null +++ b/src/modules/translation/db/migrations/machine-gloss-model.schema.sql @@ -0,0 +1,16 @@ +begin; + +create table machine_gloss_model ( + id serial primary key, + code text not null unique +); + +alter table machine_gloss + add column model_id int references machine_gloss_model (id); + +drop index if exists idx_machine_gloss_language_word; + +create unique index idx_machine_gloss_language_word + on machine_gloss (language_id, word_id, model_id); + +commit; diff --git a/src/modules/translation/db/schema.ts b/src/modules/translation/db/schema.ts index 53d4e2f2..011ea590 100644 --- a/src/modules/translation/db/schema.ts +++ b/src/modules/translation/db/schema.ts @@ -56,9 +56,15 @@ export interface MachineGlossTable { id: Generated; word_id: string; language_id: string; + model_id: number | null; gloss: string; 
} +export interface MachineGlossModelTable { + id: Generated<number>; + code: string; +} + export interface GlossEventTable { id: string; phrase_id: number; diff --git a/src/modules/translation/jobs/importAIGlosses.ts b/src/modules/translation/jobs/importAIGlosses.ts index 4f22c757..9c80bd2b 100644 --- a/src/modules/translation/jobs/importAIGlosses.ts +++ b/src/modules/translation/jobs/importAIGlosses.ts @@ -42,6 +42,7 @@ export async function importAIGlosses(job: ImportAIGlossesJob) { ); await machineGlossRepository.updateAllForLanguage({ languageId: language.id, + modelCode: "llm_import", stream: Readable.from(requestStream), });