/**
 * Sanitizes a speaker name so it can be used as the `name` field of a chat message.
 *
 * OpenAI API enforces the following:
 * "'Firstname Lastname' does not match '^[a-zA-Z0-9_-]{1,64}$' - 'messages.1.name'"
 *
 * Spaces are turned into underscores, every other character outside
 * `[a-zA-Z0-9_-]` is dropped, and the result is cut to at most 64 characters.
 *
 * @param inputName Raw speaker name; may be omitted.
 * @returns The sanitized name, or `undefined` when no name was given or when
 * nothing is left after sanitizing. The pattern above requires at least one
 * character, so an empty string would be as invalid as the raw name.
 */
function openAiName(inputName?: string): string | undefined {
  if (inputName == null) return undefined

  const sanitized = inputName
    .replace(/ /g, '_')
    .replace(/[^a-zA-Z0-9_-]/g, '')
    .substring(0, 64)

  // `{1,64}` forbids the empty string: omit the field rather than send ''.
  return sanitized.length > 0 ? sanitized : undefined
}
'user' : 'system' - const msg: CompletionItem = { - role: role, - content: sample.replace(BOT_REPLACE, char).replace(SELF_REPLACE, sender), - } + const speakerName = role === 'assistant' ? char : role === 'user' ? sender : undefined + + const content = sample + .replace(BOT_REPLACE, char) + .replace(SELF_REPLACE, sender) + .replace(speakerName ? `${speakerName}: ` : '', '') - const length = await encoder()(msg.content) + const msg: CompletionItem = { role, content, name: openAiName(speakerName) } + const nameCost = msg.name ? 1 + (await encoder()(msg.name)) : 0 + const length = nameCost + (await encoder()(msg.content)) if (budget && tokens + length > budget) break additions.push(msg)