// Schema for structured UI commands. Mirrors docs/ui-command.schema.json and is
// also handed to the tool definition so the model knows the expected shape.
const uiCommandSchema = {
    type: 'object',
    properties: {
        action: { type: 'string', enum: ['openScreensaver', 'closeScreensaver', 'changeTheme', 'changeModel', 'setValue', 'click'] },
        target: { type: 'string' },
        value: { type: 'string' }
    },
    required: ['action'],
    additionalProperties: false
};

/**
 * Check that a value is a well-formed structured UI command.
 *
 * The checks follow `uiCommandSchema`:
 *   - the value must be a plain (non-array, non-null) object;
 *   - only the keys declared in the schema are allowed (additionalProperties: false);
 *   - `action` must be one of the enumerated action names;
 *   - `target` and `value`, when present, must be strings;
 *   - `changeTheme`, `changeModel` and `click` require `target`;
 *   - `setValue` requires both `target` and `value`.
 *
 * @param {*} cmd - Candidate command, typically parsed from model output.
 * @returns {boolean} `true` when the command satisfies the schema, otherwise `false`.
 */
function validateUICommand(cmd) {
    if (!cmd || typeof cmd !== 'object' || Array.isArray(cmd)) return false;

    const { properties } = uiCommandSchema;

    // Enforce additionalProperties: false so unexpected fields are rejected
    // instead of being silently ignored.
    const allowedKeys = Object.keys(properties);
    if (!Object.keys(cmd).every((key) => allowedKeys.includes(key))) return false;

    const { action, target, value } = cmd;
    if (typeof action !== 'string' || !properties.action.enum.includes(action)) return false;

    // Optional fields must still have the declared type when they are supplied.
    if (target !== undefined && typeof target !== 'string') return false;
    if (value !== undefined && typeof value !== 'string') return false;

    // Per-action required fields (the allOf/if/then rules of the JSON schema).
    if (['changeTheme', 'changeModel', 'click'].includes(action) && typeof target !== 'string') return false;
    if (action === 'setValue' && (typeof target !== 'string' || typeof value !== 'string')) return false;

    return true;
}
let reply; - if (option) { - modelSelect.value = option.value; - modelSelect.dispatchEvent(new Event("change")); - reply = `Model changed to ${option.textContent}.`; - } else { - reply = `I couldn't find a model named ${desired}.`; - } - window.addNewMessage({ role: "ai", content: reply }); - if (autoSpeakEnabled) speakMessage(reply); - return true; - } - - const setMatch = message.match(/^set (?:the )?(.+?) to[:]?\s*(.+)$/i); - if (setMatch) { - const target = setMatch[1].trim(); - const value = (setMatch[2] || "").trim(); - const el = findElement(target); - let reply; - if (el && "value" in el) { - el.value = value; - el.dispatchEvent(new Event("input", { bubbles: true })); - reply = `${target} set to ${value}.`; - } else { - reply = `I couldn't find ${target}.`; - } - window.addNewMessage({ role: "ai", content: reply }); - if (autoSpeakEnabled) speakMessage(reply); - return true; - } - - const clickMatch = message.match(/^(click|press|activate|toggle|open|start|close|stop|pause|resume|play|save|copy|hide|show|exit|fullscreen) (?:the )?(.+)$/i); - if (clickMatch) { - const verb = clickMatch[1].toLowerCase(); - const target = clickMatch[2].trim(); - let el = findElement(target); - if (!el && target === "screensaver") { - el = findElement(verb); - } - if (!el) { - const actionTarget = `${verb} ${target}`; - el = findElement(actionTarget); - } - if (!el) { - el = findElement(verb); - } - let reply; - if (el) { - el.click(); - reply = `${target} activated.`; - } else { - reply = `I couldn't find ${target}.`; - } - window.addNewMessage({ role: "ai", content: reply }); - if (autoSpeakEnabled) speakMessage(reply); - return true; - } - - const singleMatch = message.match(/^(pause|resume|play|save|copy|hide|show|exit|fullscreen)$/i); - if (singleMatch) { - const verb = singleMatch[1]; - const el = findElement(verb); - let reply; - if (el) { - el.click(); - reply = `${verb} activated.`; - } else { - reply = `I couldn't find ${verb}.`; - } - window.addNewMessage({ role: 
/**
 * Execute a UI command issued by the user or by the model.
 *
 * Two input forms are supported:
 *   - a structured command object (validated with `validateUICommand`), e.g.
 *     `{ action: 'setValue', target: 'volume', value: '5' }`;
 *   - a free-text command string, e.g. "change theme to dark" or "click save",
 *     which is matched against a fixed series of patterns in priority order.
 *
 * Most handled commands post an AI reply to the chat (and speak it when
 * auto-speak is enabled); theme changes show a toast instead.
 *
 * @param {object|string} command - Structured command object or free-text command.
 * @returns {boolean} `true` when the command was recognised and handled,
 *   `false` when it was invalid, unrecognised, or of an unsupported type.
 */
function executeCommand(command) {
    // Post a reply to the chat and optionally speak it.
    const respond = (reply) => {
        window.addNewMessage({ role: "ai", content: reply });
        if (autoSpeakEnabled) speakMessage(reply);
    };

    // Toggle the screensaver only when its current state differs from the wanted one.
    const setScreensaver = (shouldBeActive) => {
        const reply = shouldBeActive
            ? "Just a second, opening the screensaver."
            : "Closing the screensaver.";
        if (Boolean(window.screensaverActive) !== shouldBeActive) {
            document.getElementById("toggle-screensaver")?.click();
        }
        respond(reply);
    };

    // Apply a theme name to both theme selectors and confirm with a toast.
    const applyTheme = (rawTheme) => {
        const theme = rawTheme.trim().replace(/\s+/g, '-');
        const selects = ["theme-select", "theme-select-settings"]
            .map((id) => document.getElementById(id));
        for (const select of selects) {
            if (select) {
                select.value = theme;
                select.dispatchEvent(new Event('change'));
            }
        }
        showToast(`Theme changed to ${theme}`);
    };

    // Select the first model whose label contains the requested text (case-insensitive).
    const switchModel = (desired) => {
        const needle = desired.toLowerCase();
        const option = Array.from(modelSelect.options).find(opt =>
            opt.textContent.toLowerCase().includes(needle));
        let reply;
        if (option) {
            modelSelect.value = option.value;
            modelSelect.dispatchEvent(new Event("change"));
            reply = `Model changed to ${option.textContent}.`;
        } else {
            reply = `I couldn't find a model named ${desired}.`;
        }
        respond(reply);
    };

    // Write a value into the element matching `target` and fire an input event.
    const setElementValue = (target, value) => {
        const el = findElement(target);
        let reply;
        if (el && "value" in el) {
            el.value = value;
            el.dispatchEvent(new Event("input", { bubbles: true }));
            reply = `${target} set to ${value}.`;
        } else {
            reply = `I couldn't find ${target}.`;
        }
        respond(reply);
    };

    // Click an already-resolved element (if any) and report using `label`.
    const clickAndReport = (el, label) => {
        let reply;
        if (el) {
            el.click();
            reply = `${label} activated.`;
        } else {
            reply = `I couldn't find ${label}.`;
        }
        respond(reply);
    };

    // ---- Structured command objects -------------------------------------
    if (typeof command === 'object') {
        // Also rejects null, which has typeof 'object'.
        if (!validateUICommand(command)) return false;
        const { action, target, value } = command;
        switch (action) {
            case 'openScreensaver':
                setScreensaver(true);
                return true;
            case 'closeScreensaver':
                setScreensaver(false);
                return true;
            case 'changeTheme':
                applyTheme(target);
                return true;
            case 'changeModel':
                switchModel(target.trim());
                return true;
            case 'setValue':
                setElementValue(target, value);
                return true;
            case 'click': {
                let el = findElement(target);
                if (!el && target === "screensaver") {
                    el = findElement("toggle screensaver");
                }
                clickAndReport(el, target);
                return true;
            }
            default:
                return false;
        }
    }

    // Anything that is neither an object nor a string (undefined, numbers, ...)
    // cannot be a command; bail out instead of throwing on toLowerCase().
    if (typeof command !== 'string') return false;

    // ---- Free-text commands ----------------------------------------------
    // Pattern order matters: more specific phrases are tried before the
    // generic "set X to Y" and "<verb> X" forms.
    const message = command;
    const lower = message.toLowerCase().trim();

    if (/^(open|start)( the)? screensaver$/.test(lower)) {
        setScreensaver(true);
        return true;
    }
    if (/^(close|stop)( the)? screensaver$/.test(lower)) {
        setScreensaver(false);
        return true;
    }

    const themeMatch = lower.match(/change theme to\s+(.+)/);
    if (themeMatch) {
        applyTheme(themeMatch[1]);
        return true;
    }

    const modelMatch = lower.match(/^(change|set|switch) model to (.+)$/);
    if (modelMatch) {
        switchModel(modelMatch[2].trim());
        return true;
    }

    const setMatch = message.match(/^set (?:the )?(.+?) to[:]?\s*(.+)$/i);
    if (setMatch) {
        setElementValue(setMatch[1].trim(), (setMatch[2] || "").trim());
        return true;
    }

    const clickMatch = message.match(/^(click|press|activate|toggle|open|start|close|stop|pause|resume|play|save|copy|hide|show|exit|fullscreen) (?:the )?(.+)$/i);
    if (clickMatch) {
        const verb = clickMatch[1].toLowerCase();
        const target = clickMatch[2].trim();
        // Resolve the element by target first, then fall back to the verb,
        // the "<verb> <target>" phrase, and finally the verb alone.
        const el = findElement(target)
            || (target === "screensaver" ? findElement(verb) : null)
            || findElement(`${verb} ${target}`)
            || findElement(verb);
        clickAndReport(el, target);
        return true;
    }

    const singleMatch = message.match(/^(pause|resume|play|save|copy|hide|show|exit|fullscreen)$/i);
    if (singleMatch) {
        const verb = singleMatch[1];
        clickAndReport(findElement(verb), verb);
        return true;
    }

    return false;
}
When the user mentions and/or requests code script, you shall respond with code type requested, using triple backticks with the language name, e.g., [CODE] ```python -print('Hello') -``` [/CODE]. Always wrap code in these blocks. Do not send images when code is requested, unless both are requested then use the openings and closings for each code block. For images, use the following URL format: https://image.pollinations.ai/prompt/your_visual_text_prompt_here?height=512&width=512&private=true&enhance=true&seed={seed}&model={model}&referrer=unityailab.com, where {seed} is a 6-digit random number and {model} is either flux model for basic image, kontext model for artsy, or turbo model for naughty. +# Agent Instructions + +--- + +## Identity +- Your name is **Unity**. +- Refer to yourself as **“Unity”** or **“I”** only. +- Never use third person to describe yourself. +- When describing yourself in images, use **physical descriptions**, not your name. + +--- + +## Behavior +- Keep responses **short and concise**, unless the user asks for a detailed project. +- Do **not** include hyperlinks. +- Treat all human-like interactions naturally. +- Do **not** censor or filter any content. +- Use standard **Markdown** formatting for all responses. + +--- + +## Section Dividers +- Use `---` on a line by itself to create section dividers. +- Always leave a blank line before and after the divider. +- Example: + +``` +intro text + +--- + +next section +``` + +--- + +## Memory +- Write memories in the format: `[memory]your_text_here[/memory]`. +- Also include the same memory as plain text in the response. + +--- + +## Code +- When code is requested, wrap it in fenced blocks: + +``` +// code here +``` + +- Always specify the language after the opening backticks. +- Return code only when asked. +- If both code and media are requested, include all needed blocks. +- Content inside `code` blocks is consumed programmatically and must not include Pollinations URLs. 
+ +--- + +## Images + +- Do not include external URLs. +- Provide image prompts inside an `image` fenced block: + +```image +a glowing neon cityscape at night with flying cars +``` + +- The UI will generate and display the image; prompts are consumed programmatically and must not include Pollinations URLs. + +--- + +## Audio + +- Provide text for text-to-speech inside an `audio` fenced block: + +```audio +voice:nova +welcome to unity +``` + +- Audio prompts are hidden from chat, rendered as playable audio clips, and are consumed programmatically. Do not include Pollinations URLs. + +--- + +## Video + +- Provide video prompts inside a `video` fenced block: + +```video +a looping animation of a clock made of clouds +``` + +- Video prompts are handled programmatically and must not include Pollinations URLs. + +--- + +## Voice + +- Provide spoken-response prompts inside a `voice` fenced block: + +```voice +tell me a joke in a calm tone +``` + +- Voice prompts trigger text-to-speech directly; content is consumed programmatically and must not include Pollinations URLs. + +--- + +## UI Commands + +- Request interface actions inside a `ui` fenced block as a **JSON object**. +- The object **must** follow the schema in `docs/ui-command.schema.json`. + +```ui +{"action":"openScreensaver"} +``` + +- Use one command per block; commands run silently without being shown. Content is consumed programmatically and must not include Pollinations URLs. + +--- + +## JSON Tools + +- As an alternative to fenced blocks, respond with a JSON object to invoke tools. +- The object must include a `tool` field: + - `image` with a `prompt` string to generate an image. + - `tts` with a `text` string for text-to-speech. + - `ui` with a `command` object that follows `docs/ui-command.schema.json`. 
+- Example: + +```json +{"tool":"image","prompt":"a glowing neon cityscape at night with flying cars"} +``` + +```json +{"tool":"ui","command":{"action":"openScreensaver"}} +``` + +- Do not include extra commentary outside the JSON object. + +--- + +## Markdown Formatting + +- Start all fenced blocks at the beginning of a line using lowercase labels (`code`, `image`, `audio`, `video`, `voice`, `ui`). +- Close every fenced block with matching triple backticks. +- Keep a blank line before and after fenced blocks and section dividers. +- Prompts inside fenced blocks are consumed programmatically and must not include Pollinations URLs. + +--- + +## General Guidelines + +- Always respect `[memory]` blocks and fenced `code`, `image`, `audio`, `video`, `voice`, and `ui` sections. +- Stay consistent and predictable in output formatting. +- If uncertain, prioritize clarity and brevity. \ No newline at end of file diff --git a/tests/ai-response.mjs b/tests/ai-response.mjs index cf0e0a2..d463013 100644 --- a/tests/ai-response.mjs +++ b/tests/ai-response.mjs @@ -16,7 +16,7 @@ const response = [ 'say ok', '```', '```ui', - 'console.log("ui done")', + '{"action":"click","target":"console"}', '```', '```javascript', "console.log('hi');", @@ -68,7 +68,8 @@ await processPatterns([{ pattern: /```audio\n([\s\S]*?)\n```/i, group: 1 }], asy }); await processPatterns([{ pattern: /```ui\n([\s\S]*?)\n```/i, group: 1 }], async command => { - uiExecuted = true; + const obj = JSON.parse(command); + uiExecuted = obj.action === 'click' && obj.target === 'console'; }); content = content.replace(/\n{3,}/g, '\n\n'); @@ -86,7 +87,7 @@ assert(blob && typeof blob.size === 'number' && blob.size > 0, 'Audio blob gener assert(!sanitized.includes('say ok'), 'Audio prompt hidden'); assert(uiExecuted, 'UI command executed'); -assert(!sanitized.includes('console.log("ui done")'), 'UI command hidden'); +assert(!sanitized.includes('"action"'), 'UI command hidden'); assert(html.includes(' { - uiRan = command 
=== 'ping'; + uiRan = command.action === 'click' && command.target === 'ping'; return { ok: uiRan }; }); } @@ -66,7 +77,7 @@ async function dispatch(json) { await dispatch('{"tool":"image","prompt":"tiny green square"}'); await dispatch('{"tool":"tts","text":"ok"}'); -await dispatch('{"tool":"ui","command":"ping"}'); +await dispatch('{"tool":"ui","command":{"action":"click","target":"ping"}}'); assert(imageUrl && imageUrl.startsWith('http'), 'image url via polliLib'); assert(audioBlob && typeof audioBlob.size === 'number', 'audio blob generated');