From 0c02d58cc8336c1af11477072ad604ad64cd34ee Mon Sep 17 00:00:00 2001 From: Henry Chan Date: Sat, 28 Mar 2026 15:29:41 +1000 Subject: [PATCH 1/2] Implement structured meta chat and fix Chrome responsiveness --- .env.example | 7 +- .gitignore | 1 + PRD.md | 329 + afk-ralph.sh | 35 + app/api/chat/events/route.test.ts | 105 + app/api/chat/events/route.ts | 98 + app/api/chat/feedback/route.test.ts | 117 + app/api/chat/feedback/route.ts | 108 + app/api/chat/route.test.ts | 164 + app/api/chat/route.ts | 343 +- app/chat/[threadId]/page.tsx | 22 + app/chat/_components/chat-input.tsx | 11 +- .../_components/chat-message-feedback.tsx | 99 + .../_components/chat-message-list.test.tsx | 126 + app/chat/_components/chat-message-list.tsx | 93 +- app/chat/_components/chat-message-meta.tsx | 167 + app/chat/_components/chat-message.test.tsx | 212 + app/chat/_components/chat-message.tsx | 27 +- app/chat/_components/chat-page-client.tsx | 120 + app/chat/page.tsx | 39 +- app/page.tsx | 173 +- docs/CORE_PRODUCT_ROADMAP.md | 114 +- docs/agents/architecture.md | 9 +- e2e/chat.spec.ts | 190 + e2e/landing-page.spec.ts | 52 +- lib/chat/constants.ts | 3 + lib/chat/contracts.test.ts | 73 + lib/chat/contracts.ts | 110 + lib/chat/events.test.ts | 85 + lib/chat/events.ts | 185 + lib/chat/feedback.ts | 55 + lib/chat/history.test.ts | 47 + lib/chat/history.ts | 45 + lib/chat/message-metadata.ts | 28 + lib/chat/messages.test.ts | 44 + lib/chat/messages.ts | 37 + lib/chat/session.test.ts | 48 + lib/chat/session.ts | 34 + lib/chat/storage.ts | 6 + lib/chat/track-event.ts | 12 + mastra/agents.ts | 100 + mastra/config.ts | 73 + mastra/evals/meta-chat.dataset.ts | 30 + mastra/index.ts | 25 + mastra/skills/registry.test.ts | 34 + mastra/skills/registry.ts | 105 + mastra/workflows/meta-chat-workflow.test.ts | 101 + mastra/workflows/meta-chat-workflow.ts | 319 + next.config.js | 2 +- package-lock.json | 6025 +++++++++++++++-- package.json | 9 +- progress.md | 23 + tsconfig.json | 2 +- 53 files changed, 
9570 insertions(+), 851 deletions(-) create mode 100644 PRD.md create mode 100755 afk-ralph.sh create mode 100644 app/api/chat/events/route.test.ts create mode 100644 app/api/chat/events/route.ts create mode 100644 app/api/chat/feedback/route.test.ts create mode 100644 app/api/chat/feedback/route.ts create mode 100644 app/api/chat/route.test.ts create mode 100644 app/chat/[threadId]/page.tsx create mode 100644 app/chat/_components/chat-message-feedback.tsx create mode 100644 app/chat/_components/chat-message-list.test.tsx create mode 100644 app/chat/_components/chat-message-meta.tsx create mode 100644 app/chat/_components/chat-message.test.tsx create mode 100644 app/chat/_components/chat-page-client.tsx create mode 100644 e2e/chat.spec.ts create mode 100644 lib/chat/constants.ts create mode 100644 lib/chat/contracts.test.ts create mode 100644 lib/chat/contracts.ts create mode 100644 lib/chat/events.test.ts create mode 100644 lib/chat/events.ts create mode 100644 lib/chat/feedback.ts create mode 100644 lib/chat/history.test.ts create mode 100644 lib/chat/history.ts create mode 100644 lib/chat/message-metadata.ts create mode 100644 lib/chat/messages.test.ts create mode 100644 lib/chat/messages.ts create mode 100644 lib/chat/session.test.ts create mode 100644 lib/chat/session.ts create mode 100644 lib/chat/storage.ts create mode 100644 lib/chat/track-event.ts create mode 100644 mastra/agents.ts create mode 100644 mastra/config.ts create mode 100644 mastra/evals/meta-chat.dataset.ts create mode 100644 mastra/index.ts create mode 100644 mastra/skills/registry.test.ts create mode 100644 mastra/skills/registry.ts create mode 100644 mastra/workflows/meta-chat-workflow.test.ts create mode 100644 mastra/workflows/meta-chat-workflow.ts create mode 100644 progress.md diff --git a/.env.example b/.env.example index a27fd80..61c92b6 100644 --- a/.env.example +++ b/.env.example @@ -1 +1,6 @@ -OPENAI_API_KEY=UPDATE_ME_TO_YOUR_OWN_KEY \ No newline at end of file 
+OPENAI_API_KEY=UPDATE_ME_TO_YOUR_OWN_KEY +MASTRA_STORAGE_URL=file:./.mastra/secondorder.db +MASTRA_STORAGE_AUTH_TOKEN= +SECONDORDER_AGENT_MODEL=openai/gpt-5.1 +SECONDORDER_PLANNER_MODEL= +SECONDORDER_CRITIC_MODEL= diff --git a/.gitignore b/.gitignore index de6c8c5..bed7124 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ playwright-report/ coverage .env.local tsconfig.tsbuildinfo +.mastra diff --git a/PRD.md b/PRD.md new file mode 100644 index 0000000..3402434 --- /dev/null +++ b/PRD.md @@ -0,0 +1,329 @@ +# PRD: Milestone 1.5 - Structured Meta Chat + +**Product**: SecondOrder Web +**Date**: March 8, 2026 +**Status**: Draft +**Document owner**: Product / Engineering + +## 1. Summary + +The next milestone should productize the meta-thinking system that already exists in the codebase. + +Today, SecondOrder has: + +- a polished marketing page at [`/Users/henry/workspace/secondorder-web/app/page.tsx`](/Users/henry/workspace/secondorder-web/app/page.tsx) +- a thread-based chat experience at [`/Users/henry/workspace/secondorder-web/app/chat/[threadId]/page.tsx`](/Users/henry/workspace/secondorder-web/app/chat/[threadId]/page.tsx) +- a Mastra-backed workflow that already classifies tasks, generates plans, drafts responses, and critiques them before the final answer at [`/Users/henry/workspace/secondorder-web/mastra/workflows/meta-chat-workflow.ts`](/Users/henry/workspace/secondorder-web/mastra/workflows/meta-chat-workflow.ts) +- memory-backed thread history and resource scoping in [`/Users/henry/workspace/secondorder-web/app/api/chat/route.ts`](/Users/henry/workspace/secondorder-web/app/api/chat/route.ts) + +What is missing is the actual product layer that makes this differentiation visible and useful to the user. 
+ +This milestone should ship a clearer, more trustworthy chat product with: + +- visible task framing +- optional plan preview +- confidence and limitation signals +- structured feedback capture +- basic instrumentation for evaluation + +This is the narrowest milestone that turns SecondOrder from "chat with hidden orchestration" into "a meta-thinking assistant users can understand and trust." + +## 2. Problem + +The current app has meaningful backend meta-orchestration, but the user experience still feels like a standard chat interface. + +Current gaps: + +- users cannot see how SecondOrder interpreted their task +- plans and critiques are used internally but never surfaced +- there is no confidence or uncertainty signaling +- there is no structured feedback loop to improve future responses +- success is not measured with product-level analytics or evaluation events + +As a result, the product promise on the landing page is ahead of the in-product experience. + +## 3. Goal + +Ship the first user-visible version of SecondOrder's meta-thinking experience inside `/chat`. + +By the end of this milestone, a user should be able to: + +- ask a complex question +- see how SecondOrder framed the task +- optionally inspect the plan before or alongside the answer +- understand when the assistant is confident vs uncertain +- give simple feedback on whether the response was useful + +## 4. Non-Goals + +This milestone should not include: + +- broad tool-calling beyond what Mastra already supports internally +- multi-agent UI visualizations or raw chain-of-thought exposure +- long-term personalized memory controls +- judge-agent or multi-model orchestration UI +- large marketing-site redesign work + +## 5. 
User Segments + +Primary users: + +- founders, operators, and technical users exploring SecondOrder's core differentiation +- early adopters evaluating whether the assistant is better than generic chat for planning, analysis, decisions, and troubleshooting + +Secondary users: + +- internal team members using the product to validate reasoning quality and product positioning + +## 6. Current State Analysis + +### What is already implemented + +- New thread creation via `/chat` redirect to unique thread URLs +- Thread-scoped message history and resource isolation +- Request validation and oversized-input rejection +- Task classification into `simple_chat`, `analysis`, `planning`, `decision`, and `troubleshooting` +- Planner and critic agents +- Request-context injection into the final agent +- Memory-backed chat history +- Mastra storage, logging, and observability wiring +- Unit coverage for chat utilities, registry logic, and API route behavior + +### What is not yet productized + +- task-type display in the UI +- user-visible plan summaries +- confidence badges or uncertainty messaging +- feedback controls per answer +- conversation-level outcome tracking +- instrumentation tied to chat behavior +- clear empty-state onboarding for "how to use SecondOrder differently" + +## 7. Milestone Thesis + +The next milestone is not "build more intelligence." It is "make existing intelligence legible, controllable, and measurable." + +The product should expose enough of the meta layer to create trust and differentiation without exposing raw internal reasoning. + +## 8. Scope + +### In scope + +#### A. 
Visible Meta Mode + +For non-`simple_chat` requests, the chat experience should show a compact task-framing block that includes: + +- [x] detected task type +- [x] short goal summary +- [x] optional constraints summary if available +- [x] whether SecondOrder is using a structured meta pass + +This should appear as a compact system-style card above the assistant response or as a collapsible pre-answer block. + +#### B. Plan Preview + +For complex tasks, users should be able to view a compact plan summary generated by the planner workflow. + +Requirements: + +- [x] default to compact, not verbose +- [x] avoid exposing chain-of-thought or raw internal prompts +- [x] support a collapsed and expanded state +- [x] never block the final answer if the plan preview fails + +#### C. Confidence and Limitation Signals + +Each assistant response for meta-routed tasks should include lightweight trust signals such as: + +- [x] confidence level: low, medium, high +- [x] explicit note when assumptions are weak +- [x] explicit note when more context would improve the answer + +These signals should come from structured workflow output, not hardcoded UI copy. + +#### D. Feedback Capture + +Users should be able to provide structured feedback on assistant messages. + +Initial feedback schema: + +- [x] helpful +- [x] not helpful +- [x] needs more depth +- [x] missed constraints + +Feedback should be stored as an event with thread ID, message ID, task type, and timestamp. + +#### E. Instrumentation and Evaluation Baseline + +Track enough events to evaluate whether visible meta behavior improves user outcomes. 
+ +Minimum events: + +- [x] thread started +- [x] message submitted +- [x] task classified +- [x] meta mode used +- [x] plan preview expanded +- [x] response completed +- [x] feedback submitted + +Minimum metrics: + +- share of conversations routed to meta mode +- feedback positivity rate +- response completion rate +- average turns per successful thread +- percentage of meta-routed responses where plan preview is viewed + +#### F. Better Chat Onboarding + +The empty state in chat should explain what makes SecondOrder different and suggest task types it handles well: + +- [x] planning +- [x] analysis +- [x] decisions +- [x] troubleshooting + +This should improve first-message quality and align product experience with landing-page claims. + +### Out of scope + +- persistent user profile settings +- memory inspection/deletion UI +- external connectors or retrieval systems +- pricing, auth, billing, or team collaboration +- extensive redesign of the visual system + +## 9. Product Requirements + +### Functional requirements + +1. [x] The system must show visible task framing for meta-routed requests. +2. [x] The system must expose a compact plan preview for meta-routed requests. +3. [x] The system must show confidence or limitation signals alongside the assistant answer. +4. [x] The system must allow users to submit structured feedback on individual assistant responses. +5. [x] The system must emit analytics and evaluation events for the full chat lifecycle. +6. [x] The system must preserve the current thread-based URL model and history loading behavior. +7. [x] The system must continue hiding raw internal reasoning and prompt text. + +### UX requirements + +1. The chat must still feel fast and conversational. +2. Meta information must be skimmable and collapsible. +3. Simple-chat requests should remain lightweight and should not show unnecessary framing chrome. +4. Visible trust signals should be informative, not alarmist. +5. 
The interface should work cleanly on desktop and mobile. + +### Technical requirements + +1. [x] Extend workflow output schemas rather than inferring UI state from freeform text. +2. [x] Keep UI concerns in chat route components, not in shared primitives unless reuse is justified. +3. [x] Preserve strict TypeScript and current testing patterns. +4. [x] Add focused Vitest coverage for new structured chat state logic. +5. [x] Add Playwright coverage for the visible meta-mode flow. + +## 10. User Stories + +1. As a user asking for a plan, I want to see how the assistant framed my request so I can trust that it understood the job. +2. As a user working on a hard problem, I want to inspect a compact plan so I can judge whether the reasoning direction is sound. +3. As a cautious user, I want clear confidence and limitation signals so I know when to trust the answer and when to add more context. +4. As a product team member, I want feedback and event data so I can tell whether the meta-thinking layer is improving outcomes. + +## 11. Success Metrics + +### Primary success metrics + +- At least 60% of meta-routed conversations receive a user feedback event +- Helpful feedback rate is at least 20 points higher for meta-routed threads than baseline generic threads +- At least 40% of meta-routed responses have the plan preview opened +- Chat completion rate improves relative to the current baseline + +### Secondary metrics + +- Reduced follow-up turns caused by misunderstanding the task +- Increased repeat usage of `/chat` +- Higher share of conversations in planning, analysis, decision, and troubleshooting categories + +## 12. Release Criteria + +The milestone is complete when: + +1. [x] Meta-routed tasks show visible task framing in the shipped UI. +2. [x] Plan preview is available and collapsible. +3. [x] Confidence and limitation signals are displayed for meta-routed responses. +4. [x] Structured feedback events are captured. +5. [x] Core analytics events are emitted. +6. 
[x] New unit and E2E coverage pass. +7. [x] `npm test` and `npm run ts-check` pass in the target branch. + +## 13. Risks + +### Risk: exposing too much internal reasoning + +Mitigation: + +- surface summaries, not raw prompts or chain-of-thought +- keep plan previews compact and product-shaped + +### Risk: added UI makes chat feel slower or heavier + +Mitigation: + +- only show meta chrome for meta-routed tasks +- default cards to compact collapsed states where appropriate + +### Risk: workflow outputs are not structured enough for UI + +Mitigation: + +- formalize schema fields for goal, constraints, plan summary, and confidence +- avoid parsing freeform assistant text for product state + +### Risk: instrumentation exists technically but is not actionable + +Mitigation: + +- define the event list and success metrics before implementation +- keep the first milestone event model intentionally small + +## 14. Suggested Delivery Plan + +### Phase A: Schema and backend contract + +- extend workflow result schema for visible task framing and confidence +- ensure API and request context return stable fields for UI rendering +- add event hooks for analytics and feedback + +### Phase B: Chat UX + +- add empty-state onboarding improvements +- add meta summary card and plan preview UI +- add confidence and limitation presentation +- add message-level feedback controls + +### Phase C: Validation + +- add targeted Vitest coverage +- add Playwright coverage for meta-routed flows +- verify `npm test` and `npm run ts-check` + +## 15. 
#!/bin/bash
# afk-ralph.sh — run the Codex agent against PRD.md / progress.md in a loop.
#
# Usage: afk-ralph.sh <iterations>
#
# Each iteration asks the agent to implement exactly one PRD task. The loop
# stops early when the agent's final message is the sentinel COMPLETE,
# otherwise it ends after <iterations> runs.
set -e

# Require a positive integer; a non-numeric value would otherwise be
# evaluated as a variable name inside the arithmetic loop below.
if [[ ! "$1" =~ ^[1-9][0-9]*$ ]]; then
  echo "Usage: $0 <iterations>"
  exit 1
fi

# Remove the scratch file even when `codex exec` fails and `set -e` aborts
# the script before the explicit cleanup below runs.
tmpfile=""
trap 'rm -f "$tmpfile"' EXIT

for ((i=1; i<=$1; i++)); do
  tmpfile=$(mktemp)

  codex exec \
    --full-auto \
    --output-last-message "$tmpfile" \
    "PRD.md progress.md

1. Read PRD.md and progress.md.
2. Find the highest-priority incomplete task and implement it.
3. Run relevant tests and type checks.
4. Update progress.md with exactly what you completed.
5. Update PRD.md to put a checkbox next to the task item you have implemented.

ONLY WORK ON A SINGLE TASK.
If the PRD is complete, output exactly: COMPLETE."

  result=$(cat "$tmpfile")
  rm -f "$tmpfile"

  echo "$result"

  # Stop only when some line of the final message is exactly the sentinel.
  # A plain substring test would also match "INCOMPLETE" or a sentence such
  # as "marked the task COMPLETE", ending the loop with work remaining.
  if grep -qxE '[[:space:]]*COMPLETE[[:space:]]*' <<<"$result"; then
    echo "PRD complete after $i iterations."
    exit 0
  fi
done
import { cookies } from 'next/headers';
import { chatEventBodySchema } from '@/lib/chat/contracts';
import { recordChatEvent } from '@/lib/chat/events';
import { getThread } from '@/lib/chat/history';
import {
  createResourceCookieHeader,
  getOrCreateResourceId,
} from '@/lib/chat/session';

export const runtime = 'nodejs';

/**
 * Builds a JSON `Response`.
 *
 * @param body - Value serialized with `JSON.stringify` as the response body.
 * @param init - Standard `ResponseInit` fields plus an optional `setCookie`
 *   string that is appended as a `Set-Cookie` header when present.
 * @returns A response with `Content-Type: application/json` and status
 *   `init.status`, defaulting to 200.
 */
function jsonResponse(
  body: Record<string, unknown>,
  init?: ResponseInit & { setCookie?: string },
) {
  const headers = new Headers(init?.headers);
  headers.set('Content-Type', 'application/json');

  if (init?.setCookie) {
    headers.append('Set-Cookie', init.setCookie);
  }

  return new Response(JSON.stringify(body), {
    status: init?.status ?? 200,
    headers,
  });
}

/**
 * Checks whether `resourceId` may write to the thread `threadId`.
 *
 * Access is denied only when the thread exists, has a `resourceId`, and that
 * id differs from the caller's. A thread that `getThread` does not return, or
 * one without a `resourceId`, is treated as accessible.
 * NOTE(review): presumably this lets events arrive before the thread is
 * persisted — confirm this is the intended policy.
 */
async function validateThreadAccess(threadId: string, resourceId: string) {
  const thread = await getThread(threadId);

  if (thread && thread.resourceId && thread.resourceId !== resourceId) {
    return false;
  }

  return true;
}

/**
 * POST /api/chat/events — records one chat instrumentation event.
 *
 * Responses:
 * - 400 when the body is not valid JSON or fails `chatEventBodySchema`
 * - 404 when the thread belongs to a different resource
 * - 200 `{ success: true, event }` once the event is recorded
 * - 500 `{ error }` for any other failure
 *
 * Every response carries the resource cookie when one was newly created.
 */
export async function POST(request: Request) {
  const cookieStore = await cookies();
  const { resourceId, shouldSetCookie } = getOrCreateResourceId(cookieStore);
  // Computed once so each response path attaches the same cookie header.
  const setCookie = shouldSetCookie
    ? createResourceCookieHeader(resourceId)
    : undefined;

  // Parse the body separately: unparseable JSON is a client error and must
  // not fall through to the generic 500 handler below.
  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    return jsonResponse(
      { error: 'Invalid request body' },
      { status: 400, setCookie },
    );
  }

  try {
    const body = chatEventBodySchema.safeParse(payload);

    if (!body.success) {
      return jsonResponse(
        { error: 'Invalid request body' },
        { status: 400, setCookie },
      );
    }

    const hasAccess = await validateThreadAccess(body.data.threadId, resourceId);

    if (!hasAccess) {
      // Reported as "not found" rather than "forbidden".
      return jsonResponse(
        { error: 'Thread not found' },
        { status: 404, setCookie },
      );
    }

    const event = await recordChatEvent(body.data);

    return jsonResponse({ success: true, event }, { setCookie });
  } catch (error) {
    const message =
      error instanceof Error
        ? error.message
        : 'An unexpected error occurred';

    return jsonResponse({ error: message }, { status: 500, setCookie });
  }
}
import { cookies } from 'next/headers';
import { chatFeedbackBodySchema } from '@/lib/chat/contracts';
import { recordChatEvent } from '@/lib/chat/events';
import { recordChatFeedback } from '@/lib/chat/feedback';
import { getThread } from '@/lib/chat/history';
import {
  createResourceCookieHeader,
  getOrCreateResourceId,
} from '@/lib/chat/session';

export const runtime = 'nodejs';

/**
 * Builds a JSON `Response`.
 *
 * @param body - Value serialized with `JSON.stringify` as the response body.
 * @param init - Standard `ResponseInit` fields plus an optional `setCookie`
 *   string that is appended as a `Set-Cookie` header when present.
 * @returns A response with `Content-Type: application/json` and status
 *   `init.status`, defaulting to 200.
 */
function jsonResponse(
  body: Record<string, unknown>,
  init?: ResponseInit & { setCookie?: string },
) {
  const headers = new Headers(init?.headers);
  headers.set('Content-Type', 'application/json');

  if (init?.setCookie) {
    headers.append('Set-Cookie', init.setCookie);
  }

  return new Response(JSON.stringify(body), {
    status: init?.status ?? 200,
    headers,
  });
}

/**
 * Checks whether `resourceId` may write to the thread `threadId`.
 *
 * Access is denied only when the thread exists, has a `resourceId`, and that
 * id differs from the caller's. A thread that `getThread` does not return, or
 * one without a `resourceId`, is treated as accessible.
 * NOTE(review): presumably this lets feedback arrive before the thread is
 * persisted — confirm this is the intended policy.
 */
async function validateThreadAccess(threadId: string, resourceId: string) {
  const thread = await getThread(threadId);

  if (thread && thread.resourceId && thread.resourceId !== resourceId) {
    return false;
  }

  return true;
}

/**
 * POST /api/chat/feedback — stores structured feedback for one message.
 *
 * Responses:
 * - 400 when the body is not valid JSON or fails `chatFeedbackBodySchema`
 * - 404 when the thread belongs to a different resource
 * - 200 `{ success: true, event }` once the feedback is stored
 * - 500 `{ error }` for any other failure
 *
 * After the feedback is stored, a `feedback_submitted` instrumentation event
 * is emitted without being awaited. Every response carries the resource
 * cookie when one was newly created.
 */
export async function POST(request: Request) {
  const cookieStore = await cookies();
  const { resourceId, shouldSetCookie } = getOrCreateResourceId(cookieStore);
  // Computed once so each response path attaches the same cookie header.
  const setCookie = shouldSetCookie
    ? createResourceCookieHeader(resourceId)
    : undefined;

  // Parse the body separately: unparseable JSON is a client error and must
  // not fall through to the generic 500 handler below.
  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    return jsonResponse(
      { error: 'Invalid request body' },
      { status: 400, setCookie },
    );
  }

  try {
    const body = chatFeedbackBodySchema.safeParse(payload);

    if (!body.success) {
      return jsonResponse(
        { error: 'Invalid request body' },
        { status: 400, setCookie },
      );
    }

    const hasAccess = await validateThreadAccess(body.data.threadId, resourceId);

    if (!hasAccess) {
      // Reported as "not found" rather than "forbidden".
      return jsonResponse(
        { error: 'Thread not found' },
        { status: 404, setCookie },
      );
    }

    const event = await recordChatFeedback(body.data);

    // Best-effort analytics: the feedback itself is already stored, so a
    // failure to record the instrumentation event must not fail the request.
    void recordChatEvent({
      eventType: 'feedback_submitted',
      threadId: body.data.threadId,
      messageId: body.data.messageId,
      taskType: body.data.taskType,
      metadata: {
        feedback: body.data.feedback,
      },
    }).catch(() => {});

    return jsonResponse({ success: true, event }, { setCookie });
  } catch (error) {
    const message =
      error instanceof Error
        ? error.message
        : 'An unexpected error occurred';

    return jsonResponse({ error: message }, { status: 500, setCookie });
  }
}
createUIMessageStreamResponseMock.mockImplementation( + () => new Response('streamed'), + ); + + getThreadMock.mockResolvedValue(null); + getChatHistoryMock.mockResolvedValue([]); + recordChatEventMock.mockResolvedValue({ + id: 'event-1', + createdAt: '2026-03-08T00:00:00.000Z', + }); + }); + + afterEach(() => { + vi.resetModules(); + vi.clearAllMocks(); + }); + + it('returns 400 for invalid POST payloads', async () => { + const { POST } = await import('./route'); + const response = await POST( + new Request('http://localhost:3000/api/chat', { + method: 'POST', + body: JSON.stringify({ threadId: 'bad-id', messages: 'nope' }), + }), + ); + + expect(response.status).toBe(400); + }); + + it('returns stored messages for GET history requests', async () => { + getChatHistoryMock.mockResolvedValue([ + { + id: 'msg-1', + role: 'assistant', + parts: [{ type: 'text', text: 'Hello again' }], + }, + ]); + + const { GET } = await import('./route'); + const response = await GET( + new Request('http://localhost:3000/api/chat?threadId=11111111-1111-4111-8111-111111111111'), + ); + const payload = await response.json(); + + expect(response.status).toBe(200); + expect(payload.messages).toHaveLength(1); + }); + + it('streams chat responses for valid POST requests', async () => { + const { POST } = await import('./route'); + const response = await POST( + new Request('http://localhost:3000/api/chat', { + method: 'POST', + body: JSON.stringify({ + threadId: '11111111-1111-4111-8111-111111111111', + messages: [ + { + id: 'user-1', + role: 'user', + parts: [{ type: 'text', text: 'Create a migration plan.' 
}], + }, + ], + }), + }), + ); + + expect(response.status).toBe(200); + expect(handleChatStreamMock).toHaveBeenCalledTimes(1); + expect(createUIMessageStreamResponseMock).toHaveBeenCalledTimes(1); + expect(recordChatEventMock).toHaveBeenCalledWith({ + eventType: 'thread_started', + threadId: '11111111-1111-4111-8111-111111111111', + }); + expect(recordChatEventMock).toHaveBeenCalledWith({ + eventType: 'message_submitted', + threadId: '11111111-1111-4111-8111-111111111111', + messageId: 'user-1', + }); + expect(recordChatEventMock).toHaveBeenCalledWith({ + eventType: 'task_classified', + threadId: '11111111-1111-4111-8111-111111111111', + taskType: 'planning', + }); + }); +}); diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index b157f38..46f99a0 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -1,99 +1,300 @@ -import { openai } from '@ai-sdk/openai'; -import { streamText } from 'ai'; +import { handleChatStream } from '@mastra/ai-sdk'; +import { createUIMessageStreamResponse } from 'ai'; +import { + MASTRA_RESOURCE_ID_KEY, + MASTRA_THREAD_ID_KEY, + RequestContext, +} from '@mastra/core/request-context'; +import { cookies } from 'next/headers'; +import { chatPostBodySchema, historyQuerySchema } from '@/lib/chat/contracts'; +import { recordChatEvent } from '@/lib/chat/events'; +import { getChatHistory, getThread } from '@/lib/chat/history'; +import { isMessageTooLong, getLastUserMessageText } from '@/lib/chat/messages'; +import { + createResourceCookieHeader, + getOrCreateResourceId, +} from '@/lib/chat/session'; +import { mastra } from '@/mastra'; -export const runtime = 'edge'; +export const runtime = 'nodejs'; -const SYSTEM_PROMPT = `You are SecondOrder, a meta-thinking AI assistant that embodies the principles of meta-cognition for LLM systems. 
+function normalizeMessages( + messages: Array<{ + id?: string; + role: 'system' | 'user' | 'assistant'; + parts: Array<{ type: string; text?: string }>; + metadata?: unknown; + }>, +) { + return messages.map((message) => ({ + ...message, + id: message.id ?? crypto.randomUUID(), + })); +} -Core Capabilities: -- Meta thinking layer: Analyze goals, prompts, and constraints to generate sharper context -- Self-improving loop: Generate answers, absorb feedback, audit progress, and iterate -- Adaptive learning: Modify strategies for each new problem +function jsonResponse( + body: Record, + init?: ResponseInit & { setCookie?: string }, +) { + const headers = new Headers(init?.headers); + headers.set('Content-Type', 'application/json'); -Your responses should: -1. Be concise and analytical -2. Demonstrate self-monitoring and self-evaluation -3. Reference meta-cognitive principles when relevant -4. Acknowledge uncertainty and suggest iterative improvements + if (init?.setCookie) { + headers.append('Set-Cookie', init.setCookie); + } -You help users understand and apply meta-thinking principles to their problems.`; + return new Response(JSON.stringify(body), { + status: init?.status ?? 
200, + headers, + }); +} -const MAX_MESSAGE_LENGTH = 4000; +async function validateThreadAccess(threadId: string, resourceId: string) { + const thread = await getThread(threadId); -interface IncomingMessage { - role: 'user' | 'assistant'; - content?: string; - parts?: Array<{ type: string; text: string }>; + if (thread && thread.resourceId && thread.resourceId !== resourceId) { + return false; + } + + return true; } -interface NormalizedMessage { - role: 'user' | 'assistant'; - content: string; +function recordChatEventSafely( + input: Parameters[0], +) { + void recordChatEvent(input).catch(() => {}); } -function normalizeMessages(messages: IncomingMessage[]): NormalizedMessage[] { - return messages.map((msg) => { - // If message has parts array (e.g., from useChat), extract text content - if (msg.parts && Array.isArray(msg.parts)) { - const textContent = msg.parts - .filter((part) => part.type === 'text') - .map((part) => part.text) - .join(''); - return { - role: msg.role, - content: textContent, - }; - } - // Otherwise use content directly - return { - role: msg.role, - content: msg.content || '', - }; - }); +export async function GET(request: Request) { + const cookieStore = await cookies(); + const { resourceId, shouldSetCookie } = getOrCreateResourceId(cookieStore); + const query = historyQuerySchema.safeParse( + Object.fromEntries(new URL(request.url).searchParams), + ); + + if (!query.success) { + return jsonResponse( + { error: 'Invalid thread ID' }, + { + status: 400, + setCookie: shouldSetCookie + ? createResourceCookieHeader(resourceId) + : undefined, + }, + ); + } + + const hasAccess = await validateThreadAccess(query.data.threadId, resourceId); + + if (!hasAccess) { + return jsonResponse( + { error: 'Thread not found' }, + { + status: 404, + setCookie: shouldSetCookie + ? 
createResourceCookieHeader(resourceId) + : undefined, + }, + ); + } + + const messages = await getChatHistory(query.data.threadId, resourceId); + + return jsonResponse( + { messages }, + { + setCookie: shouldSetCookie + ? createResourceCookieHeader(resourceId) + : undefined, + }, + ); } -export async function POST(req: Request) { +export async function POST(request: Request) { + const cookieStore = await cookies(); + const { resourceId, shouldSetCookie } = getOrCreateResourceId(cookieStore); + try { - const { messages } = await req.json(); + const body = chatPostBodySchema.safeParse(await request.json()); - if (!Array.isArray(messages)) { - return new Response(JSON.stringify({ error: 'Invalid request body' }), { - status: 400, - headers: { 'Content-Type': 'application/json' }, - }); + if (!body.success) { + return jsonResponse( + { error: 'Invalid request body' }, + { + status: 400, + setCookie: shouldSetCookie + ? createResourceCookieHeader(resourceId) + : undefined, + }, + ); } - // Normalize messages to CoreMessage format - const normalizedMessages = normalizeMessages(messages); + const normalizedMessages = normalizeMessages(body.data.messages); + const latestUserMessage = getLastUserMessageText(normalizedMessages); - const lastMessage = normalizedMessages[normalizedMessages.length - 1]; - if ( - typeof lastMessage?.content === 'string' && - lastMessage.content.length > MAX_MESSAGE_LENGTH - ) { - return new Response(JSON.stringify({ error: 'Message too long' }), { - status: 400, - headers: { 'Content-Type': 'application/json' }, + if (isMessageTooLong(latestUserMessage)) { + return jsonResponse( + { error: 'Message too long' }, + { + status: 400, + setCookie: shouldSetCookie + ? 
createResourceCookieHeader(resourceId) + : undefined, + }, + ); + } + + const existingThread = await getThread(body.data.threadId); + + if (existingThread && existingThread.resourceId && existingThread.resourceId !== resourceId) { + return jsonResponse( + { error: 'Thread not found' }, + { + status: 404, + setCookie: shouldSetCookie + ? createResourceCookieHeader(resourceId) + : undefined, + }, + ); + } + + const latestMessage = normalizedMessages.at(-1); + + if (!existingThread) { + recordChatEventSafely({ + eventType: 'thread_started', + threadId: body.data.threadId, }); } - const result = streamText({ - model: openai('gpt-5.4'), - system: SYSTEM_PROMPT, - messages: normalizedMessages, + if (latestMessage?.role === 'user') { + recordChatEventSafely({ + eventType: 'message_submitted', + threadId: body.data.threadId, + messageId: latestMessage.id, + }); + } + + const workflow = mastra.getWorkflow('metaChatWorkflow'); + const run = await workflow.createRun({ resourceId }); + const workflowResult = await run.start({ + inputData: { + messages: normalizedMessages, + threadId: body.data.threadId, + resourceId, + }, + }); + + if (workflowResult.status !== 'success') { + return jsonResponse( + { error: 'Unable to prepare chat response' }, + { + status: 500, + setCookie: shouldSetCookie + ? 
createResourceCookieHeader(resourceId) + : undefined, + }, + ); + } + + recordChatEventSafely({ + eventType: 'task_classified', + threadId: body.data.threadId, + taskType: workflowResult.result.taskType, + }); + + const requestContext = new RequestContext>( + Object.entries({ + ...workflowResult.result, + threadId: body.data.threadId, + resourceId, + }), + ); + requestContext.set(MASTRA_THREAD_ID_KEY, body.data.threadId); + requestContext.set(MASTRA_RESOURCE_ID_KEY, resourceId); + + const stream = await handleChatStream({ + mastra, + agentId: 'secondOrderAgent', + params: { + messages: normalizedMessages as never, + memory: { + thread: body.data.threadId, + resource: resourceId, + }, + requestContext, + }, }); + const assistantMessageMetadata = { + taskType: workflowResult.result.taskType, + shouldUseMeta: workflowResult.result.shouldUseMeta, + meta: workflowResult.result.meta, + }; + let assistantMessageId: string | null = null; + const streamWithMetadata = stream.pipeThrough( + new TransformStream({ + transform(chunk, controller) { + if (chunk.type === 'text-start' && !assistantMessageId) { + assistantMessageId = chunk.id; + + if (workflowResult.result.shouldUseMeta) { + recordChatEventSafely({ + eventType: 'meta_mode_used', + threadId: body.data.threadId, + messageId: assistantMessageId, + taskType: workflowResult.result.taskType, + }); + } + } + + if (chunk.type === 'start' || chunk.type === 'finish') { + if (chunk.type === 'finish') { + recordChatEventSafely({ + eventType: 'response_completed', + threadId: body.data.threadId, + messageId: assistantMessageId ?? 
undefined, + taskType: workflowResult.result.taskType, + }); + } + + controller.enqueue({ + ...chunk, + messageMetadata: assistantMessageMetadata, + }); + return; + } - return result.toUIMessageStreamResponse(); + controller.enqueue(chunk); + }, + }), + ); + + const response = createUIMessageStreamResponse({ + stream: streamWithMetadata as never, + }); + + if (shouldSetCookie) { + response.headers.append( + 'Set-Cookie', + createResourceCookieHeader(resourceId), + ); + } + + return response; } catch (error) { - if (error instanceof Error) { - return new Response(JSON.stringify({ error: error.message }), { + const message = + error instanceof Error + ? error.message + : 'An unexpected error occurred'; + + return jsonResponse( + { error: message }, + { status: 500, - headers: { 'Content-Type': 'application/json' }, - }); - } - return new Response( - JSON.stringify({ error: 'An unexpected error occurred' }), - { status: 500, headers: { 'Content-Type': 'application/json' } }, + setCookie: shouldSetCookie + ? 
createResourceCookieHeader(resourceId) + : undefined, + }, ); } } diff --git a/app/chat/[threadId]/page.tsx b/app/chat/[threadId]/page.tsx new file mode 100644 index 0000000..da5e98e --- /dev/null +++ b/app/chat/[threadId]/page.tsx @@ -0,0 +1,22 @@ +import { notFound } from 'next/navigation'; +import { threadIdSchema } from '@/lib/chat/contracts'; +import { ChatPageClient } from '../_components/chat-page-client'; + +interface ChatThreadPageProps { + params: Promise<{ + threadId: string; + }>; +} + +export default async function ChatThreadPage({ + params, +}: ChatThreadPageProps) { + const { threadId } = await params; + const parsedThreadId = threadIdSchema.safeParse(threadId); + + if (!parsedThreadId.success) { + notFound(); + } + + return ; +} diff --git a/app/chat/_components/chat-input.tsx b/app/chat/_components/chat-input.tsx index 08944b6..47b7791 100644 --- a/app/chat/_components/chat-input.tsx +++ b/app/chat/_components/chat-input.tsx @@ -1,6 +1,6 @@ 'use client'; -import type { ChangeEvent, FormEvent } from 'react'; +import type { ChangeEvent, FormEvent, RefObject } from 'react'; import { Button } from '@/components/ui/button'; interface ChatInputProps { @@ -8,6 +8,8 @@ interface ChatInputProps { handleInputChange: (e: ChangeEvent) => void; handleSubmit: (e: FormEvent) => void; isLoading: boolean; + placeholder?: string; + textareaRef?: RefObject; } export function ChatInput({ @@ -15,6 +17,8 @@ export function ChatInput({ handleInputChange, handleSubmit, isLoading, + placeholder = 'Ask for a plan, analysis, decision, or troubleshooting help...', + textareaRef, }: ChatInputProps) { const onKeyDown = (e: React.KeyboardEvent) => { if (e.key === 'Enter' && !e.shiftKey) { @@ -33,12 +37,14 @@ export function ChatInput({ className="mx-auto flex max-w-3xl items-end gap-3" >