From 5d6dd70f0393caa28b15e9904cb6c20ff7b45486 Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Tue, 24 Feb 2026 10:18:27 -0500 Subject: [PATCH 01/13] feat: add CitationsContentBlock for document citation support (#487) Add CitationsBlock content type to support document citations returned by models (particularly Bedrock) when citations are enabled. This aligns the TypeScript SDK with the Python SDK's citation support. --- src/__fixtures__/mock-message-model.ts | 13 ++ src/__fixtures__/slim-types.ts | 2 + src/index.ts | 18 +++ src/models/__tests__/bedrock.test.ts | 64 ++++++++ src/models/bedrock.ts | 28 +++- src/models/model.ts | 42 +++-- src/models/streaming.ts | 24 ++- src/types/__tests__/citations.test.ts | 162 +++++++++++++++++++ src/types/__tests__/messages.test.ts | 26 +++ src/types/citations.ts | 214 +++++++++++++++++++++++++ src/types/messages.ts | 6 + 11 files changed, 584 insertions(+), 15 deletions(-) create mode 100644 src/types/__tests__/citations.test.ts create mode 100644 src/types/citations.ts diff --git a/src/__fixtures__/mock-message-model.ts b/src/__fixtures__/mock-message-model.ts index fa128aaa..4077cf05 100644 --- a/src/__fixtures__/mock-message-model.ts +++ b/src/__fixtures__/mock-message-model.ts @@ -263,6 +263,19 @@ export class MockMessageModel extends Model { // This is typically used in system prompts or message content for guardrail evaluation break + case 'citationsBlock': + yield { type: 'modelContentBlockStartEvent' } + yield { + type: 'modelContentBlockDeltaEvent', + delta: { + type: 'citationsContentDelta', + citations: block.citations, + content: block.content, + }, + } + yield { type: 'modelContentBlockStopEvent' } + break + case 'imageBlock': case 'videoBlock': case 'documentBlock': diff --git a/src/__fixtures__/slim-types.ts b/src/__fixtures__/slim-types.ts index 9a475893..e4f6684e 100644 --- a/src/__fixtures__/slim-types.ts +++ b/src/__fixtures__/slim-types.ts @@ -14,6 +14,7 @@ import type { JsonBlock, } from '../types/messages.js' import type { ImageBlock, VideoBlock, DocumentBlock } from '../types/media.js' +import type { CitationsBlock } from '../types/citations.js' /** * Strips the toJSON method from a type, allowing plain objects to be used in tests. @@ -42,6 +43,7 @@ export type PlainContentBlock = | NoJSON | NoJSON | NoJSON + | NoJSON /** * Plain system content block without toJSON method. diff --git a/src/index.ts b/src/index.ts index 64d0e41d..24d65f5a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -66,6 +66,23 @@ export { contentBlockFromData, } from './types/messages.js' +// Citation types +export type { + CitationsBlockData, + Citation, + CitationLocation, + CitationSourceContent, + CitationGeneratedContent, + DocumentCharLocation, + DocumentPageLocation, + DocumentChunkLocation, + SearchResultLocation, + WebLocation, +} from './types/citations.js' + +// Citation class +export { CitationsBlock } from './types/citations.js' + // Media classes export { S3Location, ImageBlock, VideoBlock, DocumentBlock } from './types/media.js' @@ -122,6 +139,7 @@ export type { TextDelta, ToolUseInputDelta, ReasoningContentDelta, + CitationsContentDelta, ContentBlockDelta, ModelContentBlockDeltaEventData, ModelContentBlockDeltaEvent, diff --git a/src/models/__tests__/bedrock.test.ts b/src/models/__tests__/bedrock.test.ts index 4fff826d..a0d160c6 100644 --- a/src/models/__tests__/bedrock.test.ts +++ b/src/models/__tests__/bedrock.test.ts @@ -761,6 +761,70 @@ describe('BedrockModel', () => { }) }) + it('yields and validates citationsContent events correctly', async () => { + const citationsData = { + citations: [ + { + location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, + sourceContent: [{ text: 'source text' }], + title: 'Test Doc', + }, + ], + content: [{ text: 'generated text' }], + } + + const mockSend = vi.fn(async () => { + if (stream) { + return { + stream: (async function* (): AsyncGenerator { + yield { messageStart: { role: 'assistant' } } + yield { contentBlockStart: {} } + yield { + contentBlockDelta: { + delta: { citationsContent: citationsData }, + }, + } + yield { contentBlockStop: {} } + yield { messageStop: { stopReason: 'end_turn' } } + yield { + metadata: { usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 }, metrics: { latencyMs: 100 } }, + } + })(), + } + } else { + return { + output: { + message: { + role: 'assistant', + content: [{ citationsContent: citationsData }], + }, + }, + stopReason: 'end_turn', + usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 }, + metrics: { latencyMs: 100 }, + } + } + }) + mockBedrockClientImplementation({ send: mockSend }) + + const provider = new BedrockModel({ stream }) + const messages = [new Message({ role: 'user', content: [new TextBlock('Cite this.')] })] + const events = await collectIterator(provider.stream(messages)) + + expect(events).toContainEqual({ role: 'assistant', type: 'modelMessageStartEvent' }) + expect(events).toContainEqual({ type: 'modelContentBlockStartEvent' }) + expect(events).toContainEqual({ + type: 'modelContentBlockDeltaEvent', + delta: { + type: 'citationsContentDelta', + citations: citationsData.citations, + content: citationsData.content, + }, + }) + expect(events).toContainEqual({ type: 'modelContentBlockStopEvent' }) + expect(events).toContainEqual({ stopReason: 'endTurn', type: 'modelMessageStopEvent' }) + }) + describe('error handling', async () => { it.each([ { diff --git a/src/models/bedrock.ts b/src/models/bedrock.ts index 06705dee..5436d6c8 100644 --- a/src/models/bedrock.ts +++ b/src/models/bedrock.ts @@ -39,7 +39,8 @@ import { import { type BaseModelConfig, Model, type StreamOptions } from '../models/model.js' import type { ContentBlock, Message, StopReason, ToolUseBlock } from '../types/messages.js' import type { ImageSource, VideoSource, DocumentSource } from '../types/media.js' -import type { ModelStreamEvent, ReasoningContentDelta, Usage } from '../models/streaming.js' +import type { CitationsContentDelta, ModelStreamEvent, ReasoningContentDelta, Usage } from '../models/streaming.js' +import type { CitationsBlockData } from '../types/citations.js' import type { JSONValue } from '../types/json.js' import { ContextWindowOverflowError, ModelThrottledError, normalizeError } from '../errors.js' import { ensureDefined } from '../types/validation.js' @@ -632,6 +633,10 @@ export class BedrockModel extends Model { }, } + case 'citationsBlock': + // Citations are output-only blocks, not sent back to models + return undefined + case 'guardContentBlock': { if (block.text) { return { @@ -802,6 +807,18 @@ export class BedrockModel extends Model { events.push({ type: 'modelContentBlockStopEvent' }) }, + citationsContent: (block: CitationsBlockData): void => { + if (!block) return + events.push({ type: 'modelContentBlockStartEvent' }) + + const delta: CitationsContentDelta = { + type: 'citationsContentDelta', + citations: block.citations, + content: block.content, + } + events.push({ type: 'modelContentBlockDeltaEvent', delta }) + events.push({ type: 'modelContentBlockStopEvent' }) + }, } const content = ensureDefined(message.content, 'message.content') @@ -915,6 +932,15 @@ export class BedrockModel extends Model { events.push({ type: 'modelContentBlockDeltaEvent', delta: reasoningDelta }) } }, + citationsContent: (block: CitationsBlockData): void => { + if (!block) return + const delta: CitationsContentDelta = { + type: 'citationsContentDelta', + citations: block.citations, + content: block.content, + } + events.push({ type: 'modelContentBlockDeltaEvent', delta }) + }, } for (const key in delta) { diff --git a/src/models/model.ts b/src/models/model.ts index 96555dc6..19e545c9 100644 --- a/src/models/model.ts +++ b/src/models/model.ts @@ -8,6 +8,8 @@ import { TextBlock, ToolUseBlock, } from '../types/messages.js' +import { CitationsBlock } from '../types/citations.js' +import type { Citation, CitationGeneratedContent } from '../types/citations.js' import type { ToolChoice, ToolSpec } from '../tools/types.js' import { ModelContentBlockDeltaEvent, @@ -203,6 +205,9 @@ export abstract class Model { signature?: string redactedContent?: Uint8Array } = {} + let accumulatedCitationsList: Citation[] = [] + let accumulatedCitationsContent: CitationGeneratedContent[] = [] + let hasCitations = false let errorToThrow: Error | undefined = undefined let stoppedMessage: Message | null = null let finalStopReason: StopReason | null = null @@ -228,23 +233,28 @@ export abstract class Model { accumulatedToolInput = '' accumulatedText = '' accumulatedReasoning = {} + accumulatedCitationsList = [] + accumulatedCitationsContent = [] + hasCitations = false break - case 'modelContentBlockDeltaEvent': - switch (event.delta.type) { - case 'textDelta': - accumulatedText += event.delta.text - break - case 'toolUseInputDelta': - accumulatedToolInput += event.delta.input - break - case 'reasoningContentDelta': - if (event.delta.text) accumulatedReasoning.text = (accumulatedReasoning.text ?? '') + event.delta.text - if (event.delta.signature) accumulatedReasoning.signature = event.delta.signature - if (event.delta.redactedContent) accumulatedReasoning.redactedContent = event.delta.redactedContent - break + case 'modelContentBlockDeltaEvent': { + const delta = event.delta + if (delta.type === 'textDelta') { + accumulatedText += delta.text + } else if (delta.type === 'toolUseInputDelta') { + accumulatedToolInput += delta.input + } else if (delta.type === 'reasoningContentDelta') { + if (delta.text) accumulatedReasoning.text = (accumulatedReasoning.text ?? '') + delta.text + if (delta.signature) accumulatedReasoning.signature = delta.signature + if (delta.redactedContent) accumulatedReasoning.redactedContent = delta.redactedContent + } else if (delta.type === 'citationsContentDelta') { + accumulatedCitationsList.push(...delta.citations) + accumulatedCitationsContent.push(...delta.content) + hasCitations = true } break + } case 'modelContentBlockStopEvent': { // Finalize and emit complete ContentBlock @@ -265,6 +275,12 @@ export abstract class Model { ...accumulatedReasoning, }) accumulatedReasoning = {} // Reset after creating reasoning block + } else if (hasCitations) { + block = new CitationsBlock({ + citations: accumulatedCitationsList, + content: accumulatedCitationsContent, + }) + hasCitations = false } else { block = new TextBlock(accumulatedText) } diff --git a/src/models/streaming.ts b/src/models/streaming.ts index 6b67e6e6..c908f88e 100644 --- a/src/models/streaming.ts +++ b/src/models/streaming.ts @@ -1,5 +1,6 @@ import type { Role, StopReason } from '../types/messages.js' import type { JSONValue } from '../types/json.js' +import type { Citation, CitationGeneratedContent } from '../types/citations.js' /** * ModelStreamEvent types for Model interactions. @@ -323,7 +324,7 @@ export interface ToolUseStart { * * This is a discriminated union for type-safe delta handling. */ -export type ContentBlockDelta = TextDelta | ToolUseInputDelta | ReasoningContentDelta +export type ContentBlockDelta = TextDelta | ToolUseInputDelta | ReasoningContentDelta | CitationsContentDelta /** * Text delta within a content block. @@ -383,6 +384,27 @@ export interface ReasoningContentDelta { redactedContent?: Uint8Array } +/** + * Citations content delta within a content block. + * Represents a citations content block from the model. + */ +export interface CitationsContentDelta { + /** + * Discriminator for citations content delta. + */ + type: 'citationsContentDelta' + + /** + * Array of citations linking generated content to source locations. + */ + citations: Citation[] + + /** + * The generated content associated with these citations. + */ + content: CitationGeneratedContent[] +} + /** * Token usage statistics for a model invocation. * Tracks input, output, and total tokens, plus cache-related metrics. diff --git a/src/types/__tests__/citations.test.ts b/src/types/__tests__/citations.test.ts new file mode 100644 index 00000000..5428ea4e --- /dev/null +++ b/src/types/__tests__/citations.test.ts @@ -0,0 +1,162 @@ +import { describe, expect, it } from 'vitest' +import { CitationsBlock, type CitationsBlockData } from '../citations.js' + +describe('CitationsBlock', () => { + const documentCharData: CitationsBlockData = { + citations: [ + { + location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, + sourceContent: [{ text: 'source text from document' }], + title: 'Test Document', + }, + ], + content: [{ text: 'generated text with citation' }], + } + + it('creates block with correct type discriminator', () => { + const block = new CitationsBlock(documentCharData) + expect(block.type).toBe('citationsBlock') + }) + + it('stores citations and content', () => { + const block = new CitationsBlock(documentCharData) + expect(block.citations).toStrictEqual(documentCharData.citations) + expect(block.content).toStrictEqual(documentCharData.content) + }) + + describe('toJSON/fromJSON round-trips', () => { + it('round-trips with documentChar location', () => { + const original = new CitationsBlock(documentCharData) + const restored = CitationsBlock.fromJSON(original.toJSON()) + expect(restored).toEqual(original) + }) + + it('round-trips with documentPage location', () => { + const data: CitationsBlockData = { + citations: [ + { + location: { documentPage: { documentIndex: 1, start: 3, end: 7 } }, + sourceContent: [{ text: 'page content' }], + }, + ], + content: [{ text: 'generated from pages' }], + } + const original = new CitationsBlock(data) + const restored = CitationsBlock.fromJSON(original.toJSON()) + expect(restored).toEqual(original) + }) + + it('round-trips with documentChunk location', () => { + const data: CitationsBlockData = { + citations: [ + { + location: { documentChunk: { documentIndex: 0, start: 0, end: 2 } }, + sourceContent: [{ text: 'chunk content' }], + }, + ], + content: [{ text: 'generated from chunks' }], + } + const original = new CitationsBlock(data) + const restored = CitationsBlock.fromJSON(original.toJSON()) + expect(restored).toEqual(original) + }) + + it('round-trips with searchResult location', () => { + const data: CitationsBlockData = { + citations: [ + { + location: { searchResult: { searchResultIndex: 2, start: 0, end: 100 } }, + sourceContent: [{ text: 'search result content' }], + }, + ], + content: [{ text: 'generated from search' }], + } + const original = new CitationsBlock(data) + const restored = CitationsBlock.fromJSON(original.toJSON()) + expect(restored).toEqual(original) + }) + + it('round-trips with web location', () => { + const data: CitationsBlockData = { + citations: [ + { + location: { web: { url: 'https://example.com/article' } }, + sourceContent: [{ text: 'web content' }], + title: 'Example Article', + }, + ], + content: [{ text: 'generated from web' }], + } + const original = new CitationsBlock(data) + const restored = CitationsBlock.fromJSON(original.toJSON()) + expect(restored).toEqual(original) + }) + }) + + it('handles optional title field', () => { + const withTitle = new CitationsBlock(documentCharData) + expect(withTitle.citations[0]!.title).toBe('Test Document') + + const withoutTitle = new CitationsBlock({ + citations: [ + { + location: { documentChar: { documentIndex: 0, start: 0, end: 10 } }, + sourceContent: [{ text: 'source' }], + }, + ], + content: [{ text: 'generated' }], + }) + expect(withoutTitle.citations[0]!.title).toBeUndefined() + }) + + it('handles empty arrays', () => { + const data: CitationsBlockData = { + citations: [], + content: [], + } + const block = new CitationsBlock(data) + expect(block.citations).toStrictEqual([]) + expect(block.content).toStrictEqual([]) + + const restored = CitationsBlock.fromJSON(block.toJSON()) + expect(restored).toEqual(block) + }) + + it('toJSON returns wrapped format', () => { + const block = new CitationsBlock(documentCharData) + const json = block.toJSON() + expect(json).toStrictEqual({ + citationsContent: { + citations: documentCharData.citations, + content: documentCharData.content, + }, + }) + }) + + it('works with JSON.stringify', () => { + const original = new CitationsBlock(documentCharData) + const jsonString = JSON.stringify(original) + const restored = CitationsBlock.fromJSON(JSON.parse(jsonString)) + expect(restored).toEqual(original) + }) + + it('handles multiple citations and content blocks', () => { + const data: CitationsBlockData = { + citations: [ + { + location: { documentChar: { documentIndex: 0, start: 0, end: 50 } }, + sourceContent: [{ text: 'first source' }], + title: 'Doc 1', + }, + { + location: { documentPage: { documentIndex: 1, start: 1, end: 3 } }, + sourceContent: [{ text: 'second source' }, { text: 'additional source' }], + }, + ], + content: [{ text: 'first generated' }, { text: 'second generated' }], + } + const original = new CitationsBlock(data) + const restored = CitationsBlock.fromJSON(original.toJSON()) + expect(restored).toEqual(original) + }) +}) diff --git a/src/types/__tests__/messages.test.ts b/src/types/__tests__/messages.test.ts index 15210cbc..9ad75f3e 100644 --- a/src/types/__tests__/messages.test.ts +++ b/src/types/__tests__/messages.test.ts @@ -14,6 +14,7 @@ import { systemPromptToData, } from '../messages.js' import { ImageBlock, VideoBlock, DocumentBlock, encodeBase64 } from '../media.js' +import { CitationsBlock } from '../citations.js' describe('Message', () => { test('creates message with role and content', () => { @@ -281,6 +282,30 @@ describe('Message.fromMessageData', () => { expect(message.content[0]!.type).toBe('documentBlock') }) + it('converts citations content block data to CitationsBlock', () => { + const messageData: MessageData = { + role: 'assistant', + content: [ + { + citationsContent: { + citations: [ + { + location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, + sourceContent: [{ text: 'source text' }], + title: 'Test Doc', + }, + ], + content: [{ text: 'generated text' }], + }, + }, + ], + } + const message = Message.fromMessageData(messageData) + expect(message.content).toHaveLength(1) + expect(message.content[0]).toBeInstanceOf(CitationsBlock) + expect(message.content[0]!.type).toBe('citationsBlock') + }) + it('converts multiple content blocks', () => { const messageData: MessageData = { role: 'user', @@ -532,6 +557,7 @@ describe('toJSON/fromJSON round-trips', () => { ['Message with text content', () => new Message({ role: 'user', content: [new TextBlock('Hello')] })], ['Message with multiple content blocks', () => new Message({ role: 'assistant', content: [new TextBlock('Here is the result'), new ToolUseBlock({ name: 'test-tool', toolUseId: '123', input: { key: 'value' } })] })], ['Message with image content', () => new Message({ role: 'user', content: [new TextBlock('Check this image'), new ImageBlock({ format: 'png', source: { bytes: new Uint8Array([1, 2, 3]) } })] })], + ['CitationsBlock', () => new CitationsBlock({ citations: [{ location: { documentChar: { documentIndex: 0, start: 0, end: 10 } }, sourceContent: [{ text: 'source' }] }], content: [{ text: 'generated' }] })], ] as const it.each(roundTripCases)('%s', (_name, createBlock) => { diff --git a/src/types/citations.ts b/src/types/citations.ts new file mode 100644 index 00000000..710aa301 --- /dev/null +++ b/src/types/citations.ts @@ -0,0 +1,214 @@ +import type { JSONSerializable } from './json.js' + +/** + * Citation types for document citation content blocks. + * + * Citations are returned by models (particularly Bedrock) when document citations + * are enabled. They are output-only blocks that appear in conversation history. + */ + +/** + * Location referencing character positions within a document. + */ +export interface DocumentCharLocation { + /** + * Index of the source document. + */ + documentIndex: number + + /** + * Start character position. + */ + start: number + + /** + * End character position. + */ + end: number +} + +/** + * Location referencing page positions within a document. + */ +export interface DocumentPageLocation { + /** + * Index of the source document. + */ + documentIndex: number + + /** + * Start page number. + */ + start: number + + /** + * End page number. + */ + end: number +} + +/** + * Location referencing chunk positions within a document. + */ +export interface DocumentChunkLocation { + /** + * Index of the source document. + */ + documentIndex: number + + /** + * Start chunk index. + */ + start: number + + /** + * End chunk index. + */ + end: number +} + +/** + * Location referencing a search result. + */ +export interface SearchResultLocation { + /** + * Index of the search result. + */ + searchResultIndex: number + + /** + * Start position within the search result. + */ + start: number + + /** + * End position within the search result. + */ + end: number +} + +/** + * Location referencing a web URL. + */ +export interface WebLocation { + /** + * The URL of the web source. + */ + url: string +} + +/** + * Discriminated union of citation location types. + * Each variant uses a unique object key to identify the location type. + */ +export type CitationLocation = + | { documentChar: DocumentCharLocation } + | { documentPage: DocumentPageLocation } + | { documentChunk: DocumentChunkLocation } + | { searchResult: SearchResultLocation } + | { web: WebLocation } + +/** + * Source content referenced by a citation. + */ +export interface CitationSourceContent { + /** + * The text content from the source. + */ + text: string +} + +/** + * Generated content associated with a citation. + */ +export interface CitationGeneratedContent { + /** + * The generated text content. + */ + text: string +} + +/** + * A single citation linking generated content to a source location. + */ +export interface Citation { + /** + * The location of the cited source. + */ + location: CitationLocation + + /** + * The source content referenced by this citation. + */ + sourceContent: CitationSourceContent[] + + /** + * Optional title of the cited source. + */ + title?: string +} + +/** + * Data for a citations content block. + */ +export interface CitationsBlockData { + /** + * Array of citations linking generated content to source locations. + */ + citations: Citation[] + + /** + * The generated content associated with these citations. + */ + content: CitationGeneratedContent[] +} + +/** + * Citations content block within a message. + * Returned by models when document citations are enabled. + * This is an output-only block — users do not construct these directly. + */ +export class CitationsBlock implements CitationsBlockData, JSONSerializable<{ citationsContent: CitationsBlockData }> { + /** + * Discriminator for citations content. + */ + readonly type = 'citationsBlock' as const + + /** + * Array of citations linking generated content to source locations. + */ + readonly citations: Citation[] + + /** + * The generated content associated with these citations. + */ + readonly content: CitationGeneratedContent[] + + constructor(data: CitationsBlockData) { + this.citations = data.citations + this.content = data.content + } + + /** + * Serializes the CitationsBlock to a JSON-compatible ContentBlockData object. + * Called automatically by JSON.stringify(). + */ + toJSON(): { citationsContent: CitationsBlockData } { + return { + citationsContent: { + citations: this.citations, + content: this.content, + }, + } + } + + /** + * Creates a CitationsBlock instance from its wrapped data format. + * + * @param data - Wrapped CitationsBlockData to deserialize + * @returns CitationsBlock instance + */ + static fromJSON(data: { citationsContent: CitationsBlockData }): CitationsBlock { + return new CitationsBlock(data.citationsContent) + } +} diff --git a/src/types/messages.ts b/src/types/messages.ts index cf7e3621..b438c6cf 100644 --- a/src/types/messages.ts +++ b/src/types/messages.ts @@ -2,6 +2,8 @@ import type { JSONValue, Serialized, MaybeSerializedInput, JSONSerializable } fr import { omitUndefined } from './json.js' import type { ImageBlockData, VideoBlockData, DocumentBlockData } from './media.js' import { ImageBlock, VideoBlock, DocumentBlock, encodeBase64, decodeBase64 } from './media.js' +import type { CitationsBlockData } from './citations.js' +import { CitationsBlock } from './citations.js' /** * Message types and content blocks for conversational AI interactions. @@ -115,6 +117,7 @@ export type ContentBlockData = | { image: ImageBlockData } | { video: VideoBlockData } | { document: DocumentBlockData } + | { citationsContent: CitationsBlockData } export type ContentBlock = | TextBlock @@ -126,6 +129,7 @@ export type ContentBlock = | ImageBlock | VideoBlock | DocumentBlock + | CitationsBlock /** * Data for a text block. @@ -875,6 +879,8 @@ export function contentBlockFromData(data: ContentBlockData): ContentBlock { return VideoBlock.fromJSON(data) } else if ('document' in data) { return DocumentBlock.fromJSON(data) + } else if ('citationsContent' in data) { + return CitationsBlock.fromJSON(data) } else { throw new Error('Unknown ContentBlockData type') } From e6283f611492132203b7d8c4415a63af83ee335c Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Tue, 24 Feb 2026 10:43:04 -0500 Subject: [PATCH 02/13] fix: use Serialized<> wrapper in CitationsBlock to follow convention --- src/types/citations.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/types/citations.ts b/src/types/citations.ts index 710aa301..d5ade9a3 100644 --- a/src/types/citations.ts +++ b/src/types/citations.ts @@ -1,4 +1,4 @@ -import type { JSONSerializable } from './json.js' +import type { JSONSerializable, Serialized } from './json.js' /** * Citation types for document citation content blocks. @@ -168,7 +168,9 @@ export interface CitationsBlockData { * Returned by models when document citations are enabled. * This is an output-only block — users do not construct these directly. */ -export class CitationsBlock implements CitationsBlockData, JSONSerializable<{ citationsContent: CitationsBlockData }> { +export class CitationsBlock + implements CitationsBlockData, JSONSerializable<{ citationsContent: Serialized }> +{ /** * Discriminator for citations content. */ @@ -193,7 +195,7 @@ export class CitationsBlock implements CitationsBlockData, JSONSerializable<{ ci * Serializes the CitationsBlock to a JSON-compatible ContentBlockData object. * Called automatically by JSON.stringify(). */ - toJSON(): { citationsContent: CitationsBlockData } { + toJSON(): { citationsContent: Serialized } { return { citationsContent: { citations: this.citations, @@ -208,7 +210,7 @@ export class CitationsBlock implements CitationsBlockData, JSONSerializable<{ ci * @param data - Wrapped CitationsBlockData to deserialize * @returns CitationsBlock instance */ - static fromJSON(data: { citationsContent: CitationsBlockData }): CitationsBlock { + static fromJSON(data: { citationsContent: Serialized }): CitationsBlock { return new CitationsBlock(data.citationsContent) } } From f8cd8bc2e407c3c7f605e51dc606579cd71b2843 Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Tue, 24 Feb 2026 12:42:33 -0500 Subject: [PATCH 03/13] feat: filter and send citations back to Bedrock for Python SDK parity Bedrock's _formatContentBlock now filters CitationsBlock fields and sends them back in conversation history, matching the Python SDK behavior. Also adds citations feature flag to ProviderFeatures for integration test coverage. --- src/models/__tests__/bedrock.test.ts | 109 +++++++++++++++++++++ src/models/bedrock.ts | 23 ++++- test/integ/__fixtures__/model-providers.ts | 5 + 3 files changed, 134 insertions(+), 3 deletions(-) diff --git a/src/models/__tests__/bedrock.test.ts b/src/models/__tests__/bedrock.test.ts index a0d160c6..bcab5d12 100644 --- a/src/models/__tests__/bedrock.test.ts +++ b/src/models/__tests__/bedrock.test.ts @@ -6,6 +6,7 @@ import { ContextWindowOverflowError, ModelThrottledError } from '../../errors.js import { Message, ReasoningBlock, ToolUseBlock, ToolResultBlock, JsonBlock } from '../../types/messages.js' import type { SystemContentBlock } from '../../types/messages.js' import { TextBlock, GuardContentBlock, CachePointBlock } from '../../types/messages.js' +import { CitationsBlock } from '../../types/citations.js' import type { StreamOptions } from '../model.js' import { collectIterator } from '../../__fixtures__/model-test-helpers.js' @@ -1539,6 +1540,114 @@ describe('BedrockModel', () => { }) }) + describe('citations content block formatting', () => { + const mockConverseStreamCommand = vi.mocked(ConverseStreamCommand) + + it('formats citations block with filtered fields in request', async () => { + const provider = new BedrockModel() + const messages = [ + new Message({ + role: 'assistant', + content: [ + new CitationsBlock({ + citations: [ + { + location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, + sourceContent: [{ text: 'source text' }], + title: 'Test Doc', + }, + ], + content: [{ text: 'generated text' }], + }), + ], + }), + new Message({ + role: 'user', + content: [new TextBlock('Follow up')], + }), + ] + + collectIterator(provider.stream(messages)) + + expect(mockConverseStreamCommand).toHaveBeenLastCalledWith( + expect.objectContaining({ + messages: [ + { + role: 'assistant', + content: [ + { + citationsContent: { + citations: [ + { + location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, + sourceContent: [{ text: 'source text' }], + title: 'Test Doc', + }, + ], + content: [{ text: 'generated text' }], + }, + }, + ], + }, + { + role: 'user', + content: [{ text: 'Follow up' }], + }, + ], + }) + ) + }) + + it('formats citations block without optional title', async () => { + const provider = new BedrockModel() + const messages = [ + new Message({ + role: 'assistant', + content: [ + new CitationsBlock({ + citations: [ + { + location: { web: { url: 'https://example.com' } }, + sourceContent: [{ text: 'web source' }], + }, + ], + content: [{ text: 'cited text' }], + }), + ], + }), + new Message({ + role: 'user', + content: [new TextBlock('Thanks')], + }), + ] + + collectIterator(provider.stream(messages)) + + expect(mockConverseStreamCommand).toHaveBeenLastCalledWith( + expect.objectContaining({ + messages: expect.arrayContaining([ + expect.objectContaining({ + role: 'assistant', + content: [ + { + citationsContent: { + citations: [ + { + location: { web: { url: 'https://example.com' } }, + sourceContent: [{ text: 'web source' }], + }, + ], + content: [{ text: 'cited text' }], + }, + }, + ], + }), + ]), + }) + ) + }) + }) + describe('includeToolResultStatus configuration', async () => { const mockConverseStreamCommand = vi.mocked(ConverseStreamCommand) diff --git a/src/models/bedrock.ts b/src/models/bedrock.ts index 5436d6c8..bc2dd4cd 100644 --- a/src/models/bedrock.ts +++ b/src/models/bedrock.ts @@ -633,9 +633,26 @@ export class BedrockModel extends Model { }, } - case 'citationsBlock': - // Citations are output-only blocks, not sent back to models - return undefined + case 'citationsBlock': { + const filteredCitations = block.citations.map((citation) => { + const filtered: Record = {} + if (citation.location) filtered.location = citation.location + if (citation.sourceContent) { + const filteredSource = citation.sourceContent.filter((sc) => sc.text).map((sc) => ({ text: sc.text })) + if (filteredSource.length > 0) filtered.sourceContent = filteredSource + } + if (citation.title) filtered.title = citation.title + return filtered + }) + const filteredContent = block.content.filter((gc) => gc.text).map((gc) => ({ text: gc.text })) + + return { + citationsContent: { + citations: filteredCitations, + ...(filteredContent.length > 0 && { content: filteredContent }), + }, + } + } case 'guardContentBlock': { if (block.text) { diff --git a/test/integ/__fixtures__/model-providers.ts b/test/integ/__fixtures__/model-providers.ts index 20ed42d9..56e5292a 100644 --- a/test/integ/__fixtures__/model-providers.ts +++ b/test/integ/__fixtures__/model-providers.ts @@ -22,6 +22,7 @@ export interface ProviderFeatures { images: boolean documents: boolean video: boolean + citations: boolean } export const bedrock = { @@ -34,6 +35,7 @@ export const bedrock = { images: true, documents: true, video: true, + citations: true, } satisfies ProviderFeatures, models: { default: {}, @@ -68,6 +70,7 @@ export const openai = { images: true, documents: true, video: false, + citations: false, } satisfies ProviderFeatures, models: { default: {}, @@ -100,6 +103,7 @@ export const anthropic = { images: true, documents: true, video: false, + citations: false, } satisfies ProviderFeatures, models: { default: {}, @@ -139,6 +143,7 @@ export const gemini = { images: true, documents: true, video: true, + citations: false, } satisfies ProviderFeatures, models: { default: {}, From 8803976c196a6407ed91ec3904383eed80a2156d Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Tue, 24 Feb 2026 12:55:38 -0500 Subject: [PATCH 04/13] test: add integration test for citations multi-turn with Bedrock --- test/integ/agent.test.ts | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/test/integ/agent.test.ts b/test/integ/agent.test.ts index b38e5f5c..b1ff3482 100644 --- a/test/integ/agent.test.ts +++ b/test/integ/agent.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from 'vitest' import { Agent, + CitationsBlock, DocumentBlock, ImageBlock, Message, @@ -262,6 +263,42 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode expect(textContent?.text).toMatch(/yellow/i) }) + it.skipIf(!supports.citations)('returns citations from document and preserves them in multi-turn', async () => { + const docBlock = new DocumentBlock({ + name: 'test-document', + format: 'txt', + source: { text: 'The capital of France is Paris. It is known as the City of Light.' }, + citations: { enabled: true }, + }) + + const agent = new Agent({ + model: createModel(), + printer: false, + }) + + // First turn: send document with citations enabled + const result = await agent.invoke([ + docBlock, + new TextBlock('What is the capital of France according to the document? Answer briefly.'), + ]) + + expect(result.stopReason).toBe('endTurn') + + // Verify citations block is present in the response + const citationsBlock = result.lastMessage.content.find( + (block): block is CitationsBlock => block.type === 'citationsBlock' + ) + expect(citationsBlock).toBeDefined() + expect(citationsBlock!.citations.length).toBeGreaterThan(0) + + // Second turn: verify conversation continues with citations in history + const followUp = await agent.invoke('What else does the document say about that city?') + + expect(followUp.stopReason).toBe('endTurn') + expect(followUp.lastMessage.role).toBe('assistant') + expect(followUp.lastMessage.content.length).toBeGreaterThan(0) + }) + describe.skipIf(!supports.images)('multimodal input', () => { it('accepts ContentBlock[] input', async () => { const agent = new Agent({ From c4808fcc97c0beabd67d39faee198ae528f25f78 Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Tue, 24 Feb 2026 13:40:38 -0500 Subject: [PATCH 05/13] fix: correct CitationLocation union key and expand citation integ tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix searchResult → searchResultLocation to match Bedrock API wire format - Add missing domain field to WebLocation - Add missing source field to Citation - Pass through source field in Bedrock citation filter - Test both streaming and non-streaming Bedrock paths for citations - Test documentChar (text) and documentPage (PDF) location variants - Verify citationsContentDelta events in streaming --- src/models/bedrock.ts | 1 + src/types/__tests__/citations.test.ts | 4 +- src/types/citations.ts | 12 ++- test/integ/agent.test.ts | 126 +++++++++++++++++++++----- 4 files changed, 118 insertions(+), 25 deletions(-) diff --git a/src/models/bedrock.ts b/src/models/bedrock.ts index bc2dd4cd..6137cae9 100644 --- a/src/models/bedrock.ts +++ b/src/models/bedrock.ts @@ -641,6 +641,7 @@ export class BedrockModel extends Model { const filteredSource = citation.sourceContent.filter((sc) => sc.text).map((sc) => ({ text: sc.text })) if (filteredSource.length > 0) filtered.sourceContent = filteredSource } + if (citation.source) filtered.source = citation.source if (citation.title) filtered.title = citation.title return filtered }) diff --git a/src/types/__tests__/citations.test.ts b/src/types/__tests__/citations.test.ts index 5428ea4e..0caeb5ae 100644 --- a/src/types/__tests__/citations.test.ts +++ b/src/types/__tests__/citations.test.ts @@ -61,11 +61,11 @@ describe('CitationsBlock', () => { expect(restored).toEqual(original) }) - it('round-trips with searchResult location', () => { + it('round-trips with searchResultLocation location', () => { const data: CitationsBlockData = { citations: [ { - location: { searchResult: { searchResultIndex: 2, start: 0, end: 100 } }, + location: { searchResultLocation: { searchResultIndex: 2, start: 0, end: 100 } }, sourceContent: [{ text: 'search result content' }], }, ], diff --git a/src/types/citations.ts b/src/types/citations.ts index d5ade9a3..847a3a11 100644 --- a/src/types/citations.ts +++ b/src/types/citations.ts @@ -95,6 +95,11 @@ export interface WebLocation { * The URL of the web source. */ url: string + + /** + * The domain of the web source. + */ + domain?: string } /** @@ -105,7 +110,7 @@ export type CitationLocation = | { documentChar: DocumentCharLocation } | { documentPage: DocumentPageLocation } | { documentChunk: DocumentChunkLocation } - | { searchResult: SearchResultLocation } + | { searchResultLocation: SearchResultLocation } | { web: WebLocation } /** @@ -137,6 +142,11 @@ export interface Citation { */ location: CitationLocation + /** + * The source identifier string. + */ + source?: string + /** * The source content referenced by this citation. */ diff --git a/test/integ/agent.test.ts b/test/integ/agent.test.ts index b1ff3482..aa55f220 100644 --- a/test/integ/agent.test.ts +++ b/test/integ/agent.test.ts @@ -263,40 +263,122 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode expect(textContent?.text).toMatch(/yellow/i) }) - it.skipIf(!supports.citations)('returns citations from document and preserves them in multi-turn', async () => { - const docBlock = new DocumentBlock({ + describe.skipIf(!supports.citations)('Citations', () => { + const textDocBlock = new DocumentBlock({ name: 'test-document', format: 'txt', source: { text: 'The capital of France is Paris. It is known as the City of Light.' }, citations: { enabled: true }, }) - const agent = new Agent({ - model: createModel(), - printer: false, + const textDocPrompt = new TextBlock('What is the capital of France according to the document? Answer briefly.') + + describe.each([ + { label: 'streaming', modelOptions: {} }, + { label: 'non-streaming', modelOptions: { stream: false } }, + ])('$label', ({ modelOptions }) => { + it('returns documentChar citations from text document and preserves them in multi-turn', async () => { + const agent = new Agent({ + model: createModel(modelOptions), + printer: false, + }) + + const result = await agent.invoke([textDocBlock, textDocPrompt]) + + expect(result.stopReason).toBe('endTurn') + + const citationsBlock = result.lastMessage.content.find( + (block): block is CitationsBlock => block.type === 'citationsBlock' + ) + expect(citationsBlock).toBeDefined() + expect(citationsBlock!.citations.length).toBeGreaterThan(0) + expect(citationsBlock!.content.length).toBeGreaterThan(0) + expect(citationsBlock!.content[0]!.text).toBeDefined() + + const citation = citationsBlock!.citations[0]! + expect(citation.location).toBeDefined() + expect('documentChar' in citation.location).toBe(true) + expect(citation.sourceContent.length).toBeGreaterThan(0) + expect(citation.sourceContent[0]!.text).toBeDefined() + + // Second turn: verify citations survive in conversation history + const followUp = await agent.invoke('What else does the document say about that city?') + expect(followUp.stopReason).toBe('endTurn') + expect(followUp.lastMessage.role).toBe('assistant') + expect(followUp.lastMessage.content.length).toBeGreaterThan(0) + }) + + it('returns documentPage citations from PDF document and preserves them in multi-turn', async () => { + const pdfBytes = await loadFixture(letterPdfUrl) + + const agent = new Agent({ + model: createModel(modelOptions), + printer: false, + }) + + const result = await agent.invoke([ + new DocumentBlock({ + name: 'letter', + format: 'pdf', + source: { bytes: pdfBytes }, + citations: { enabled: true }, + }), + new TextBlock('Summarize this document briefly.'), + ]) + + expect(result.stopReason).toBe('endTurn') + + const citationsBlock = result.lastMessage.content.find( + (block): block is CitationsBlock => block.type === 'citationsBlock' + ) + expect(citationsBlock).toBeDefined() + expect(citationsBlock!.citations.length).toBeGreaterThan(0) + expect(citationsBlock!.content.length).toBeGreaterThan(0) + expect(citationsBlock!.content[0]!.text).toBeDefined() + + const citation = citationsBlock!.citations[0]! + expect(citation.location).toBeDefined() + expect('documentPage' in citation.location).toBe(true) + expect(citation.sourceContent.length).toBeGreaterThan(0) + expect(citation.sourceContent[0]!.text).toBeDefined() + + // Second turn: verify citations survive in conversation history + const followUp = await agent.invoke('What else can you tell me about this document?') + expect(followUp.stopReason).toBe('endTurn') + expect(followUp.lastMessage.role).toBe('assistant') + expect(followUp.lastMessage.content.length).toBeGreaterThan(0) + }) }) - // First turn: send document with citations enabled - const result = await agent.invoke([ - docBlock, - new TextBlock('What is the capital of France according to the document? Answer briefly.'), - ]) + it('emits citationsContentDelta events during streaming', async () => { + const agent = new Agent({ + model: createModel(), + printer: false, + }) - expect(result.stopReason).toBe('endTurn') + const { items, result } = await collectGenerator(agent.stream([textDocBlock, textDocPrompt])) - // Verify citations block is present in the response - const citationsBlock = result.lastMessage.content.find( - (block): block is CitationsBlock => block.type === 'citationsBlock' - ) - expect(citationsBlock).toBeDefined() - expect(citationsBlock!.citations.length).toBeGreaterThan(0) + expect(result.stopReason).toBe('endTurn') + + // Verify citationsContentDelta events were emitted during streaming + const citationDeltas = items.filter( + (item) => + item.type === 'modelStreamUpdateEvent' && + item.event.type === 'modelContentBlockDeltaEvent' && + item.event.delta.type === 'citationsContentDelta' + ) + expect(citationDeltas.length).toBeGreaterThan(0) - // Second turn: verify conversation continues with citations in history - const followUp = await agent.invoke('What else does the document say about that city?') + // Verify the aggregated result also contains the CitationsBlock + const citationsBlock = result.lastMessage.content.find( + (block): block is CitationsBlock => block.type === 'citationsBlock' + ) + expect(citationsBlock).toBeDefined() + expect(citationsBlock!.citations.length).toBeGreaterThan(0) + }) - expect(followUp.stopReason).toBe('endTurn') - expect(followUp.lastMessage.role).toBe('assistant') - expect(followUp.lastMessage.content.length).toBeGreaterThan(0) + // Note: documentChunk, searchResultLocation, and web citation location variants + // require RAG/search integration and cannot be triggered from document uploads alone. }) describe.skipIf(!supports.images)('multimodal input', () => { From 515a5cef94b96b3fec3ccdf9df190d6626b5b77e Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Wed, 25 Feb 2026 11:53:50 -0500 Subject: [PATCH 06/13] fix: model CitationSourceContent and CitationGeneratedContent as union types Use type aliases instead of interfaces to follow the established pattern for Bedrock API union types (like DocumentSourceData). This allows non-breaking expansion when new variants are added in the future. --- src/types/citations.ts | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/types/citations.ts b/src/types/citations.ts index 847a3a11..3eb062ff 100644 --- a/src/types/citations.ts +++ b/src/types/citations.ts @@ -115,23 +115,15 @@ export type CitationLocation = /** * Source content referenced by a citation. + * Modeled as a union type for future extensibility (Bedrock UNION type). */ -export interface CitationSourceContent { - /** - * The text content from the source. - */ - text: string -} +export type CitationSourceContent = { text: string } /** * Generated content associated with a citation. + * Modeled as a union type for future extensibility (Bedrock UNION type). */ -export interface CitationGeneratedContent { - /** - * The generated text content. - */ - text: string -} +export type CitationGeneratedContent = { text: string } /** * A single citation linking generated content to a source location. From dbb73f03a5af7fa20aeca65d5ecbfb2d4af3fbea Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Wed, 25 Feb 2026 11:59:50 -0500 Subject: [PATCH 07/13] docs: document API union type convention in AGENTS.md --- AGENTS.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 08097a77..48b2792d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -581,6 +581,47 @@ export class CachePointBlock { **Rationale**: This consistent naming makes discriminated unions predictable and improves code readability. Developers can easily understand the relationship between the type value and the class. +### API Union Types (Bedrock Pattern) + +When the upstream API (e.g., Bedrock) defines a type as a **UNION** ("only one member can be specified"), model it as a TypeScript `type` union with each variant's field **required** — not an `interface` with optional fields. This allows non-breaking expansion when new variants are added. + +The Bedrock API marks all fields in union types as "Not Required" as a mechanism for future extensibility. In TypeScript, encode the mutual exclusivity using `|` with each variant having its field required. The "not required" from the API docs means "this field won't be present if a different variant is active." + +```typescript +// ✅ Correct: type union — each variant has its field required +// Adding a new variant later (e.g., | { image: ImageData }) is non-breaking +export type CitationSourceContent = { text: string } + +// ✅ Correct: multi-variant union with object-key discrimination +export type DocumentSourceData = + | { bytes: Uint8Array } + | { text: string } + | { content: DocumentContentBlockData[] } + | { s3Location: S3LocationData } + +// ✅ Correct: multi-variant union for citation locations +export type CitationLocation = + | { documentChar: DocumentCharLocation } + | { documentPage: DocumentPageLocation } + | { web: WebLocation } + +// ❌ Wrong: interface with optional fields — cannot expand without breaking +export interface CitationSourceContent { + text?: string +} + +// ❌ Wrong: interface with required field — changing to union later is breaking +export interface CitationSourceContent { + text: string +} +``` + +**Key points**: +- Use `type` alias (not `interface`) so it can be expanded to a union later +- Each variant's field is **required** within that variant +- Use object-key discrimination (`'text' in source`) to narrow variants at runtime +- See `DocumentSourceData` in `src/types/media.ts` and `CitationLocation` in `src/types/citations.ts` for reference implementations + ### Error Handling ```typescript From a57a2889fd44f7d5c13adb65179b0d02b7f3bcbc Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Wed, 25 Feb 2026 12:53:44 -0500 Subject: [PATCH 08/13] test: add all CitationLocation variant coverage across unit and integ tests Add comprehensive tests for all 5 CitationLocation union variants (documentChar, documentPage, documentChunk, searchResultLocation, web) to verify round-trip serialization, Bedrock formatting pipeline, and multi-turn conversation history preservation. --- src/models/__tests__/bedrock.test.ts | 70 ++++++++++++++++++++----- src/types/__tests__/citations.test.ts | 66 +++++++++++++++++++++++ test/integ/agent.test.ts | 75 ++++++++++++++++++++++++++- 3 files changed, 195 insertions(+), 16 deletions(-) diff --git a/src/models/__tests__/bedrock.test.ts b/src/models/__tests__/bedrock.test.ts index bcab5d12..76b47032 100644 --- a/src/models/__tests__/bedrock.test.ts +++ b/src/models/__tests__/bedrock.test.ts @@ -1543,21 +1543,43 @@ describe('BedrockModel', () => { describe('citations content block formatting', () => { const mockConverseStreamCommand = vi.mocked(ConverseStreamCommand) - it('formats citations block with filtered fields in request', async () => { + it('preserves all CitationLocation union variants through formatting pipeline', async () => { const provider = new BedrockModel() + const citations = [ + { + location: { documentChar: { documentIndex: 0, start: 150, end: 300 } }, + sourceContent: [{ text: 'char source' }], + title: 'Text Document', + }, + { + location: { documentPage: { documentIndex: 0, start: 2, end: 3 } }, + sourceContent: [{ text: 'page source' }], + title: 'PDF Document', + }, + { + location: { documentChunk: { documentIndex: 1, start: 5, end: 8 } }, + sourceContent: [{ text: 'chunk source' }], + title: 'Chunked Document', + }, + { + location: { searchResultLocation: { searchResultIndex: 0, start: 25, end: 150 } }, + sourceContent: [{ text: 'search source' }], + title: 'Search Result', + }, + { + location: { web: { url: 'https://example.com/doc', domain: 'example.com' } }, + sourceContent: [{ text: 'web source' }], + title: 'Web Page', + }, + ] + const messages = [ new Message({ role: 'assistant', content: [ new CitationsBlock({ - citations: [ - { - location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, - sourceContent: [{ text: 'source text' }], - title: 'Test Doc', - }, - ], - content: [{ text: 'generated text' }], + citations, + content: [{ text: 'generated text with all citation types' }], }), ], }), @@ -1579,12 +1601,32 @@ describe('BedrockModel', () => { citationsContent: { citations: [ { - location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, - sourceContent: [{ text: 'source text' }], - title: 'Test Doc', + location: { documentChar: { documentIndex: 0, start: 150, end: 300 } }, + sourceContent: [{ text: 'char source' }], + title: 'Text Document', + }, + { + location: { documentPage: { documentIndex: 0, start: 2, end: 3 } }, + sourceContent: [{ text: 'page source' }], + title: 'PDF Document', + }, + { + location: { documentChunk: { documentIndex: 1, start: 5, end: 8 } }, + sourceContent: [{ text: 'chunk source' }], + title: 'Chunked Document', + }, + { + location: { searchResultLocation: { searchResultIndex: 0, start: 25, end: 150 } }, + sourceContent: [{ text: 'search source' }], + title: 'Search Result', + }, + { + location: { web: { url: 'https://example.com/doc', domain: 'example.com' } }, + sourceContent: [{ text: 'web source' }], + title: 'Web Page', }, ], - content: [{ text: 'generated text' }], + content: [{ text: 'generated text with all citation types' }], }, }, ], @@ -1598,7 +1640,7 @@ describe('BedrockModel', () => { ) }) - it('formats citations block without optional title', async () => { + it('formats citations block without optional title or source', async () => { const provider = new BedrockModel() const messages = [ new Message({ diff --git a/src/types/__tests__/citations.test.ts b/src/types/__tests__/citations.test.ts index 0caeb5ae..1fd51dfb 100644 --- a/src/types/__tests__/citations.test.ts +++ b/src/types/__tests__/citations.test.ts @@ -159,4 +159,70 @@ describe('CitationsBlock', () => { const restored = CitationsBlock.fromJSON(original.toJSON()) expect(restored).toEqual(original) }) + + it('round-trips all CitationLocation union variants in a single block', () => { + const data: CitationsBlockData = { + citations: [ + { + location: { documentChar: { documentIndex: 0, start: 150, end: 300 } }, + sourceContent: [{ text: 'char source' }], + title: 'Text Document', + }, + { + location: { documentPage: { documentIndex: 0, start: 2, end: 3 } }, + sourceContent: [{ text: 'page source' }], + title: 'PDF Document', + }, + { + location: { documentChunk: { documentIndex: 1, start: 5, end: 8 } }, + sourceContent: [{ text: 'chunk source' }], + title: 'Chunked Document', + }, + { + location: { searchResultLocation: { searchResultIndex: 0, start: 25, end: 150 } }, + sourceContent: [{ text: 'search source' }], + title: 'Search Result', + }, + { + location: { web: { url: 'https://example.com/doc', domain: 'example.com' } }, + sourceContent: [{ text: 'web source' }], + title: 'Web Page', + }, + ], + content: [{ text: 'generated text referencing all sources' }], + } + const original = new CitationsBlock(data) + const json = original.toJSON() + const restored = CitationsBlock.fromJSON(json) + + expect(restored).toEqual(original) + expect(restored.citations).toHaveLength(5) + + // Verify each variant has exactly one wrapper key with inner fields preserved + expect('documentChar' in restored.citations[0]!.location).toBe(true) + expect('documentPage' in restored.citations[1]!.location).toBe(true) + expect('documentChunk' in restored.citations[2]!.location).toBe(true) + expect('searchResultLocation' in restored.citations[3]!.location).toBe(true) + expect('web' in restored.citations[4]!.location).toBe(true) + }) + + it('preserves optional source and domain fields', () => { + const data: CitationsBlockData = { + citations: [ + { + location: { web: { url: 'https://example.com', domain: 'example.com' } }, + source: 'web-source-id', + sourceContent: [{ text: 'web content' }], + title: 'Example', + }, + ], + content: [{ text: 'generated' }], + } + const block = new CitationsBlock(data) + expect(block.citations[0]!.source).toBe('web-source-id') + + const restored = CitationsBlock.fromJSON(block.toJSON()) + expect(restored.citations[0]!.source).toBe('web-source-id') + expect((restored.citations[0]!.location as { web: { url: string; domain: string } }).web.domain).toBe('example.com') + }) }) diff --git a/test/integ/agent.test.ts b/test/integ/agent.test.ts index aa55f220..01b7ded0 100644 --- a/test/integ/agent.test.ts +++ b/test/integ/agent.test.ts @@ -298,6 +298,14 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode const citation = citationsBlock!.citations[0]! expect(citation.location).toBeDefined() expect('documentChar' in citation.location).toBe(true) + + // Verify all inner fields are present (Bedrock docs say "Not Required" but we expect them) + const charLoc = (citation.location as { documentChar: { documentIndex: number; start: number; end: number } }) + .documentChar + expect(typeof charLoc.documentIndex).toBe('number') + expect(typeof charLoc.start).toBe('number') + expect(typeof charLoc.end).toBe('number') + expect(citation.sourceContent.length).toBeGreaterThan(0) expect(citation.sourceContent[0]!.text).toBeDefined() @@ -339,6 +347,14 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode const citation = citationsBlock!.citations[0]! expect(citation.location).toBeDefined() expect('documentPage' in citation.location).toBe(true) + + // Verify all inner fields are present (Bedrock docs say "Not Required" but we expect them) + const pageLoc = (citation.location as { documentPage: { documentIndex: number; start: number; end: number } }) + .documentPage + expect(typeof pageLoc.documentIndex).toBe('number') + expect(typeof pageLoc.start).toBe('number') + expect(typeof pageLoc.end).toBe('number') + expect(citation.sourceContent.length).toBeGreaterThan(0) expect(citation.sourceContent[0]!.text).toBeDefined() @@ -377,8 +393,63 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode expect(citationsBlock!.citations.length).toBeGreaterThan(0) }) - // Note: documentChunk, searchResultLocation, and web citation location variants - // require RAG/search integration and cannot be triggered from document uploads alone. + it('preserves all CitationLocation variants in multi-turn conversation history', async () => { + const agent = new Agent({ + model: createModel(), + printer: false, + }) + + // Seed conversation with an assistant message containing all 5 citation location variants + agent.messages.push( + new Message({ + role: 'user', + content: [new TextBlock('Tell me about these sources.')], + }), + new Message({ + role: 'assistant', + content: [ + new CitationsBlock({ + citations: [ + { + location: { documentChar: { documentIndex: 0, start: 150, end: 300 } }, + sourceContent: [{ text: 'char source content' }], + title: 'Text Document', + }, + { + location: { documentPage: { documentIndex: 0, start: 2, end: 3 } }, + sourceContent: [{ text: 'page source content' }], + title: 'PDF Document', + }, + { + location: { documentChunk: { documentIndex: 1, start: 5, end: 8 } }, + sourceContent: [{ text: 'chunk source content' }], + title: 'Chunked Document', + }, + { + location: { searchResultLocation: { searchResultIndex: 0, start: 25, end: 150 } }, + sourceContent: [{ text: 'search source content' }], + title: 'Search Result', + }, + { + location: { web: { url: 'https://example.com/doc', domain: 'example.com' } }, + sourceContent: [{ text: 'web source content' }], + title: 'Web Page', + }, + ], + content: [{ text: 'Here is information from all five source types.' }], + }), + new TextBlock('I found information from multiple source types.'), + ], + }) + ) + + // Follow-up turn forces Bedrock to accept all 5 variants in conversation history + const result = await agent.invoke('Can you summarize what you told me?') + + expect(result.stopReason).toBe('endTurn') + expect(result.lastMessage.role).toBe('assistant') + expect(result.lastMessage.content.length).toBeGreaterThan(0) + }) }) describe.skipIf(!supports.images)('multimodal input', () => { From e4f66abb9d10190680bf374ef78b24993d778d3f Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Wed, 25 Feb 2026 13:49:58 -0500 Subject: [PATCH 09/13] refactor: use type-field discriminator for CitationLocation union Replace object-key discrimination with a `type` field on CitationLocation to match the ContentBlockDelta pattern and provide better ergonomics for consumers (switch/case instead of `in` checks). Bedrock's wire format mapping is now handled in bedrock.ts via _mapBedrockCitationLocation and _mapCitationLocationToBedrock, decoupling the SDK types from the provider. --- src/index.ts | 5 - src/models/__tests__/bedrock.test.ts | 42 ++++-- src/models/bedrock.ts | 99 +++++++++++- src/types/__tests__/citations.test.ts | 48 +++--- src/types/__tests__/messages.test.ts | 4 +- src/types/citations.ts | 210 +++++++++++++------------- test/integ/agent.test.ts | 34 ++--- 7 files changed, 270 insertions(+), 172 deletions(-) diff --git a/src/index.ts b/src/index.ts index 24d65f5a..31d13bd5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -73,11 +73,6 @@ export type { CitationLocation, CitationSourceContent, CitationGeneratedContent, - DocumentCharLocation, - DocumentPageLocation, - DocumentChunkLocation, - SearchResultLocation, - WebLocation, } from './types/citations.js' // Citation class diff --git a/src/models/__tests__/bedrock.test.ts b/src/models/__tests__/bedrock.test.ts index 76b47032..7a51ae5f 100644 --- a/src/models/__tests__/bedrock.test.ts +++ b/src/models/__tests__/bedrock.test.ts @@ -763,7 +763,8 @@ describe('BedrockModel', () => { }) it('yields and validates citationsContent events correctly', async () => { - const citationsData = { + // Bedrock wire format uses object-key discrimination + const bedrockCitationsData = { citations: [ { location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, @@ -782,7 +783,7 @@ describe('BedrockModel', () => { yield { contentBlockStart: {} } yield { contentBlockDelta: { - delta: { citationsContent: citationsData }, + delta: { citationsContent: bedrockCitationsData }, }, } yield { contentBlockStop: {} } @@ -797,7 +798,7 @@ describe('BedrockModel', () => { output: { message: { role: 'assistant', - content: [{ citationsContent: citationsData }], + content: [{ citationsContent: bedrockCitationsData }], }, }, stopReason: 'end_turn', @@ -812,14 +813,21 @@ describe('BedrockModel', () => { const messages = [new Message({ role: 'user', content: [new TextBlock('Cite this.')] })] const events = await collectIterator(provider.stream(messages)) + // SDK events should use type-field discrimination expect(events).toContainEqual({ role: 'assistant', type: 'modelMessageStartEvent' }) expect(events).toContainEqual({ type: 'modelContentBlockStartEvent' }) expect(events).toContainEqual({ type: 'modelContentBlockDeltaEvent', delta: { type: 'citationsContentDelta', - citations: citationsData.citations, - content: citationsData.content, + citations: [ + { + location: { type: 'documentChar', documentIndex: 0, start: 10, end: 50 }, + sourceContent: [{ text: 'source text' }], + title: 'Test Doc', + }, + ], + content: [{ text: 'generated text' }], }, }) expect(events).toContainEqual({ type: 'modelContentBlockStopEvent' }) @@ -1543,31 +1551,32 @@ describe('BedrockModel', () => { describe('citations content block formatting', () => { const mockConverseStreamCommand = vi.mocked(ConverseStreamCommand) - it('preserves all CitationLocation union variants through formatting pipeline', async () => { + it('maps SDK CitationLocation types to Bedrock object-key format through formatting pipeline', async () => { const provider = new BedrockModel() - const citations = [ + // SDK format uses type-field discrimination + const sdkCitations = [ { - location: { documentChar: { documentIndex: 0, start: 150, end: 300 } }, + location: { type: 'documentChar' as const, documentIndex: 0, start: 150, end: 300 }, sourceContent: [{ text: 'char source' }], title: 'Text Document', }, { - location: { documentPage: { documentIndex: 0, start: 2, end: 3 } }, + location: { type: 'documentPage' as const, documentIndex: 0, start: 2, end: 3 }, sourceContent: [{ text: 'page source' }], title: 'PDF Document', }, { - location: { documentChunk: { documentIndex: 1, start: 5, end: 8 } }, + location: { type: 'documentChunk' as const, documentIndex: 1, start: 5, end: 8 }, sourceContent: [{ text: 'chunk source' }], title: 'Chunked Document', }, { - location: { searchResultLocation: { searchResultIndex: 0, start: 25, end: 150 } }, + location: { type: 'searchResult' as const, searchResultIndex: 0, start: 25, end: 150 }, sourceContent: [{ text: 'search source' }], title: 'Search Result', }, { - location: { web: { url: 'https://example.com/doc', domain: 'example.com' } }, + location: { type: 'web' as const, url: 'https://example.com/doc', domain: 'example.com' }, sourceContent: [{ text: 'web source' }], title: 'Web Page', }, @@ -1578,7 +1587,7 @@ describe('BedrockModel', () => { role: 'assistant', content: [ new CitationsBlock({ - citations, + citations: sdkCitations, content: [{ text: 'generated text with all citation types' }], }), ], @@ -1591,6 +1600,7 @@ describe('BedrockModel', () => { collectIterator(provider.stream(messages)) + // Bedrock wire format uses object-key discrimination expect(mockConverseStreamCommand).toHaveBeenLastCalledWith( expect.objectContaining({ messages: [ @@ -1616,7 +1626,9 @@ describe('BedrockModel', () => { title: 'Chunked Document', }, { - location: { searchResultLocation: { searchResultIndex: 0, start: 25, end: 150 } }, + location: { + searchResultLocation: { searchResultIndex: 0, start: 25, end: 150 }, + }, sourceContent: [{ text: 'search source' }], title: 'Search Result', }, @@ -1649,7 +1661,7 @@ describe('BedrockModel', () => { new CitationsBlock({ citations: [ { - location: { web: { url: 'https://example.com' } }, + location: { type: 'web', url: 'https://example.com' }, sourceContent: [{ text: 'web source' }], }, ], diff --git a/src/models/bedrock.ts b/src/models/bedrock.ts index 6137cae9..e6e4f19c 100644 --- a/src/models/bedrock.ts +++ b/src/models/bedrock.ts @@ -40,7 +40,7 @@ import { type BaseModelConfig, Model, type StreamOptions } from '../models/model import type { ContentBlock, Message, StopReason, ToolUseBlock } from '../types/messages.js' import type { ImageSource, VideoSource, DocumentSource } from '../types/media.js' import type { CitationsContentDelta, ModelStreamEvent, ReasoningContentDelta, Usage } from '../models/streaming.js' -import type { CitationsBlockData } from '../types/citations.js' +import type { CitationLocation, CitationsBlockData } from '../types/citations.js' import type { JSONValue } from '../types/json.js' import { ContextWindowOverflowError, ModelThrottledError, normalizeError } from '../errors.js' import { ensureDefined } from '../types/validation.js' @@ -636,7 +636,7 @@ export class BedrockModel extends Model { case 'citationsBlock': { const filteredCitations = block.citations.map((citation) => { const filtered: Record = {} - if (citation.location) filtered.location = citation.location + if (citation.location) filtered.location = this._mapCitationLocationToBedrock(citation.location) if (citation.sourceContent) { const filteredSource = citation.sourceContent.filter((sc) => sc.text).map((sc) => ({ text: sc.text })) if (filteredSource.length > 0) filtered.sourceContent = filteredSource @@ -829,10 +829,11 @@ export class BedrockModel extends Model { if (!block) return events.push({ type: 'modelContentBlockStartEvent' }) + const mapped = this._mapBedrockCitationsData(block) const delta: CitationsContentDelta = { type: 'citationsContentDelta', - citations: block.citations, - content: block.content, + citations: mapped.citations, + content: mapped.content, } events.push({ type: 'modelContentBlockDeltaEvent', delta }) events.push({ type: 'modelContentBlockStopEvent' }) @@ -952,10 +953,11 @@ export class BedrockModel extends Model { }, citationsContent: (block: CitationsBlockData): void => { if (!block) return + const mapped = this._mapBedrockCitationsData(block) const delta: CitationsContentDelta = { type: 'citationsContentDelta', - citations: block.citations, - content: block.content, + citations: mapped.citations, + content: mapped.content, } events.push({ type: 'modelContentBlockDeltaEvent', delta }) }, @@ -1093,6 +1095,91 @@ export class BedrockModel extends Model { return mappedStopReason } + + /** + * Maps a Bedrock object-key citation location to the SDK's type-discriminated format. + * + * @param bedrockLocation - Bedrock citation location with object-key discrimination + * @returns SDK CitationLocation with type field discrimination + */ + private _mapBedrockCitationLocation(bedrockLocation: Record): CitationLocation { + if ('documentChar' in bedrockLocation) { + const loc = bedrockLocation.documentChar as { documentIndex: number; start: number; end: number } + return { type: 'documentChar', documentIndex: loc.documentIndex, start: loc.start, end: loc.end } + } + if ('documentPage' in bedrockLocation) { + const loc = bedrockLocation.documentPage as { documentIndex: number; start: number; end: number } + return { type: 'documentPage', documentIndex: loc.documentIndex, start: loc.start, end: loc.end } + } + if ('documentChunk' in bedrockLocation) { + const loc = bedrockLocation.documentChunk as { documentIndex: number; start: number; end: number } + return { type: 'documentChunk', documentIndex: loc.documentIndex, start: loc.start, end: loc.end } + } + if ('searchResultLocation' in bedrockLocation) { + const loc = bedrockLocation.searchResultLocation as { + searchResultIndex: number + start: number + end: number + } + return { + type: 'searchResult', + searchResultIndex: loc.searchResultIndex, + start: loc.start, + end: loc.end, + } + } + if ('web' in bedrockLocation) { + const loc = bedrockLocation.web as { url: string; domain?: string } + return { type: 'web', url: loc.url, ...(loc.domain && { domain: loc.domain }) } + } + logger.warn(`citation_location=<${JSON.stringify(bedrockLocation)}> | unknown citation location type`) + return bedrockLocation as CitationLocation + } + + /** + * Maps Bedrock citation data to SDK Citation objects. + * + * @param bedrockData - Raw Bedrock CitationsBlockData + * @returns CitationsBlockData with SDK-format CitationLocations + */ + private _mapBedrockCitationsData(bedrockData: CitationsBlockData): CitationsBlockData { + return { + citations: bedrockData.citations.map((citation) => ({ + ...citation, + location: this._mapBedrockCitationLocation(citation.location as unknown as Record), + })), + content: bedrockData.content, + } + } + + /** + * Maps an SDK CitationLocation back to Bedrock's object-key format. + * + * @param location - SDK CitationLocation with type field + * @returns Bedrock object-key citation location + */ + private _mapCitationLocationToBedrock(location: CitationLocation): Record { + switch (location.type) { + case 'documentChar': + return { documentChar: { documentIndex: location.documentIndex, start: location.start, end: location.end } } + case 'documentPage': + return { documentPage: { documentIndex: location.documentIndex, start: location.start, end: location.end } } + case 'documentChunk': + return { documentChunk: { documentIndex: location.documentIndex, start: location.start, end: location.end } } + case 'searchResult': + return { + searchResultLocation: { + searchResultIndex: location.searchResultIndex, + start: location.start, + end: location.end, + }, + } + case 'web': + return { web: { url: location.url, ...(location.domain && { domain: location.domain }) } } + default: + return location as unknown as Record + } + } } /** diff --git a/src/types/__tests__/citations.test.ts b/src/types/__tests__/citations.test.ts index 1fd51dfb..04ae4fb3 100644 --- a/src/types/__tests__/citations.test.ts +++ b/src/types/__tests__/citations.test.ts @@ -5,7 +5,7 @@ describe('CitationsBlock', () => { const documentCharData: CitationsBlockData = { citations: [ { - location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, + location: { type: 'documentChar', documentIndex: 0, start: 10, end: 50 }, sourceContent: [{ text: 'source text from document' }], title: 'Test Document', }, @@ -35,7 +35,7 @@ describe('CitationsBlock', () => { const data: CitationsBlockData = { citations: [ { - location: { documentPage: { documentIndex: 1, start: 3, end: 7 } }, + location: { type: 'documentPage', documentIndex: 1, start: 3, end: 7 }, sourceContent: [{ text: 'page content' }], }, ], @@ -50,7 +50,7 @@ describe('CitationsBlock', () => { const data: CitationsBlockData = { citations: [ { - location: { documentChunk: { documentIndex: 0, start: 0, end: 2 } }, + location: { type: 'documentChunk', documentIndex: 0, start: 0, end: 2 }, sourceContent: [{ text: 'chunk content' }], }, ], @@ -61,11 +61,11 @@ describe('CitationsBlock', () => { expect(restored).toEqual(original) }) - it('round-trips with searchResultLocation location', () => { + it('round-trips with searchResult location', () => { const data: CitationsBlockData = { citations: [ { - location: { searchResultLocation: { searchResultIndex: 2, start: 0, end: 100 } }, + location: { type: 'searchResult', searchResultIndex: 2, start: 0, end: 100 }, sourceContent: [{ text: 'search result content' }], }, ], @@ -80,7 +80,7 @@ describe('CitationsBlock', () => { const data: CitationsBlockData = { citations: [ { - location: { web: { url: 'https://example.com/article' } }, + location: { type: 'web', url: 'https://example.com/article' }, sourceContent: [{ text: 'web content' }], title: 'Example Article', }, @@ -100,7 +100,7 @@ describe('CitationsBlock', () => { const withoutTitle = new CitationsBlock({ citations: [ { - location: { documentChar: { documentIndex: 0, start: 0, end: 10 } }, + location: { type: 'documentChar', documentIndex: 0, start: 0, end: 10 }, sourceContent: [{ text: 'source' }], }, ], @@ -144,12 +144,12 @@ describe('CitationsBlock', () => { const data: CitationsBlockData = { citations: [ { - location: { documentChar: { documentIndex: 0, start: 0, end: 50 } }, + location: { type: 'documentChar', documentIndex: 0, start: 0, end: 50 }, sourceContent: [{ text: 'first source' }], title: 'Doc 1', }, { - location: { documentPage: { documentIndex: 1, start: 1, end: 3 } }, + location: { type: 'documentPage', documentIndex: 1, start: 1, end: 3 }, sourceContent: [{ text: 'second source' }, { text: 'additional source' }], }, ], @@ -164,27 +164,27 @@ describe('CitationsBlock', () => { const data: CitationsBlockData = { citations: [ { - location: { documentChar: { documentIndex: 0, start: 150, end: 300 } }, + location: { type: 'documentChar', documentIndex: 0, start: 150, end: 300 }, sourceContent: [{ text: 'char source' }], title: 'Text Document', }, { - location: { documentPage: { documentIndex: 0, start: 2, end: 3 } }, + location: { type: 'documentPage', documentIndex: 0, start: 2, end: 3 }, sourceContent: [{ text: 'page source' }], title: 'PDF Document', }, { - location: { documentChunk: { documentIndex: 1, start: 5, end: 8 } }, + location: { type: 'documentChunk', documentIndex: 1, start: 5, end: 8 }, sourceContent: [{ text: 'chunk source' }], title: 'Chunked Document', }, { - location: { searchResultLocation: { searchResultIndex: 0, start: 25, end: 150 } }, + location: { type: 'searchResult', searchResultIndex: 0, start: 25, end: 150 }, sourceContent: [{ text: 'search source' }], title: 'Search Result', }, { - location: { web: { url: 'https://example.com/doc', domain: 'example.com' } }, + location: { type: 'web', url: 'https://example.com/doc', domain: 'example.com' }, sourceContent: [{ text: 'web source' }], title: 'Web Page', }, @@ -198,19 +198,19 @@ describe('CitationsBlock', () => { expect(restored).toEqual(original) expect(restored.citations).toHaveLength(5) - // Verify each variant has exactly one wrapper key with inner fields preserved - expect('documentChar' in restored.citations[0]!.location).toBe(true) - expect('documentPage' in restored.citations[1]!.location).toBe(true) - expect('documentChunk' in restored.citations[2]!.location).toBe(true) - expect('searchResultLocation' in restored.citations[3]!.location).toBe(true) - expect('web' in restored.citations[4]!.location).toBe(true) + // Verify each variant has the correct type discriminator + expect(restored.citations[0]!.location.type).toBe('documentChar') + expect(restored.citations[1]!.location.type).toBe('documentPage') + expect(restored.citations[2]!.location.type).toBe('documentChunk') + expect(restored.citations[3]!.location.type).toBe('searchResult') + expect(restored.citations[4]!.location.type).toBe('web') }) it('preserves optional source and domain fields', () => { const data: CitationsBlockData = { citations: [ { - location: { web: { url: 'https://example.com', domain: 'example.com' } }, + location: { type: 'web', url: 'https://example.com', domain: 'example.com' }, source: 'web-source-id', sourceContent: [{ text: 'web content' }], title: 'Example', @@ -223,6 +223,10 @@ describe('CitationsBlock', () => { const restored = CitationsBlock.fromJSON(block.toJSON()) expect(restored.citations[0]!.source).toBe('web-source-id') - expect((restored.citations[0]!.location as { web: { url: string; domain: string } }).web.domain).toBe('example.com') + const loc = restored.citations[0]!.location + expect(loc.type).toBe('web') + if (loc.type === 'web') { + expect(loc.domain).toBe('example.com') + } }) }) diff --git a/src/types/__tests__/messages.test.ts b/src/types/__tests__/messages.test.ts index 9ad75f3e..81c16386 100644 --- a/src/types/__tests__/messages.test.ts +++ b/src/types/__tests__/messages.test.ts @@ -290,7 +290,7 @@ describe('Message.fromMessageData', () => { citationsContent: { citations: [ { - location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, + location: { type: 'documentChar', documentIndex: 0, start: 10, end: 50 }, sourceContent: [{ text: 'source text' }], title: 'Test Doc', }, @@ -557,7 +557,7 @@ describe('toJSON/fromJSON round-trips', () => { ['Message with text content', () => new Message({ role: 'user', content: [new TextBlock('Hello')] })], ['Message with multiple content blocks', () => new Message({ role: 'assistant', content: [new TextBlock('Here is the result'), new ToolUseBlock({ name: 'test-tool', toolUseId: '123', input: { key: 'value' } })] })], ['Message with image content', () => new Message({ role: 'user', content: [new TextBlock('Check this image'), new ImageBlock({ format: 'png', source: { bytes: new Uint8Array([1, 2, 3]) } })] })], - ['CitationsBlock', () => new CitationsBlock({ citations: [{ location: { documentChar: { documentIndex: 0, start: 0, end: 10 } }, sourceContent: [{ text: 'source' }] }], content: [{ text: 'generated' }] })], + ['CitationsBlock', () => new CitationsBlock({ citations: [{ location: { type: 'documentChar', documentIndex: 0, start: 0, end: 10 }, sourceContent: [{ text: 'source' }] }], content: [{ text: 'generated' }] })], ] as const it.each(roundTripCases)('%s', (_name, createBlock) => { diff --git a/src/types/citations.ts b/src/types/citations.ts index 3eb062ff..9965372e 100644 --- a/src/types/citations.ts +++ b/src/types/citations.ts @@ -3,125 +3,125 @@ import type { JSONSerializable, Serialized } from './json.js' /** * Citation types for document citation content blocks. * - * Citations are returned by models (particularly Bedrock) when document citations - * are enabled. They are output-only blocks that appear in conversation history. + * Citations are returned by models when document citations are enabled. + * They are output-only blocks that appear in conversation history. */ -/** - * Location referencing character positions within a document. - */ -export interface DocumentCharLocation { - /** - * Index of the source document. - */ - documentIndex: number - - /** - * Start character position. - */ - start: number - - /** - * End character position. - */ - end: number -} - -/** - * Location referencing page positions within a document. - */ -export interface DocumentPageLocation { - /** - * Index of the source document. - */ - documentIndex: number - - /** - * Start page number. - */ - start: number - - /** - * End page number. - */ - end: number -} - -/** - * Location referencing chunk positions within a document. - */ -export interface DocumentChunkLocation { - /** - * Index of the source document. - */ - documentIndex: number - - /** - * Start chunk index. - */ - start: number - - /** - * End chunk index. - */ - end: number -} - -/** - * Location referencing a search result. - */ -export interface SearchResultLocation { - /** - * Index of the search result. - */ - searchResultIndex: number - - /** - * Start position within the search result. - */ - start: number - - /** - * End position within the search result. - */ - end: number -} - -/** - * Location referencing a web URL. - */ -export interface WebLocation { - /** - * The URL of the web source. - */ - url: string - - /** - * The domain of the web source. - */ - domain?: string -} - /** * Discriminated union of citation location types. - * Each variant uses a unique object key to identify the location type. + * Each variant uses a `type` field to identify the location kind. */ export type CitationLocation = - | { documentChar: DocumentCharLocation } - | { documentPage: DocumentPageLocation } - | { documentChunk: DocumentChunkLocation } - | { searchResultLocation: SearchResultLocation } - | { web: WebLocation } + | { + /** + * Location referencing character positions within a document. + */ + type: 'documentChar' + + /** + * Index of the source document. + */ + documentIndex: number + + /** + * Start character position. + */ + start: number + + /** + * End character position. + */ + end: number + } + | { + /** + * Location referencing page positions within a document. + */ + type: 'documentPage' + + /** + * Index of the source document. + */ + documentIndex: number + + /** + * Start page number. + */ + start: number + + /** + * End page number. + */ + end: number + } + | { + /** + * Location referencing chunk positions within a document. + */ + type: 'documentChunk' + + /** + * Index of the source document. + */ + documentIndex: number + + /** + * Start chunk index. + */ + start: number + + /** + * End chunk index. + */ + end: number + } + | { + /** + * Location referencing a search result. + */ + type: 'searchResult' + + /** + * Index of the search result. + */ + searchResultIndex: number + + /** + * Start position within the search result. + */ + start: number + + /** + * End position within the search result. + */ + end: number + } + | { + /** + * Location referencing a web URL. + */ + type: 'web' + + /** + * The URL of the web source. + */ + url: string + + /** + * The domain of the web source. + */ + domain?: string + } /** * Source content referenced by a citation. - * Modeled as a union type for future extensibility (Bedrock UNION type). + * Modeled as a union type for future extensibility. */ export type CitationSourceContent = { text: string } /** * Generated content associated with a citation. - * Modeled as a union type for future extensibility (Bedrock UNION type). + * Modeled as a union type for future extensibility. */ export type CitationGeneratedContent = { text: string } diff --git a/test/integ/agent.test.ts b/test/integ/agent.test.ts index 01b7ded0..2fbb9b62 100644 --- a/test/integ/agent.test.ts +++ b/test/integ/agent.test.ts @@ -297,14 +297,14 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode const citation = citationsBlock!.citations[0]! expect(citation.location).toBeDefined() - expect('documentChar' in citation.location).toBe(true) + expect(citation.location.type).toBe('documentChar') // Verify all inner fields are present (Bedrock docs say "Not Required" but we expect them) - const charLoc = (citation.location as { documentChar: { documentIndex: number; start: number; end: number } }) - .documentChar - expect(typeof charLoc.documentIndex).toBe('number') - expect(typeof charLoc.start).toBe('number') - expect(typeof charLoc.end).toBe('number') + if (citation.location.type === 'documentChar') { + expect(typeof citation.location.documentIndex).toBe('number') + expect(typeof citation.location.start).toBe('number') + expect(typeof citation.location.end).toBe('number') + } expect(citation.sourceContent.length).toBeGreaterThan(0) expect(citation.sourceContent[0]!.text).toBeDefined() @@ -346,14 +346,14 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode const citation = citationsBlock!.citations[0]! expect(citation.location).toBeDefined() - expect('documentPage' in citation.location).toBe(true) + expect(citation.location.type).toBe('documentPage') // Verify all inner fields are present (Bedrock docs say "Not Required" but we expect them) - const pageLoc = (citation.location as { documentPage: { documentIndex: number; start: number; end: number } }) - .documentPage - expect(typeof pageLoc.documentIndex).toBe('number') - expect(typeof pageLoc.start).toBe('number') - expect(typeof pageLoc.end).toBe('number') + if (citation.location.type === 'documentPage') { + expect(typeof citation.location.documentIndex).toBe('number') + expect(typeof citation.location.start).toBe('number') + expect(typeof citation.location.end).toBe('number') + } expect(citation.sourceContent.length).toBeGreaterThan(0) expect(citation.sourceContent[0]!.text).toBeDefined() @@ -411,27 +411,27 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode new CitationsBlock({ citations: [ { - location: { documentChar: { documentIndex: 0, start: 150, end: 300 } }, + location: { type: 'documentChar', documentIndex: 0, start: 150, end: 300 }, sourceContent: [{ text: 'char source content' }], title: 'Text Document', }, { - location: { documentPage: { documentIndex: 0, start: 2, end: 3 } }, + location: { type: 'documentPage', documentIndex: 0, start: 2, end: 3 }, sourceContent: [{ text: 'page source content' }], title: 'PDF Document', }, { - location: { documentChunk: { documentIndex: 1, start: 5, end: 8 } }, + location: { type: 'documentChunk', documentIndex: 1, start: 5, end: 8 }, sourceContent: [{ text: 'chunk source content' }], title: 'Chunked Document', }, { - location: { searchResultLocation: { searchResultIndex: 0, start: 25, end: 150 } }, + location: { type: 'searchResult', searchResultIndex: 0, start: 25, end: 150 }, sourceContent: [{ text: 'search source content' }], title: 'Search Result', }, { - location: { web: { url: 'https://example.com/doc', domain: 'example.com' } }, + location: { type: 'web', url: 'https://example.com/doc', domain: 'example.com' }, sourceContent: [{ text: 'web source content' }], title: 'Web Page', }, From d85ca9735997a9804980f8aab01e59d2c4645b07 Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Wed, 25 Feb 2026 16:31:09 -0500 Subject: [PATCH 10/13] refactor: make Citation source and title required, consolidate tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make source and title required on Citation — integration tests will verify Bedrock always returns them. Consolidate duplicated citation unit tests into a single all-variants round-trip test. --- src/models/__tests__/bedrock.test.ts | 59 ++----- src/types/__tests__/citations.test.ts | 241 +++++++------------------- src/types/__tests__/messages.test.ts | 3 +- src/types/citations.ts | 6 +- test/integ/agent.test.ts | 9 + 5 files changed, 86 insertions(+), 232 deletions(-) diff --git a/src/models/__tests__/bedrock.test.ts b/src/models/__tests__/bedrock.test.ts index 7a51ae5f..dbf95f46 100644 --- a/src/models/__tests__/bedrock.test.ts +++ b/src/models/__tests__/bedrock.test.ts @@ -1557,26 +1557,31 @@ describe('BedrockModel', () => { const sdkCitations = [ { location: { type: 'documentChar' as const, documentIndex: 0, start: 150, end: 300 }, + source: 'doc-0', sourceContent: [{ text: 'char source' }], title: 'Text Document', }, { location: { type: 'documentPage' as const, documentIndex: 0, start: 2, end: 3 }, + source: 'doc-0', sourceContent: [{ text: 'page source' }], title: 'PDF Document', }, { location: { type: 'documentChunk' as const, documentIndex: 1, start: 5, end: 8 }, + source: 'doc-1', sourceContent: [{ text: 'chunk source' }], title: 'Chunked Document', }, { location: { type: 'searchResult' as const, searchResultIndex: 0, start: 25, end: 150 }, + source: 'search-0', sourceContent: [{ text: 'search source' }], title: 'Search Result', }, { location: { type: 'web' as const, url: 'https://example.com/doc', domain: 'example.com' }, + source: 'web-0', sourceContent: [{ text: 'web source' }], title: 'Web Page', }, @@ -1612,16 +1617,19 @@ describe('BedrockModel', () => { citations: [ { location: { documentChar: { documentIndex: 0, start: 150, end: 300 } }, + source: 'doc-0', sourceContent: [{ text: 'char source' }], title: 'Text Document', }, { location: { documentPage: { documentIndex: 0, start: 2, end: 3 } }, + source: 'doc-0', sourceContent: [{ text: 'page source' }], title: 'PDF Document', }, { location: { documentChunk: { documentIndex: 1, start: 5, end: 8 } }, + source: 'doc-1', sourceContent: [{ text: 'chunk source' }], title: 'Chunked Document', }, @@ -1629,11 +1637,13 @@ describe('BedrockModel', () => { location: { searchResultLocation: { searchResultIndex: 0, start: 25, end: 150 }, }, + source: 'search-0', sourceContent: [{ text: 'search source' }], title: 'Search Result', }, { location: { web: { url: 'https://example.com/doc', domain: 'example.com' } }, + source: 'web-0', sourceContent: [{ text: 'web source' }], title: 'Web Page', }, @@ -1651,55 +1661,6 @@ describe('BedrockModel', () => { }) ) }) - - it('formats citations block without optional title or source', async () => { - const provider = new BedrockModel() - const messages = [ - new Message({ - role: 'assistant', - content: [ - new CitationsBlock({ - citations: [ - { - location: { type: 'web', url: 'https://example.com' }, - sourceContent: [{ text: 'web source' }], - }, - ], - content: [{ text: 'cited text' }], - }), - ], - }), - new Message({ - role: 'user', - content: [new TextBlock('Thanks')], - }), - ] - - collectIterator(provider.stream(messages)) - - expect(mockConverseStreamCommand).toHaveBeenLastCalledWith( - expect.objectContaining({ - messages: expect.arrayContaining([ - expect.objectContaining({ - role: 'assistant', - content: [ - { - citationsContent: { - citations: [ - { - location: { web: { url: 'https://example.com' } }, - sourceContent: [{ text: 'web source' }], - }, - ], - content: [{ text: 'cited text' }], - }, - }, - ], - }), - ]), - }) - ) - }) }) describe('includeToolResultStatus configuration', async () => { diff --git a/src/types/__tests__/citations.test.ts b/src/types/__tests__/citations.test.ts index 04ae4fb3..f3ffd91f 100644 --- a/src/types/__tests__/citations.test.ts +++ b/src/types/__tests__/citations.test.ts @@ -2,10 +2,11 @@ import { describe, expect, it } from 'vitest' import { CitationsBlock, type CitationsBlockData } from '../citations.js' describe('CitationsBlock', () => { - const documentCharData: CitationsBlockData = { + const singleCitationData: CitationsBlockData = { citations: [ { location: { type: 'documentChar', documentIndex: 0, start: 10, end: 50 }, + source: 'doc-0', sourceContent: [{ text: 'source text from document' }], title: 'Test Document', }, @@ -13,100 +14,72 @@ describe('CitationsBlock', () => { content: [{ text: 'generated text with citation' }], } + const allVariantsData: CitationsBlockData = { + citations: [ + { + location: { type: 'documentChar', documentIndex: 0, start: 150, end: 300 }, + source: 'doc-0', + sourceContent: [{ text: 'char source' }], + title: 'Text Document', + }, + { + location: { type: 'documentPage', documentIndex: 0, start: 2, end: 3 }, + source: 'doc-0', + sourceContent: [{ text: 'page source' }], + title: 'PDF Document', + }, + { + location: { type: 'documentChunk', documentIndex: 1, start: 5, end: 8 }, + source: 'doc-1', + sourceContent: [{ text: 'chunk source' }], + title: 'Chunked Document', + }, + { + location: { type: 'searchResult', searchResultIndex: 0, start: 25, end: 150 }, + source: 'search-0', + sourceContent: [{ text: 'search source' }], + title: 'Search Result', + }, + { + location: { type: 'web', url: 'https://example.com/doc', domain: 'example.com' }, + source: 'web-0', + sourceContent: [{ text: 'web source' }, { text: 'additional source' }], + title: 'Web Page', + }, + ], + content: [{ text: 'first generated' }, { text: 'second generated' }], + } + it('creates block with correct type discriminator', () => { - const block = new CitationsBlock(documentCharData) + const block = new CitationsBlock(singleCitationData) expect(block.type).toBe('citationsBlock') }) it('stores citations and content', () => { - const block = new CitationsBlock(documentCharData) - expect(block.citations).toStrictEqual(documentCharData.citations) - expect(block.content).toStrictEqual(documentCharData.content) + const block = new CitationsBlock(singleCitationData) + expect(block.citations).toStrictEqual(singleCitationData.citations) + expect(block.content).toStrictEqual(singleCitationData.content) }) - describe('toJSON/fromJSON round-trips', () => { - it('round-trips with documentChar location', () => { - const original = new CitationsBlock(documentCharData) - const restored = CitationsBlock.fromJSON(original.toJSON()) - expect(restored).toEqual(original) - }) - - it('round-trips with documentPage location', () => { - const data: CitationsBlockData = { - citations: [ - { - location: { type: 'documentPage', documentIndex: 1, start: 3, end: 7 }, - sourceContent: [{ text: 'page content' }], - }, - ], - content: [{ text: 'generated from pages' }], - } - const original = new CitationsBlock(data) - const restored = CitationsBlock.fromJSON(original.toJSON()) - expect(restored).toEqual(original) - }) - - it('round-trips with documentChunk location', () => { - const data: CitationsBlockData = { - citations: [ - { - location: { type: 'documentChunk', documentIndex: 0, start: 0, end: 2 }, - sourceContent: [{ text: 'chunk content' }], - }, - ], - content: [{ text: 'generated from chunks' }], - } - const original = new CitationsBlock(data) - const restored = CitationsBlock.fromJSON(original.toJSON()) - expect(restored).toEqual(original) - }) - - it('round-trips with searchResult location', () => { - const data: CitationsBlockData = { - citations: [ - { - location: { type: 'searchResult', searchResultIndex: 2, start: 0, end: 100 }, - sourceContent: [{ text: 'search result content' }], - }, - ], - content: [{ text: 'generated from search' }], - } - const original = new CitationsBlock(data) - const restored = CitationsBlock.fromJSON(original.toJSON()) - expect(restored).toEqual(original) - }) + it('round-trips all CitationLocation variants, multiple citations, and multiple content blocks', () => { + const original = new CitationsBlock(allVariantsData) + const json = original.toJSON() + const restored = CitationsBlock.fromJSON(json) - it('round-trips with web location', () => { - const data: CitationsBlockData = { - citations: [ - { - location: { type: 'web', url: 'https://example.com/article' }, - sourceContent: [{ text: 'web content' }], - title: 'Example Article', - }, - ], - content: [{ text: 'generated from web' }], - } - const original = new CitationsBlock(data) - const restored = CitationsBlock.fromJSON(original.toJSON()) - expect(restored).toEqual(original) - }) - }) + expect(restored).toEqual(original) + expect(restored.citations).toHaveLength(5) - it('handles optional title field', () => { - const withTitle = new CitationsBlock(documentCharData) - expect(withTitle.citations[0]!.title).toBe('Test Document') + expect(restored.citations[0]!.location.type).toBe('documentChar') + expect(restored.citations[1]!.location.type).toBe('documentPage') + expect(restored.citations[2]!.location.type).toBe('documentChunk') + expect(restored.citations[3]!.location.type).toBe('searchResult') + expect(restored.citations[4]!.location.type).toBe('web') - const withoutTitle = new CitationsBlock({ - citations: [ - { - location: { type: 'documentChar', documentIndex: 0, start: 0, end: 10 }, - sourceContent: [{ text: 'source' }], - }, - ], - content: [{ text: 'generated' }], - }) - expect(withoutTitle.citations[0]!.title).toBeUndefined() + // Verify web-specific optional domain field survives round-trip + const webLoc = restored.citations[4]!.location + if (webLoc.type === 'web') { + expect(webLoc.domain).toBe('example.com') + } }) it('handles empty arrays', () => { @@ -123,110 +96,20 @@ describe('CitationsBlock', () => { }) it('toJSON returns wrapped format', () => { - const block = new CitationsBlock(documentCharData) + const block = new CitationsBlock(singleCitationData) const json = block.toJSON() expect(json).toStrictEqual({ citationsContent: { - citations: documentCharData.citations, - content: documentCharData.content, + citations: singleCitationData.citations, + content: singleCitationData.content, }, }) }) it('works with JSON.stringify', () => { - const original = new CitationsBlock(documentCharData) + const original = new CitationsBlock(allVariantsData) const jsonString = JSON.stringify(original) const restored = CitationsBlock.fromJSON(JSON.parse(jsonString)) expect(restored).toEqual(original) }) - - it('handles multiple citations and content blocks', () => { - const data: CitationsBlockData = { - citations: [ - { - location: { type: 'documentChar', documentIndex: 0, start: 0, end: 50 }, - sourceContent: [{ text: 'first source' }], - title: 'Doc 1', - }, - { - location: { type: 'documentPage', documentIndex: 1, start: 1, end: 3 }, - sourceContent: [{ text: 'second source' }, { text: 'additional source' }], - }, - ], - content: [{ text: 'first generated' }, { text: 'second generated' }], - } - const original = new CitationsBlock(data) - const restored = CitationsBlock.fromJSON(original.toJSON()) - expect(restored).toEqual(original) - }) - - it('round-trips all CitationLocation union variants in a single block', () => { - const data: CitationsBlockData = { - citations: [ - { - location: { type: 'documentChar', documentIndex: 0, start: 150, end: 300 }, - sourceContent: [{ text: 'char source' }], - title: 'Text Document', - }, - { - location: { type: 'documentPage', documentIndex: 0, start: 2, end: 3 }, - sourceContent: [{ text: 'page source' }], - title: 'PDF Document', - }, - { - location: { type: 'documentChunk', documentIndex: 1, start: 5, end: 8 }, - sourceContent: [{ text: 'chunk source' }], - title: 'Chunked Document', - }, - { - location: { type: 'searchResult', searchResultIndex: 0, start: 25, end: 150 }, - sourceContent: [{ text: 'search source' }], - title: 'Search Result', - }, - { - location: { type: 'web', url: 'https://example.com/doc', domain: 'example.com' }, - sourceContent: [{ text: 'web source' }], - title: 'Web Page', - }, - ], - content: [{ text: 'generated text referencing all sources' }], - } - const original = new CitationsBlock(data) - const json = original.toJSON() - const restored = CitationsBlock.fromJSON(json) - - expect(restored).toEqual(original) - expect(restored.citations).toHaveLength(5) - - // Verify each variant has the correct type discriminator - expect(restored.citations[0]!.location.type).toBe('documentChar') - expect(restored.citations[1]!.location.type).toBe('documentPage') - expect(restored.citations[2]!.location.type).toBe('documentChunk') - expect(restored.citations[3]!.location.type).toBe('searchResult') - expect(restored.citations[4]!.location.type).toBe('web') - }) - - it('preserves optional source and domain fields', () => { - const data: CitationsBlockData = { - citations: [ - { - location: { type: 'web', url: 'https://example.com', domain: 'example.com' }, - source: 'web-source-id', - sourceContent: [{ text: 'web content' }], - title: 'Example', - }, - ], - content: [{ text: 'generated' }], - } - const block = new CitationsBlock(data) - expect(block.citations[0]!.source).toBe('web-source-id') - - const restored = CitationsBlock.fromJSON(block.toJSON()) - expect(restored.citations[0]!.source).toBe('web-source-id') - const loc = restored.citations[0]!.location - expect(loc.type).toBe('web') - if (loc.type === 'web') { - expect(loc.domain).toBe('example.com') - } - }) }) diff --git a/src/types/__tests__/messages.test.ts b/src/types/__tests__/messages.test.ts index 81c16386..b3e8b9e4 100644 --- a/src/types/__tests__/messages.test.ts +++ b/src/types/__tests__/messages.test.ts @@ -291,6 +291,7 @@ describe('Message.fromMessageData', () => { citations: [ { location: { type: 'documentChar', documentIndex: 0, start: 10, end: 50 }, + source: 'doc-0', sourceContent: [{ text: 'source text' }], title: 'Test Doc', }, @@ -557,7 +558,7 @@ describe('toJSON/fromJSON round-trips', () => { ['Message with text content', () => new Message({ role: 'user', content: [new TextBlock('Hello')] })], ['Message with multiple content blocks', () => new Message({ role: 'assistant', content: [new TextBlock('Here is the result'), new ToolUseBlock({ name: 'test-tool', toolUseId: '123', input: { key: 'value' } })] })], ['Message with image content', () => new Message({ role: 'user', content: [new TextBlock('Check this image'), new ImageBlock({ format: 'png', source: { bytes: new Uint8Array([1, 2, 3]) } })] })], - ['CitationsBlock', () => new CitationsBlock({ citations: [{ location: { type: 'documentChar', documentIndex: 0, start: 0, end: 10 }, sourceContent: [{ text: 'source' }] }], content: [{ text: 'generated' }] })], + ['CitationsBlock', () => new CitationsBlock({ citations: [{ location: { type: 'documentChar', documentIndex: 0, start: 0, end: 10 }, source: 'doc-0', sourceContent: [{ text: 'source' }], title: 'Test' }], content: [{ text: 'generated' }] })], ] as const it.each(roundTripCases)('%s', (_name, createBlock) => { diff --git a/src/types/citations.ts b/src/types/citations.ts index 9965372e..a7553068 100644 --- a/src/types/citations.ts +++ b/src/types/citations.ts @@ -137,7 +137,7 @@ export interface Citation { /** * The source identifier string. */ - source?: string + source: string /** * The source content referenced by this citation. @@ -145,9 +145,9 @@ export interface Citation { sourceContent: CitationSourceContent[] /** - * Optional title of the cited source. + * Title of the cited source. */ - title?: string + title: string } /** diff --git a/test/integ/agent.test.ts b/test/integ/agent.test.ts index 2fbb9b62..951d14b6 100644 --- a/test/integ/agent.test.ts +++ b/test/integ/agent.test.ts @@ -308,6 +308,8 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode expect(citation.sourceContent.length).toBeGreaterThan(0) expect(citation.sourceContent[0]!.text).toBeDefined() + expect(typeof citation.source).toBe('string') + expect(typeof citation.title).toBe('string') // Second turn: verify citations survive in conversation history const followUp = await agent.invoke('What else does the document say about that city?') @@ -357,6 +359,8 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode expect(citation.sourceContent.length).toBeGreaterThan(0) expect(citation.sourceContent[0]!.text).toBeDefined() + expect(typeof citation.source).toBe('string') + expect(typeof citation.title).toBe('string') // Second turn: verify citations survive in conversation history const followUp = await agent.invoke('What else can you tell me about this document?') @@ -412,26 +416,31 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode citations: [ { location: { type: 'documentChar', documentIndex: 0, start: 150, end: 300 }, + source: 'doc-0', sourceContent: [{ text: 'char source content' }], title: 'Text Document', }, { location: { type: 'documentPage', documentIndex: 0, start: 2, end: 3 }, + source: 'doc-0', sourceContent: [{ text: 'page source content' }], title: 'PDF Document', }, { location: { type: 'documentChunk', documentIndex: 1, start: 5, end: 8 }, + source: 'doc-1', sourceContent: [{ text: 'chunk source content' }], title: 'Chunked Document', }, { location: { type: 'searchResult', searchResultIndex: 0, start: 25, end: 150 }, + source: 'search-0', sourceContent: [{ text: 'search source content' }], title: 'Search Result', }, { location: { type: 'web', url: 'https://example.com/doc', domain: 'example.com' }, + source: 'web-0', sourceContent: [{ text: 'web source content' }], title: 'Web Page', }, From 18ba32a7745d18efe07d0cefa169cb332686a587 Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Wed, 25 Feb 2026 16:55:01 -0500 Subject: [PATCH 11/13] refactor: use proper Bedrock SDK types for citation mapping Import BedrockCitationLocation, BedrockCitation, and BedrockCitationsContentBlock from the Bedrock SDK to type the mapping functions at the boundary instead of using Record and as-unknown-as casts. Simplify _formatContentBlock citationsBlock case by delegating to _mapCitationToBedrock. --- src/models/__tests__/bedrock.test.ts | 2 + src/models/bedrock.ts | 155 ++++++++++++++------------- 2 files changed, 82 insertions(+), 75 deletions(-) diff --git a/src/models/__tests__/bedrock.test.ts b/src/models/__tests__/bedrock.test.ts index dbf95f46..8268bff6 100644 --- a/src/models/__tests__/bedrock.test.ts +++ b/src/models/__tests__/bedrock.test.ts @@ -769,6 +769,7 @@ describe('BedrockModel', () => { { location: { documentChar: { documentIndex: 0, start: 10, end: 50 } }, sourceContent: [{ text: 'source text' }], + source: 'doc-0', title: 'Test Doc', }, ], @@ -824,6 +825,7 @@ describe('BedrockModel', () => { { location: { type: 'documentChar', documentIndex: 0, start: 10, end: 50 }, sourceContent: [{ text: 'source text' }], + source: 'doc-0', title: 'Test Doc', }, ], diff --git a/src/models/bedrock.ts b/src/models/bedrock.ts index e6e4f19c..b2e52331 100644 --- a/src/models/bedrock.ts +++ b/src/models/bedrock.ts @@ -35,12 +35,15 @@ import { DocumentFormat, ImageFormat, type BedrockRuntimeClientResolvedConfig, + type CitationLocation as BedrockCitationLocation, + type Citation as BedrockCitation, + type CitationsContentBlock as BedrockCitationsContentBlock, } from '@aws-sdk/client-bedrock-runtime' import { type BaseModelConfig, Model, type StreamOptions } from '../models/model.js' import type { ContentBlock, Message, StopReason, ToolUseBlock } from '../types/messages.js' import type { ImageSource, VideoSource, DocumentSource } from '../types/media.js' import type { CitationsContentDelta, ModelStreamEvent, ReasoningContentDelta, Usage } from '../models/streaming.js' -import type { CitationLocation, CitationsBlockData } from '../types/citations.js' +import type { Citation, CitationLocation, CitationsBlockData } from '../types/citations.js' import type { JSONValue } from '../types/json.js' import { ContextWindowOverflowError, ModelThrottledError, normalizeError } from '../errors.js' import { ensureDefined } from '../types/validation.js' @@ -633,27 +636,13 @@ export class BedrockModel extends Model { }, } - case 'citationsBlock': { - const filteredCitations = block.citations.map((citation) => { - const filtered: Record = {} - if (citation.location) filtered.location = this._mapCitationLocationToBedrock(citation.location) - if (citation.sourceContent) { - const filteredSource = citation.sourceContent.filter((sc) => sc.text).map((sc) => ({ text: sc.text })) - if (filteredSource.length > 0) filtered.sourceContent = filteredSource - } - if (citation.source) filtered.source = citation.source - if (citation.title) filtered.title = citation.title - return filtered - }) - const filteredContent = block.content.filter((gc) => gc.text).map((gc) => ({ text: gc.text })) - + case 'citationsBlock': return { citationsContent: { - citations: filteredCitations, - ...(filteredContent.length > 0 && { content: filteredContent }), + citations: block.citations.map((c) => this._mapCitationToBedrock(c)), + content: block.content, }, } - } case 'guardContentBlock': { if (block.text) { @@ -825,7 +814,7 @@ export class BedrockModel extends Model { events.push({ type: 'modelContentBlockStopEvent' }) }, - citationsContent: (block: CitationsBlockData): void => { + citationsContent: (block: BedrockCitationsContentBlock): void => { if (!block) return events.push({ type: 'modelContentBlockStartEvent' }) @@ -951,7 +940,7 @@ export class BedrockModel extends Model { events.push({ type: 'modelContentBlockDeltaEvent', delta: reasoningDelta }) } }, - citationsContent: (block: CitationsBlockData): void => { + citationsContent: (block: BedrockCitationsContentBlock): void => { if (!block) return const mapped = this._mapBedrockCitationsData(block) const delta: CitationsContentDelta = { @@ -1097,87 +1086,103 @@ export class BedrockModel extends Model { } /** - * Maps a Bedrock object-key citation location to the SDK's type-discriminated format. + * Maps a Bedrock object-key citation location to the SDK's type-field format. + * + * Bedrock uses object-key discrimination (`{ documentChar: { ... } }`) while the SDK uses + * type-field discrimination (`{ type: 'documentChar', ... }`). Also normalizes Bedrock's + * `searchResultLocation` key to the shorter `searchResult`. * * @param bedrockLocation - Bedrock citation location with object-key discrimination * @returns SDK CitationLocation with type field discrimination */ - private _mapBedrockCitationLocation(bedrockLocation: Record): CitationLocation { - if ('documentChar' in bedrockLocation) { - const loc = bedrockLocation.documentChar as { documentIndex: number; start: number; end: number } - return { type: 'documentChar', documentIndex: loc.documentIndex, start: loc.start, end: loc.end } + private _mapBedrockCitationLocation(bedrockLocation: BedrockCitationLocation): CitationLocation { + if (bedrockLocation.documentChar) { + const loc = bedrockLocation.documentChar + return { type: 'documentChar', documentIndex: loc.documentIndex!, start: loc.start!, end: loc.end! } } - if ('documentPage' in bedrockLocation) { - const loc = bedrockLocation.documentPage as { documentIndex: number; start: number; end: number } - return { type: 'documentPage', documentIndex: loc.documentIndex, start: loc.start, end: loc.end } + if (bedrockLocation.documentPage) { + const loc = bedrockLocation.documentPage + return { type: 'documentPage', documentIndex: loc.documentIndex!, start: loc.start!, end: loc.end! } } - if ('documentChunk' in bedrockLocation) { - const loc = bedrockLocation.documentChunk as { documentIndex: number; start: number; end: number } - return { type: 'documentChunk', documentIndex: loc.documentIndex, start: loc.start, end: loc.end } + if (bedrockLocation.documentChunk) { + const loc = bedrockLocation.documentChunk + return { type: 'documentChunk', documentIndex: loc.documentIndex!, start: loc.start!, end: loc.end! } } - if ('searchResultLocation' in bedrockLocation) { - const loc = bedrockLocation.searchResultLocation as { - searchResultIndex: number - start: number - end: number - } - return { - type: 'searchResult', - searchResultIndex: loc.searchResultIndex, - start: loc.start, - end: loc.end, - } + if (bedrockLocation.searchResultLocation) { + const loc = bedrockLocation.searchResultLocation + return { type: 'searchResult', searchResultIndex: loc.searchResultIndex!, start: loc.start!, end: loc.end! } } - if ('web' in bedrockLocation) { - const loc = bedrockLocation.web as { url: string; domain?: string } - return { type: 'web', url: loc.url, ...(loc.domain && { domain: loc.domain }) } + if (bedrockLocation.web) { + const loc = bedrockLocation.web + return { type: 'web', url: loc.url!, ...(loc.domain && { domain: loc.domain }) } } logger.warn(`citation_location=<${JSON.stringify(bedrockLocation)}> | unknown citation location type`) - return bedrockLocation as CitationLocation + return bedrockLocation as unknown as CitationLocation } /** - * Maps Bedrock citation data to SDK Citation objects. + * Maps a Bedrock CitationsContentBlock to SDK CitationsBlockData. * - * @param bedrockData - Raw Bedrock CitationsBlockData - * @returns CitationsBlockData with SDK-format CitationLocations + * @param bedrockData - Bedrock CitationsContentBlock + * @returns SDK CitationsBlockData with type-field CitationLocations */ - private _mapBedrockCitationsData(bedrockData: CitationsBlockData): CitationsBlockData { + private _mapBedrockCitationsData(bedrockData: BedrockCitationsContentBlock): CitationsBlockData { return { - citations: bedrockData.citations.map((citation) => ({ - ...citation, - location: this._mapBedrockCitationLocation(citation.location as unknown as Record), + citations: (bedrockData.citations ?? []).map((citation) => ({ + source: citation.source ?? '', + title: citation.title ?? '', + sourceContent: (citation.sourceContent ?? []).map((sc) => ({ text: sc.text! })), + location: this._mapBedrockCitationLocation(citation.location!), })), - content: bedrockData.content, + content: (bedrockData.content ?? []).map((gc) => ({ text: gc.text! })), + } + } + + /** + * Maps an SDK Citation to Bedrock's Citation format. + * + * @param citation - SDK Citation with type-field location + * @returns Bedrock Citation with object-key location + */ + private _mapCitationToBedrock(citation: Citation): BedrockCitation { + return { + location: this._mapCitationLocationToBedrock(citation.location), + sourceContent: citation.sourceContent.map((sc) => ({ text: sc.text })), + source: citation.source, + title: citation.title, } } /** - * Maps an SDK CitationLocation back to Bedrock's object-key format. + * Maps an SDK CitationLocation to Bedrock's object-key format. * * @param location - SDK CitationLocation with type field - * @returns Bedrock object-key citation location + * @returns Bedrock CitationLocation with object-key discrimination */ - private _mapCitationLocationToBedrock(location: CitationLocation): Record { + private _mapCitationLocationToBedrock(location: CitationLocation): BedrockCitationLocation { switch (location.type) { - case 'documentChar': - return { documentChar: { documentIndex: location.documentIndex, start: location.start, end: location.end } } - case 'documentPage': - return { documentPage: { documentIndex: location.documentIndex, start: location.start, end: location.end } } - case 'documentChunk': - return { documentChunk: { documentIndex: location.documentIndex, start: location.start, end: location.end } } - case 'searchResult': - return { - searchResultLocation: { - searchResultIndex: location.searchResultIndex, - start: location.start, - end: location.end, - }, - } - case 'web': - return { web: { url: location.url, ...(location.domain && { domain: location.domain }) } } + case 'documentChar': { + const { type: _, ...fields } = location + return { documentChar: fields } + } + case 'documentPage': { + const { type: _, ...fields } = location + return { documentPage: fields } + } + case 'documentChunk': { + const { type: _, ...fields } = location + return { documentChunk: fields } + } + case 'searchResult': { + const { type: _, ...fields } = location + return { searchResultLocation: fields } + } + case 'web': { + const { type: _, ...fields } = location + return { web: fields } + } default: - return location as unknown as Record + return location as unknown as BedrockCitationLocation } } } From 6835f504db4fced27c1a8ab5827fe812f8fc13ad Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Wed, 25 Feb 2026 17:05:06 -0500 Subject: [PATCH 12/13] fix: drop citations with unknown location types instead of casting --- src/models/bedrock.ts | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/models/bedrock.ts b/src/models/bedrock.ts index b2e52331..0e8eed7c 100644 --- a/src/models/bedrock.ts +++ b/src/models/bedrock.ts @@ -1095,7 +1095,7 @@ export class BedrockModel extends Model { * @param bedrockLocation - Bedrock citation location with object-key discrimination * @returns SDK CitationLocation with type field discrimination */ - private _mapBedrockCitationLocation(bedrockLocation: BedrockCitationLocation): CitationLocation { + private _mapBedrockCitationLocation(bedrockLocation: BedrockCitationLocation): CitationLocation | undefined { if (bedrockLocation.documentChar) { const loc = bedrockLocation.documentChar return { type: 'documentChar', documentIndex: loc.documentIndex!, start: loc.start!, end: loc.end! } @@ -1117,7 +1117,7 @@ export class BedrockModel extends Model { return { type: 'web', url: loc.url!, ...(loc.domain && { domain: loc.domain }) } } logger.warn(`citation_location=<${JSON.stringify(bedrockLocation)}> | unknown citation location type`) - return bedrockLocation as unknown as CitationLocation + return undefined } /** @@ -1128,12 +1128,18 @@ export class BedrockModel extends Model { */ private _mapBedrockCitationsData(bedrockData: BedrockCitationsContentBlock): CitationsBlockData { return { - citations: (bedrockData.citations ?? []).map((citation) => ({ - source: citation.source ?? '', - title: citation.title ?? '', - sourceContent: (citation.sourceContent ?? []).map((sc) => ({ text: sc.text! })), - location: this._mapBedrockCitationLocation(citation.location!), - })), + citations: (bedrockData.citations ?? []) + .map((citation) => { + const location = citation.location ? this._mapBedrockCitationLocation(citation.location) : undefined + if (!location) return undefined + return { + source: citation.source ?? '', + title: citation.title ?? '', + sourceContent: (citation.sourceContent ?? []).map((sc) => ({ text: sc.text! })), + location, + } + }) + .filter((c) => c !== undefined), content: (bedrockData.content ?? []).map((gc) => ({ text: gc.text! })), } } From db843e9e11a85fd5f3f4798292b50cb8f16aab6a Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Thu, 26 Feb 2026 12:56:06 -0500 Subject: [PATCH 13/13] fix: strip domain from web citations, harden integ tests, remove emojis from AGENTS.md - Strip Bedrock-rejected `domain` field from web citation locations in _mapCitationLocationToBedrock - Refactor citation integ tests: use non-streaming (Bedrock streaming lacks citation support), content source format, object-level assertions, and remove seeded multi-turn test - Remove all emojis from AGENTS.md --- AGENTS.md | 48 +++--- src/models/__tests__/bedrock.test.ts | 2 +- src/models/bedrock.ts | 6 +- test/integ/agent.test.ts | 236 +++++++++------------------ 4 files changed, 106 insertions(+), 186 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 48b2792d..c6d32f94 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -241,20 +241,20 @@ logger.warn(`field=<${value}> | statement one | statement two`) **Examples**: ```typescript -// ✅ Good: Context fields with message +// Good: Context fields with message logger.warn(`stop_reason=<${stopReason}>, fallback=<${fallback}> | unknown stop reason, converting to camelCase`) logger.warn(`event_type=<${eventType}> | unsupported bedrock event type`) -// ✅ Good: Simple message without context fields +// Good: Simple message without context fields logger.warn('cache points are not supported in openai system prompts, ignoring cache points') -// ✅ Good: Multiple statements separated by pipes +// Good: Multiple statements separated by pipes logger.warn(`request_id=<${id}> | processing request | starting validation`) -// ❌ Bad: Not using angle brackets for values +// Bad: Not using angle brackets for values logger.warn(`stop_reason=${stopReason} | unknown stop reason`) -// ❌ Bad: Using punctuation +// Bad: Using punctuation logger.warn(`event_type=<${eventType}> | Unsupported event type.`) ``` @@ -289,7 +289,7 @@ src/ **Example**: ```typescript -// ✅ Good: Main function first, helpers follow +// Good: Main function first, helpers follow export async function* mainFunction() { const result = await helperFunction1() return helperFunction2(result) @@ -303,7 +303,7 @@ function helperFunction2(input: string) { // Implementation } -// ❌ Bad: Helpers before main function +// Bad: Helpers before main function async function helperFunction1() { // Implementation } @@ -325,10 +325,10 @@ test/integ/ **Optional chaining for null safety**: Prefer optional chaining over verbose `typeof` checks when accessing potentially undefined properties: ```typescript -// ✅ Good: Optional chaining +// Good: Optional chaining return globalThis?.process?.env?.API_KEY -// ❌ Bad: Verbose typeof checks +// Bad: Verbose typeof checks if (typeof process !== 'undefined' && typeof process.env !== 'undefined') { return process.env.API_KEY } @@ -369,7 +369,7 @@ export function getData(): any { **Private fields**: Use underscore prefix for private class fields to improve readability and distinguish them from public members. ```typescript -// ✅ Good: Private fields with underscore prefix +// Good: Private fields with underscore prefix export class Example { private readonly _config: Config private _state: State @@ -384,7 +384,7 @@ export class Example { } } -// ❌ Bad: No underscore for private fields +// Bad: No underscore for private fields export class Example { private readonly config: Config // Missing underscore @@ -497,7 +497,7 @@ import type { Options, Config } from '../types' **When defining interfaces or types, organize them so the top-level interface comes first, followed by its dependencies, and then all nested dependencies.** ```typescript -// ✅ Correct - Top-level first, then dependencies +// Correct - Top-level first, then dependencies export interface Message { role: Role content: ContentBlock[] @@ -537,7 +537,7 @@ export class ToolResultBlock { } } -// ❌ Wrong - Dependencies before top-level +// Wrong - Dependencies before top-level export type Role = 'user' | 'assistant' export interface TextBlockData { @@ -557,7 +557,7 @@ export interface Message { // Top-level should come first **When creating discriminated unions with a `type` field, the type value MUST match the interface name with the first letter lowercase.** ```typescript -// ✅ Correct - type matches class name (first letter lowercase) +// Correct - type matches class name (first letter lowercase) export class TextBlock { readonly type = 'textBlock' as const // Matches 'TextBlock' class name readonly text: string @@ -572,7 +572,7 @@ export class CachePointBlock { export type ContentBlock = TextBlock | ToolUseBlock | CachePointBlock -// ❌ Wrong - type doesn't match class name +// Wrong - type doesn't match class name export class CachePointBlock { readonly type = 'cachePoint' as const // Should be 'cachePointBlock' readonly cacheType: 'default' @@ -588,29 +588,29 @@ When the upstream API (e.g., Bedrock) defines a type as a **UNION** ("only one m The Bedrock API marks all fields in union types as "Not Required" as a mechanism for future extensibility. In TypeScript, encode the mutual exclusivity using `|` with each variant having its field required. The "not required" from the API docs means "this field won't be present if a different variant is active." ```typescript -// ✅ Correct: type union — each variant has its field required +// Correct: type union — each variant has its field required // Adding a new variant later (e.g., | { image: ImageData }) is non-breaking export type CitationSourceContent = { text: string } -// ✅ Correct: multi-variant union with object-key discrimination +// Correct: multi-variant union with object-key discrimination export type DocumentSourceData = | { bytes: Uint8Array } | { text: string } | { content: DocumentContentBlockData[] } | { s3Location: S3LocationData } -// ✅ Correct: multi-variant union for citation locations +// Correct: multi-variant union for citation locations export type CitationLocation = | { documentChar: DocumentCharLocation } | { documentPage: DocumentPageLocation } | { web: WebLocation } -// ❌ Wrong: interface with optional fields — cannot expand without breaking +// Wrong: interface with optional fields — cannot expand without breaking export interface CitationSourceContent { text?: string } -// ❌ Wrong: interface with required field — changing to union later is breaking +// Wrong: interface with required field — changing to union later is breaking export interface CitationSourceContent { text: string } @@ -655,13 +655,13 @@ export class ValidationError extends Error { When asserting on objects, prefer `toStrictEqual` for full object comparison rather than checking individual fields: ```typescript -// ✅ Good: Full object assertion with toStrictEqual +// Good: Full object assertion with toStrictEqual expect(provider.getConfig()).toStrictEqual({ modelId: 'gemini-2.5-flash', params: { temperature: 0.5 }, }) -// ❌ Bad: Checking individual fields +// Bad: Checking individual fields expect(provider.getConfig().modelId).toBe('gemini-2.5-flash') expect(provider.getConfig().params.temperature).toBe(0.5) ``` @@ -680,7 +680,7 @@ When adding or modifying dependencies, you **MUST** follow the guidelines in [do ## Things to Do -✅ **Do**: +**Do**: - Use relative imports for internal modules - Co-locate unit tests with source under `__tests__` directories - Follow nested describe pattern for test organization @@ -693,7 +693,7 @@ When adding or modifying dependencies, you **MUST** follow the guidelines in [do ## Things NOT to Do -❌ **Don't**: +**Don't**: - Use `any` type (enforced by ESLint) - Put unit tests in separate `tests/` directory (use `src/**/__tests__/**`) - Skip documentation for exported functions diff --git a/src/models/__tests__/bedrock.test.ts b/src/models/__tests__/bedrock.test.ts index 8268bff6..32e591a0 100644 --- a/src/models/__tests__/bedrock.test.ts +++ b/src/models/__tests__/bedrock.test.ts @@ -1644,7 +1644,7 @@ describe('BedrockModel', () => { title: 'Search Result', }, { - location: { web: { url: 'https://example.com/doc', domain: 'example.com' } }, + location: { web: { url: 'https://example.com/doc' } }, source: 'web-0', sourceContent: [{ text: 'web source' }], title: 'Web Page', diff --git a/src/models/bedrock.ts b/src/models/bedrock.ts index 0e8eed7c..95ed417a 100644 --- a/src/models/bedrock.ts +++ b/src/models/bedrock.ts @@ -1183,10 +1183,8 @@ export class BedrockModel extends Model { const { type: _, ...fields } = location return { searchResultLocation: fields } } - case 'web': { - const { type: _, ...fields } = location - return { web: fields } - } + case 'web': + return { web: { url: location.url } } default: return location as unknown as BedrockCitationLocation } diff --git a/test/integ/agent.test.ts b/test/integ/agent.test.ts index 951d14b6..a21dedbe 100644 --- a/test/integ/agent.test.ts +++ b/test/integ/agent.test.ts @@ -264,115 +264,102 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode }) describe.skipIf(!supports.citations)('Citations', () => { + const documentText = [ + 'France is a country in Western Europe. Its capital is Paris, which is known as the City of Light.', + 'Paris has a population of approximately 2.1 million people in the city proper.', + 'The Eiffel Tower, built in 1889, is the most visited paid monument in the world.', + 'France is the most visited country in the world, with over 89 million tourists annually.', + 'The French Revolution of 1789 was a pivotal event in world history.', + ].join(' ') + const textDocBlock = new DocumentBlock({ name: 'test-document', format: 'txt', - source: { text: 'The capital of France is Paris. It is known as the City of Light.' }, + source: { content: [{ text: documentText }] }, citations: { enabled: true }, }) - const textDocPrompt = new TextBlock('What is the capital of France according to the document? Answer briefly.') - - describe.each([ - { label: 'streaming', modelOptions: {} }, - { label: 'non-streaming', modelOptions: { stream: false } }, - ])('$label', ({ modelOptions }) => { - it('returns documentChar citations from text document and preserves them in multi-turn', async () => { - const agent = new Agent({ - model: createModel(modelOptions), - printer: false, - }) - - const result = await agent.invoke([textDocBlock, textDocPrompt]) - - expect(result.stopReason).toBe('endTurn') - - const citationsBlock = result.lastMessage.content.find( - (block): block is CitationsBlock => block.type === 'citationsBlock' - ) - expect(citationsBlock).toBeDefined() - expect(citationsBlock!.citations.length).toBeGreaterThan(0) - expect(citationsBlock!.content.length).toBeGreaterThan(0) - expect(citationsBlock!.content[0]!.text).toBeDefined() - - const citation = citationsBlock!.citations[0]! - expect(citation.location).toBeDefined() - expect(citation.location.type).toBe('documentChar') - - // Verify all inner fields are present (Bedrock docs say "Not Required" but we expect them) - if (citation.location.type === 'documentChar') { - expect(typeof citation.location.documentIndex).toBe('number') - expect(typeof citation.location.start).toBe('number') - expect(typeof citation.location.end).toBe('number') - } - - expect(citation.sourceContent.length).toBeGreaterThan(0) - expect(citation.sourceContent[0]!.text).toBeDefined() - expect(typeof citation.source).toBe('string') - expect(typeof citation.title).toBe('string') - - // Second turn: verify citations survive in conversation history - const followUp = await agent.invoke('What else does the document say about that city?') - expect(followUp.stopReason).toBe('endTurn') - expect(followUp.lastMessage.role).toBe('assistant') - expect(followUp.lastMessage.content.length).toBeGreaterThan(0) + const textDocPrompt = new TextBlock( + 'Using the document, what is the capital of France and what is it known for? Cite specific details.' + ) + + it('returns documentChunk citations from text document', async () => { + const agent = new Agent({ + model: createModel({ stream: false }), + printer: false, }) - it('returns documentPage citations from PDF document and preserves them in multi-turn', async () => { - const pdfBytes = await loadFixture(letterPdfUrl) + const result = await agent.invoke([textDocBlock, textDocPrompt]) - const agent = new Agent({ - model: createModel(modelOptions), - printer: false, - }) + expect(result.stopReason).toBe('endTurn') - const result = await agent.invoke([ - new DocumentBlock({ - name: 'letter', - format: 'pdf', - source: { bytes: pdfBytes }, - citations: { enabled: true }, + const citationsBlock = result.lastMessage.content.find( + (block): block is CitationsBlock => block.type === 'citationsBlock' + ) + expect(citationsBlock).toBeDefined() + expect(citationsBlock!.citations).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + location: expect.objectContaining({ type: 'documentChunk' }), + source: expect.any(String), + title: expect.any(String), + sourceContent: expect.arrayContaining([expect.objectContaining({ text: expect.any(String) })]), }), - new TextBlock('Summarize this document briefly.'), ]) + ) + expect(citationsBlock!.content).toEqual( + expect.arrayContaining([expect.objectContaining({ text: expect.any(String) })]) + ) + }) - expect(result.stopReason).toBe('endTurn') - - const citationsBlock = result.lastMessage.content.find( - (block): block is CitationsBlock => block.type === 'citationsBlock' - ) - expect(citationsBlock).toBeDefined() - expect(citationsBlock!.citations.length).toBeGreaterThan(0) - expect(citationsBlock!.content.length).toBeGreaterThan(0) - expect(citationsBlock!.content[0]!.text).toBeDefined() - - const citation = citationsBlock!.citations[0]! - expect(citation.location).toBeDefined() - expect(citation.location.type).toBe('documentPage') - - // Verify all inner fields are present (Bedrock docs say "Not Required" but we expect them) - if (citation.location.type === 'documentPage') { - expect(typeof citation.location.documentIndex).toBe('number') - expect(typeof citation.location.start).toBe('number') - expect(typeof citation.location.end).toBe('number') - } - - expect(citation.sourceContent.length).toBeGreaterThan(0) - expect(citation.sourceContent[0]!.text).toBeDefined() - expect(typeof citation.source).toBe('string') - expect(typeof citation.title).toBe('string') - - // Second turn: verify citations survive in conversation history - const followUp = await agent.invoke('What else can you tell me about this document?') - expect(followUp.stopReason).toBe('endTurn') - expect(followUp.lastMessage.role).toBe('assistant') - expect(followUp.lastMessage.content.length).toBeGreaterThan(0) + it('returns documentPage citations from PDF document and preserves them in multi-turn', async () => { + const pdfBytes = await loadFixture(letterPdfUrl) + + const agent = new Agent({ + model: createModel({ stream: false }), + printer: false, }) + + const result = await agent.invoke([ + new DocumentBlock({ + name: 'letter', + format: 'pdf', + source: { bytes: pdfBytes }, + citations: { enabled: true }, + }), + new TextBlock('Summarize this document briefly.'), + ]) + + expect(result.stopReason).toBe('endTurn') + + const citationsBlock = result.lastMessage.content.find( + (block): block is CitationsBlock => block.type === 'citationsBlock' + ) + expect(citationsBlock).toBeDefined() + expect(citationsBlock!.citations).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + location: expect.objectContaining({ type: 'documentPage' }), + source: expect.any(String), + title: expect.any(String), + sourceContent: expect.arrayContaining([expect.objectContaining({ text: expect.any(String) })]), + }), + ]) + ) + expect(citationsBlock!.content).toEqual( + expect.arrayContaining([expect.objectContaining({ text: expect.any(String) })]) + ) + + // Second turn: verify citations survive in conversation history + const followUp = await agent.invoke('What else can you tell me about this document?') + expect(followUp.stopReason).toBe('endTurn') + expect(followUp.lastMessage.role).toBe('assistant') + expect(followUp.lastMessage.content.length).toBeGreaterThan(0) }) - it('emits citationsContentDelta events during streaming', async () => { + it('emits citationsContentDelta events via agent.stream()', async () => { const agent = new Agent({ - model: createModel(), + model: createModel({ stream: false }), printer: false, }) @@ -380,7 +367,6 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode expect(result.stopReason).toBe('endTurn') - // Verify citationsContentDelta events were emitted during streaming const citationDeltas = items.filter( (item) => item.type === 'modelStreamUpdateEvent' && @@ -389,76 +375,12 @@ describe.each(allProviders)('Agent with $name', ({ name, skip, createModel, mode ) expect(citationDeltas.length).toBeGreaterThan(0) - // Verify the aggregated result also contains the CitationsBlock const citationsBlock = result.lastMessage.content.find( (block): block is CitationsBlock => block.type === 'citationsBlock' ) expect(citationsBlock).toBeDefined() expect(citationsBlock!.citations.length).toBeGreaterThan(0) }) - - it('preserves all CitationLocation variants in multi-turn conversation history', async () => { - const agent = new Agent({ - model: createModel(), - printer: false, - }) - - // Seed conversation with an assistant message containing all 5 citation location variants - agent.messages.push( - new Message({ - role: 'user', - content: [new TextBlock('Tell me about these sources.')], - }), - new Message({ - role: 'assistant', - content: [ - new CitationsBlock({ - citations: [ - { - location: { type: 'documentChar', documentIndex: 0, start: 150, end: 300 }, - source: 'doc-0', - sourceContent: [{ text: 'char source content' }], - title: 'Text Document', - }, - { - location: { type: 'documentPage', documentIndex: 0, start: 2, end: 3 }, - source: 'doc-0', - sourceContent: [{ text: 'page source content' }], - title: 'PDF Document', - }, - { - location: { type: 'documentChunk', documentIndex: 1, start: 5, end: 8 }, - source: 'doc-1', - sourceContent: [{ text: 'chunk source content' }], - title: 'Chunked Document', - }, - { - location: { type: 'searchResult', searchResultIndex: 0, start: 25, end: 150 }, - source: 'search-0', - sourceContent: [{ text: 'search source content' }], - title: 'Search Result', - }, - { - location: { type: 'web', url: 'https://example.com/doc', domain: 'example.com' }, - source: 'web-0', - sourceContent: [{ text: 'web source content' }], - title: 'Web Page', - }, - ], - content: [{ text: 'Here is information from all five source types.' }], - }), - new TextBlock('I found information from multiple source types.'), - ], - }) - ) - - // Follow-up turn forces Bedrock to accept all 5 variants in conversation history - const result = await agent.invoke('Can you summarize what you told me?') - - expect(result.stopReason).toBe('endTurn') - expect(result.lastMessage.role).toBe('assistant') - expect(result.lastMessage.content.length).toBeGreaterThan(0) - }) }) describe.skipIf(!supports.images)('multimodal input', () => {