diff --git a/packages/super-editor/src/extensions/diffing/algorithm/attributes-diffing.test.js b/packages/super-editor/src/extensions/diffing/algorithm/attributes-diffing.test.js new file mode 100644 index 000000000..cf80ae640 --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/attributes-diffing.test.js @@ -0,0 +1,166 @@ +import { describe, it, expect } from 'vitest'; +import { getAttributesDiff } from './attributes-diffing.ts'; + +describe('getAttributesDiff', () => { + it('detects nested additions, deletions, and modifications', () => { + const objectA = { + id: 1, + name: 'Alice', + age: 30, + config: { + theme: 'dark', + notifications: true, + additional: { + layout: 'grid', + itemsPerPage: 10, + }, + }, + }; + + const objectB = { + id: 1, + name: 'Alice Smith', + config: { + theme: 'light', + additional: { + layout: 'list', + itemsPerPage: 10, + showSidebar: true, + }, + }, + isActive: true, + }; + + const diff = getAttributesDiff(objectA, objectB); + + expect(diff).toEqual({ + added: { + isActive: true, + 'config.additional.showSidebar': true, + }, + deleted: { + age: 30, + 'config.notifications': true, + }, + modified: { + name: { from: 'Alice', to: 'Alice Smith' }, + 'config.theme': { from: 'dark', to: 'light' }, + 'config.additional.layout': { from: 'grid', to: 'list' }, + }, + }); + }); + + it('returns empty diff when objects are identical', () => { + const objectA = { + name: 'Same', + config: { + theme: 'dark', + }, + }; + + const diff = getAttributesDiff(objectA, { ...objectA }); + + expect(diff).toBeNull(); + }); + + it('handles whole-object additions, removals, and non-object replacements', () => { + const objectA = { + profile: { + preferences: { + email: true, + }, + }, + options: { + advanced: { + mode: 'auto', + }, + }, + }; + + const objectB = { + profile: {}, + options: { + advanced: 'manual', + }, + flags: ['a'], + }; + + const diff = getAttributesDiff(objectA, objectB); + + expect(diff.added).toEqual({ + flags: ['a'], + }); + expect(diff.deleted).toEqual({ + 'profile.preferences.email': true, + }); + expect(diff.modified).toEqual({ + 'options.advanced': { from: { mode: 'auto' }, to: 'manual' }, + }); + }); + + it('ignores keys defined in the ignored attribute list', () => { + const objectA = { + sdBlockId: '123', + nested: { + sdBlockId: '456', + value: 1, + }, + }; + + const objectB = { + nested: { + sdBlockId: '789', + value: 2, + }, + }; + + const diff = getAttributesDiff(objectA, objectB); + + expect(diff.added).toEqual({}); + expect(diff.deleted).toEqual({}); + expect(diff.modified).toEqual({ + 'nested.value': { from: 1, to: 2 }, + }); + }); + + it('handles array equality and modifications', () => { + const objectA = { + tags: ['alpha', 'beta'], + nested: { + metrics: [ + { name: 'views', value: 10 }, + { name: 'likes', value: 5 }, + ], + }, + }; + + const objectB = { + tags: ['alpha', 'beta'], + nested: { + metrics: [ + { name: 'views', value: 12 }, + { name: 'likes', value: 5 }, + ], + }, + }; + + let diff = getAttributesDiff(objectA, objectB); + expect(diff.added).toEqual({}); + expect(diff.deleted).toEqual({}); + expect(diff.modified).toEqual({ + 'nested.metrics': { + from: [ + { name: 'views', value: 10 }, + { name: 'likes', value: 5 }, + ], + to: [ + { name: 'views', value: 12 }, + { name: 'likes', value: 5 }, + ], + }, + }); + + diff = getAttributesDiff(objectA, { ...objectA }); + expect(diff).toBeNull(); + }); +}); diff --git a/packages/super-editor/src/extensions/diffing/algorithm/attributes-diffing.ts 
b/packages/super-editor/src/extensions/diffing/algorithm/attributes-diffing.ts
new file mode 100644
index 000000000..06285514d
--- /dev/null
+++ b/packages/super-editor/src/extensions/diffing/algorithm/attributes-diffing.ts
@@ -0,0 +1,303 @@
+const IGNORED_ATTRIBUTE_KEYS = new Set(['sdBlockId']);
+
+/**
+ * Represents a single attribute change capturing the previous and next values.
+ */
+export interface AttributeChange {
+  from: unknown;
+  to: unknown;
+}
+
+/**
+ * Aggregated attribute diff broken down into added, deleted, and modified dotted paths.
+ */
+export interface AttributesDiff {
+  /** Attributes added in the new payload. */
+  added: Record<string, unknown>;
+  /** Attributes removed from the old payload. */
+  deleted: Record<string, unknown>;
+  /** Attributes that changed values between old and new payloads. */
+  modified: Record<string, AttributeChange>;
+}
+
+/**
+ * Aggregated marks diff broken down into added, deleted, and modified marks.
+ */
+export interface MarksDiff {
+  /** Marks added in the new payload. */
+  added: { name: string; attrs: Record<string, unknown> }[];
+  /** Marks removed from the old payload. */
+  deleted: { name: string; attrs: Record<string, unknown> }[];
+  /** Marks whose attributes changed between old and new payloads. */
+  modified: { name: string; oldAttrs: Record<string, unknown>; newAttrs: Record<string, unknown> }[];
+}
+
+/**
+ * Computes the attribute level diff between two arbitrary objects.
+ * Produces a map of dotted paths to added, deleted and modified values.
+ *
+ * @param objectA Baseline attributes to compare.
+ * @param objectB Updated attributes to compare.
+ * @param ignoreKeys Additional attribute keys to ignore.
+ * @returns Structured diff or null when objects are effectively equal.
+ */
+export function getAttributesDiff(
+  objectA: Record<string, unknown> | null | undefined = {},
+  objectB: Record<string, unknown> | null | undefined = {},
+  ignoreKeys: string[] = [],
+): AttributesDiff | null {
+  const diff: AttributesDiff = {
+    added: {},
+    deleted: {},
+    modified: {},
+  };
+
+  const ignored = new Set([...IGNORED_ATTRIBUTE_KEYS, ...ignoreKeys]);
+  diffObjects(objectA ?? {}, objectB ?? {}, '', diff, ignored);
+  const hasChanges =
+    Object.keys(diff.added).length > 0 || Object.keys(diff.deleted).length > 0 || Object.keys(diff.modified).length > 0;
+
+  return hasChanges ? diff : null;
+}
+
+/**
+ * Computes the attribute level diff between two sets of ProseMirror marks.
+ * Marks are matched by type and reported as added, deleted, or modified entries.
+ *
+ * @param marksA Baseline marks to compare.
+ * @param marksB Updated marks to compare.
+ * @returns Structured diff or null when marks are effectively equal.
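+ * @example
+ * // Usage sketch with illustrative marks (not taken from the patch): matching is by
+ * // mark type name, and attribute changes surface as `modified` entries.
+ * const marksDiff = getMarksDiff(
+ *   [{ type: 'bold' }, { type: 'textStyle', attrs: { color: 'FF0000' } }],
+ *   [{ type: 'textStyle', attrs: { color: '0000FF' } }, { type: 'underline' }],
+ * );
+ * // => {
+ * //   added: [{ name: 'underline', attrs: {} }],
+ * //   deleted: [{ name: 'bold', attrs: {} }],
+ * //   modified: [{ name: 'textStyle', oldAttrs: { color: 'FF0000' }, newAttrs: { color: '0000FF' } }],
+ * // }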
+ *
+ */
+export function getMarksDiff(
+  marksA: Array<{ type: string; attrs?: Record<string, unknown> }> | null = [],
+  marksB: Array<{ type: string; attrs?: Record<string, unknown> }> | null = [],
+): MarksDiff | null {
+  marksA = marksA || [];
+  marksB = marksB || [];
+
+  const normalizeMarkAttrs = (attrs?: Record<string, unknown>): Record<string, unknown> => {
+    if (!attrs) {
+      return {};
+    }
+    const normalized: Record<string, unknown> = {};
+    for (const [key, value] of Object.entries(attrs)) {
+      if (IGNORED_ATTRIBUTE_KEYS.has(key)) {
+        continue;
+      }
+      normalized[key] = value;
+    }
+    return normalized;
+  };
+  const marksDiff: MarksDiff = {
+    added: [],
+    deleted: [],
+    modified: [],
+  };
+  const marksMapA = new Map<string, Record<string, unknown>>();
+  const marksMapB = new Map<string, Record<string, unknown>>();
+
+  for (const mark of marksA) {
+    marksMapA.set(mark.type, normalizeMarkAttrs(mark.attrs));
+  }
+  for (const mark of marksB) {
+    marksMapB.set(mark.type, normalizeMarkAttrs(mark.attrs));
+  }
+
+  const markNames = new Set([...marksMapA.keys(), ...marksMapB.keys()]);
+  for (const name of markNames) {
+    const attrsA = marksMapA.get(name);
+    const attrsB = marksMapB.get(name);
+
+    if (attrsA && !attrsB) {
+      marksDiff.deleted.push({ name, attrs: attrsA });
+      continue;
+    }
+
+    if (!attrsA && attrsB) {
+      marksDiff.added.push({ name, attrs: attrsB });
+      continue;
+    }
+
+    if (attrsA && attrsB && !deepEquals(attrsA, attrsB)) {
+      marksDiff.modified.push({ name, oldAttrs: attrsA, newAttrs: attrsB });
+    }
+  }
+
+  const hasChanges = marksDiff.added.length > 0 || marksDiff.deleted.length > 0 || marksDiff.modified.length > 0;
+  return hasChanges ? marksDiff : null;
+}
+
+/**
+ * Recursively compares two objects and fills the diff buckets.
+ *
+ * @param objectA Baseline attributes being inspected.
+ * @param objectB Updated attributes being inspected.
+ * @param basePath Dotted path prefix used for nested keys.
+ * @param diff Aggregated diff being mutated.
+ * @param ignoreKeys Set of attribute keys to ignore.
+ */
+function diffObjects(
+  objectA: Record<string, unknown>,
+  objectB: Record<string, unknown>,
+  basePath: string,
+  diff: AttributesDiff,
+  ignoreKeys: Set<string>,
+): void {
+  const keys = new Set([...Object.keys(objectA || {}), ...Object.keys(objectB || {})]);
+
+  for (const key of keys) {
+    if (ignoreKeys.has(key)) {
+      continue;
+    }
+
+    const path = joinPath(basePath, key);
+    const hasA = Object.prototype.hasOwnProperty.call(objectA, key);
+    const hasB = Object.prototype.hasOwnProperty.call(objectB, key);
+
+    if (hasA && !hasB) {
+      recordDeletedValue(objectA[key], path, diff, ignoreKeys);
+      continue;
+    }
+
+    if (!hasA && hasB) {
+      recordAddedValue(objectB[key], path, diff, ignoreKeys);
+      continue;
+    }
+
+    const valueA = objectA[key];
+    const valueB = objectB[key];
+
+    if (isPlainObject(valueA) && isPlainObject(valueB)) {
+      diffObjects(valueA, valueB, path, diff, ignoreKeys);
+      continue;
+    }
+
+    if (Array.isArray(valueA) && Array.isArray(valueB)) {
+      if (valueA.length === valueB.length && valueA.every((item, index) => deepEquals(item, valueB[index]))) {
+        continue;
+      }
+    }
+
+    if (!deepEquals(valueA, valueB)) {
+      diff.modified[path] = {
+        from: valueA,
+        to: valueB,
+      };
+    }
+  }
+}
+
+/**
+ * Records a nested value as an addition, flattening objects into dotted paths.
+ *
+ * @param value Value being marked as added.
+ * @param path Dotted attribute path for the value.
+ * @param diff Bucket used to capture additions.
+ * @param ignoreKeys Set of attribute keys to ignore.
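+ *
+ * @example
+ * // Sketch with illustrative values: nested objects flatten into dotted leaf paths.
+ * const diff = { added: {} };
+ * recordAddedValue({ theme: 'dark', layout: { mode: 'grid' } }, 'config', diff, new Set<string>());
+ * // diff.added => { 'config.theme': 'dark', 'config.layout.mode': 'grid' }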
+ */
+function recordAddedValue(
+  value: unknown,
+  path: string,
+  diff: Pick<AttributesDiff, 'added'>,
+  ignoreKeys: Set<string>,
+): void {
+  if (isPlainObject(value)) {
+    for (const [childKey, childValue] of Object.entries(value)) {
+      if (ignoreKeys.has(childKey)) {
+        continue;
+      }
+      recordAddedValue(childValue, joinPath(path, childKey), diff, ignoreKeys);
+    }
+    return;
+  }
+  diff.added[path] = value;
+}
+
+/**
+ * Records a nested value as a deletion, flattening objects into dotted paths.
+ *
+ * @param value Value being marked as removed.
+ * @param path Dotted attribute path for the value.
+ * @param diff Bucket used to capture deletions.
+ * @param ignoreKeys Set of attribute keys to ignore.
+ */
+function recordDeletedValue(
+  value: unknown,
+  path: string,
+  diff: Pick<AttributesDiff, 'deleted'>,
+  ignoreKeys: Set<string>,
+): void {
+  if (isPlainObject(value)) {
+    for (const [childKey, childValue] of Object.entries(value)) {
+      if (ignoreKeys.has(childKey)) {
+        continue;
+      }
+      recordDeletedValue(childValue, joinPath(path, childKey), diff, ignoreKeys);
+    }
+    return;
+  }
+  diff.deleted[path] = value;
+}
+
+/**
+ * Builds dotted attribute paths.
+ *
+ * @param base Existing path prefix.
+ * @param key Current key being appended.
+ * @returns Combined dotted path.
+ */
+function joinPath(base: string, key: string): string {
+  return base ? `${base}.${key}` : key;
+}
+
+/**
+ * Determines if a value is a plain object (no arrays or nulls).
+ *
+ * @param value Value to inspect.
+ * @returns True when the value is a non-null object.
+ */
+function isPlainObject(value: unknown): value is Record<string, unknown> {
+  return Boolean(value) && typeof value === 'object' && !Array.isArray(value);
+}
+
+/**
+ * Checks deep equality for primitives, arrays, and plain objects.
+ *
+ * @param a First value.
+ * @param b Second value.
+ * @returns True when both values are deeply equal.
+ */
+function deepEquals(a: unknown, b: unknown): boolean {
+  if (a === b) {
+    return true;
+  }
+
+  if (Array.isArray(a) && Array.isArray(b)) {
+    if (a.length !== b.length) {
+      return false;
+    }
+    for (let i = 0; i < a.length; i++) {
+      if (!deepEquals(a[i], b[i])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  if (isPlainObject(a) && isPlainObject(b)) {
+    const keysA = Object.keys(a);
+    const keysB = Object.keys(b);
+    if (keysA.length !== keysB.length) {
+      return false;
+    }
+    for (const key of keysA) {
+      if (!deepEquals(a[key], b[key])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  return false;
+}
diff --git a/packages/super-editor/src/extensions/diffing/algorithm/comment-diffing.test.ts b/packages/super-editor/src/extensions/diffing/algorithm/comment-diffing.test.ts
new file mode 100644
index 000000000..6dc000f31
--- /dev/null
+++ b/packages/super-editor/src/extensions/diffing/algorithm/comment-diffing.test.ts
@@ -0,0 +1,273 @@
+import { describe, expect, it } from 'vitest';
+import { Schema } from 'prosemirror-model';
+import {
+  buildAddedCommentDiff,
+  buildCommentTokens,
+  buildDeletedCommentDiff,
+  buildModifiedCommentDiff,
+  canTreatAsModification,
+  commentComparator,
+  diffComments,
+  shouldProcessEqualAsModification,
+} from './comment-diffing.ts';
+
+/**
+ * Builds a minimal schema suitable for comment text tokenization.
+ *
+ * @returns {Schema}
+ */
+const createSchema = () =>
+  new Schema({
+    nodes: {
+      doc: { content: 'block+' },
+      paragraph: { content: 'inline*', group: 'block' },
+      text: { group: 'inline' },
+    },
+    marks: {},
+  });
+
+/**
+ * Builds a basic comment body JSON payload.
+ *
+ * @param {string} text Comment text content.
+ * @returns {Record} + */ +const buildCommentTextJson = (text) => ({ + type: 'paragraph', + content: [{ type: 'text', text }], +}); + +/** + * Returns the first token for convenience in tests. + * + * @param {Array} tokens + * @returns {import('./comment-diffing.ts').CommentToken} + */ +const getFirstToken = (tokens) => tokens[0]; + +describe('buildCommentTokens', () => { + it('builds tokens and text for comments with commentId', () => { + const schema = createSchema(); + const comment = { + commentId: 'c-1', + textJson: buildCommentTextJson('Hello'), + isInternal: true, + }; + + const tokens = buildCommentTokens([comment], schema); + expect(tokens).toHaveLength(1); + expect(tokens[0]?.commentId).toBe('c-1'); + expect(tokens[0]?.content?.fullText).toBe('Hello'); + expect(tokens[0]?.content?.text).toHaveLength(5); + expect(tokens[0]?.commentJSON).toBe(comment); + }); + + it('falls back to importedId when commentId is missing', () => { + const schema = createSchema(); + const comment = { + importedId: 'import-1', + textJson: buildCommentTextJson('Import'), + }; + + const tokens = buildCommentTokens([comment], schema); + expect(tokens).toHaveLength(1); + expect(tokens[0]?.commentId).toBe('import-1'); + }); + + it('returns empty text when textJson is missing', () => { + const schema = createSchema(); + const comment = { + commentId: 'c-2', + textJson: null, + }; + + const tokens = buildCommentTokens([comment], schema); + expect(tokens).toHaveLength(1); + expect(tokens[0]?.content).toBeNull(); + }); + + it('returns a base node info when the root node is not a paragraph', () => { + const schema = createSchema(); + const comment = { + commentId: 'c-3', + textJson: { type: 'text', text: 'Inline' }, + }; + + const tokens = buildCommentTokens([comment], schema); + expect(tokens).toHaveLength(1); + expect(tokens[0]?.content).toMatchObject({ + pos: 0, + depth: 0, + }); + expect(tokens[0]?.content?.node?.type?.name).toBe('text'); + }); + + it('skips comments without a resolvable id', () => { + const schema = createSchema(); + const comment = { + textJson: buildCommentTextJson('No id'), + }; + + const tokens = buildCommentTokens([comment], schema); + expect(tokens).toEqual([]); + }); +}); + +describe('comment diff helpers', () => { + it('matches comments by id', () => { + const schema = createSchema(); + const oldToken = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('A') }], schema), + ); + const newToken = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('B') }], schema), + ); + + expect(commentComparator(oldToken, newToken)).toBe(true); + }); + + it('treats metadata changes as modifications', () => { + const schema = createSchema(); + const oldToken = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('Text'), isDone: false }], schema), + ); + const newToken = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('Text'), isDone: true }], schema), + ); + + expect(shouldProcessEqualAsModification(oldToken, newToken)).toBe(true); + }); + + it('treats content changes as modifications', () => { + const schema = createSchema(); + const oldToken = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('Old') }], schema), + ); + const newToken = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('New') }], schema), + ); + + expect(shouldProcessEqualAsModification(oldToken, 
newToken)).toBe(true); + }); + + it('returns false for identical comments', () => { + const schema = createSchema(); + const oldToken = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('Same') }], schema), + ); + const newToken = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('Same') }], schema), + ); + + expect(shouldProcessEqualAsModification(oldToken, newToken)).toBe(false); + }); + + it('does not treat insert/delete pairs as modifications', () => { + expect(canTreatAsModification()).toBe(false); + }); + + it('builds added comment diffs with text', () => { + const schema = createSchema(); + const token = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('Added') }], schema), + ); + + expect(buildAddedCommentDiff(token)).toEqual({ + action: 'added', + nodeType: 'comment', + commentId: 'c-1', + commentJSON: token.commentJSON, + text: 'Added', + }); + }); + + it('builds deleted comment diffs with old text', () => { + const schema = createSchema(); + const token = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('Deleted') }], schema), + ); + + expect(buildDeletedCommentDiff(token)).toEqual({ + action: 'deleted', + nodeType: 'comment', + commentId: 'c-1', + commentJSON: token.commentJSON, + oldText: 'Deleted', + }); + }); + + it('builds modified comment diffs when content changes', () => { + const schema = createSchema(); + const oldToken = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('Old') }], schema), + ); + const newToken = getFirstToken( + buildCommentTokens([{ commentId: 'c-1', textJson: buildCommentTextJson('New') }], schema), + ); + + const diff = buildModifiedCommentDiff(oldToken, newToken); + expect(diff).toMatchObject({ + action: 'modified', + nodeType: 'comment', + commentId: 'c-1', + oldText: 'Old', + newText: 'New', + }); + expect(diff?.contentDiff).not.toEqual([]); + expect(diff?.attrsDiff).toBeNull(); + }); +}); + +describe('diffComments', () => { + it('returns added comment diffs for new comments', () => { + const schema = createSchema(); + const diffs = diffComments([], [{ commentId: 'c-1', textJson: buildCommentTextJson('Added') }], schema); + + expect(diffs).toHaveLength(1); + expect(diffs[0]).toMatchObject({ + action: 'added', + nodeType: 'comment', + commentId: 'c-1', + }); + }); + + it('returns deleted comment diffs for removed comments', () => { + const schema = createSchema(); + const diffs = diffComments([{ commentId: 'c-1', textJson: buildCommentTextJson('Removed') }], [], schema); + + expect(diffs).toHaveLength(1); + expect(diffs[0]).toMatchObject({ + action: 'deleted', + nodeType: 'comment', + commentId: 'c-1', + }); + }); + + it('returns modified comment diffs for content changes', () => { + const schema = createSchema(); + const diffs = diffComments( + [{ commentId: 'c-1', textJson: buildCommentTextJson('Old') }], + [{ commentId: 'c-1', textJson: buildCommentTextJson('New') }], + schema, + ); + + expect(diffs).toHaveLength(1); + expect(diffs[0]).toMatchObject({ + action: 'modified', + nodeType: 'comment', + commentId: 'c-1', + }); + expect(diffs[0].contentDiff).not.toEqual([]); + }); + + it('returns empty diffs for identical comments', () => { + const schema = createSchema(); + const diffs = diffComments( + [{ commentId: 'c-1', textJson: buildCommentTextJson('Same') }], + [{ commentId: 'c-1', textJson: buildCommentTextJson('Same') }], + schema, + ); + + 
expect(diffs).toEqual([]); + }); +}); diff --git a/packages/super-editor/src/extensions/diffing/algorithm/comment-diffing.ts b/packages/super-editor/src/extensions/diffing/algorithm/comment-diffing.ts new file mode 100644 index 000000000..3357cc484 --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/comment-diffing.ts @@ -0,0 +1,287 @@ +import type { Schema } from 'prosemirror-model'; +import { diffNodes, type NodeDiff, type NodeInfo } from './generic-diffing.ts'; +import { getAttributesDiff, type AttributesDiff } from './attributes-diffing.ts'; +import { createParagraphSnapshot, type ParagraphNodeInfo } from './paragraph-diffing.ts'; +import { diffSequences } from './sequence-diffing.ts'; + +/** + * Raw comment data used for diffing comment content and metadata. + */ +export interface CommentInput { + /** Primary comment identifier when available. */ + commentId?: string; + /** Imported comment identifier used as a fallback. */ + importedId?: string; + /** Alternate identifier used by some integrations. */ + id?: string; + /** ProseMirror-compatible JSON for the comment body (expected to be a paragraph node). */ + textJson?: unknown; + /** Additional comment metadata fields. */ + [key: string]: unknown; +} + +/** + * Normalized token representation for a single comment. + */ +export interface CommentToken { + /** Resolved identifier for the comment. */ + commentId: string; + /** Original comment payload. */ + commentJSON: CommentInput; + /** Parsed comment body content when available. */ + content: NodeInfo | null; +} + +/** + * Base shape shared by every comment diff payload. + */ +export interface CommentDiffBase { + /** Change type for this comment. */ + action: Action; + /** Node type identifier for comment diffs. */ + nodeType: 'comment'; + /** Resolved comment identifier (importedId → id → commentId). */ + commentId: string; +} + +/** + * Diff payload describing an added comment. + */ +export type CommentAddedDiff = CommentDiffBase<'added'> & { + /** Serialized comment payload inserted into the document. */ + commentJSON: CommentInput; + /** Plain-text representation of the comment body. */ + text: string; +}; + +/** + * Diff payload describing a deleted comment. + */ +export type CommentDeletedDiff = CommentDiffBase<'deleted'> & { + /** Serialized comment payload removed from the document. */ + commentJSON: CommentInput; + /** Plain-text representation of the removed comment body. */ + oldText: string; +}; + +/** + * Diff payload describing a modified comment. + */ +export type CommentModifiedDiff = CommentDiffBase<'modified'> & { + /** Serialized comment payload before the change. */ + oldCommentJSON: CommentInput; + /** Serialized comment payload after the change. */ + newCommentJSON: CommentInput; + /** Plain-text content before the change. */ + oldText: string; + /** Plain-text content after the change. */ + newText: string; + /** Node-level diff for the comment body content. */ + contentDiff: NodeDiff[]; + /** Attribute-level diff for comment metadata. */ + attrsDiff: AttributesDiff | null; +}; + +/** + * Union of every diff variant the comment diffing logic can produce. + */ +export type CommentDiff = CommentAddedDiff | CommentDeletedDiff | CommentModifiedDiff; + +/** + * Builds normalized tokens for diffing comment content. + * + * @param comments Comment payloads to normalize. + * @param schema Schema used to build ProseMirror nodes from comment JSON. + * @returns Normalized comment tokens. 
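+ *
+ * @example
+ * // Sketch (assumes a schema with doc/paragraph/text nodes, as in the tests above):
+ * const tokens = buildCommentTokens(
+ *   [{ commentId: 'c-1', textJson: { type: 'paragraph', content: [{ type: 'text', text: 'Hi' }] } }],
+ *   schema,
+ * );
+ * // tokens[0].commentId === 'c-1'; tokens[0].content?.fullText === 'Hi'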
+ */ +export function buildCommentTokens(comments: CommentInput[], schema: Schema): CommentToken[] { + return comments + .map((comment) => { + const commentId = resolveCommentId(comment); + if (!commentId) { + return null; + } + const content = tokenizeCommentText(comment, schema); + return { + commentId, + commentJSON: comment, + content, + }; + }) + .filter((token): token is CommentToken => token !== null); +} + +/** + * Computes diffs between two comment lists. + * + * @param oldComments Previous comment list. + * @param newComments Updated comment list. + * @param schema Schema used to parse comment bodies. + * @returns Comment diff payloads. + */ +export function diffComments(oldComments: CommentInput[], newComments: CommentInput[], schema: Schema): CommentDiff[] { + const oldTokens = buildCommentTokens(oldComments, schema); + const newTokens = buildCommentTokens(newComments, schema); + + return diffSequences(oldTokens, newTokens, { + comparator: commentComparator, + shouldProcessEqualAsModification, + canTreatAsModification: () => false, + buildAdded: (token) => buildAddedCommentDiff(token), + buildDeleted: (token) => buildDeletedCommentDiff(token), + buildModified: (oldToken, newToken) => buildModifiedCommentDiff(oldToken, newToken), + }); +} + +/** + * Compares two comment tokens to determine if they represent the same comment. + * + * @param oldToken Comment token from the old list. + * @param newToken Comment token from the new list. + * @returns True when comment ids match. + */ +export function commentComparator(oldToken: CommentToken, newToken: CommentToken): boolean { + return oldToken.commentId === newToken.commentId; +} + +/** + * Determines whether equal comment tokens should still be treated as modified. + * + * @param oldToken Comment token from the old list. + * @param newToken Comment token from the new list. + * @returns True when content or metadata differs. + */ +export function shouldProcessEqualAsModification(oldToken: CommentToken, newToken: CommentToken): boolean { + const attrsDiff = getAttributesDiff(oldToken.commentJSON, newToken.commentJSON, ['textJson', 'commentId']); + if (attrsDiff) { + return true; + } + + const oldSignature = oldToken.content ? JSON.stringify(oldToken.content.node.toJSON()) : ''; + const newSignature = newToken.content ? JSON.stringify(newToken.content.node.toJSON()) : ''; + return oldSignature !== newSignature; +} + +/** + * Determines whether delete/insert pairs should be treated as modifications. + * + * @returns False because comment ids are treated as stable identities. + */ +export function canTreatAsModification(): boolean { + return false; +} + +/** + * Builds a normalized payload describing a comment addition. + * + * @param comment Comment token being added. + * @returns Diff payload for the added comment. + */ +export function buildAddedCommentDiff(comment: CommentToken): CommentAddedDiff { + return { + action: 'added', + nodeType: 'comment', + commentId: comment.commentId, + commentJSON: comment.commentJSON, + text: getCommentText(comment.content), + }; +} + +/** + * Builds a normalized payload describing a comment deletion. + * + * @param comment Comment token being deleted. + * @returns Diff payload for the deleted comment. 
+ */ +export function buildDeletedCommentDiff(comment: CommentToken): CommentDeletedDiff { + return { + action: 'deleted', + nodeType: 'comment', + commentId: comment.commentId, + commentJSON: comment.commentJSON, + oldText: getCommentText(comment.content), + }; +} + +/** + * Builds the payload for a comment modification, including inline diffs when possible. + * + * @param oldComment Comment token from the old list. + * @param newComment Comment token from the new list. + * @returns Diff payload or null when no changes exist. + */ +export function buildModifiedCommentDiff( + oldComment: CommentToken, + newComment: CommentToken, +): CommentModifiedDiff | null { + const contentDiff = + oldComment.content && newComment.content ? diffNodes([oldComment.content], [newComment.content]) : []; + const attrsDiff = getAttributesDiff(oldComment.commentJSON, newComment.commentJSON, ['textJson', 'commentId']); + + if (contentDiff.length === 0 && !attrsDiff) { + return null; + } + + return { + action: 'modified', + nodeType: 'comment', + commentId: oldComment.commentId, + oldCommentJSON: oldComment.commentJSON, + newCommentJSON: newComment.commentJSON, + oldText: getCommentText(oldComment.content), + newText: getCommentText(newComment.content), + contentDiff, + attrsDiff, + }; +} + +/** + * Resolves a stable comment identifier from a comment payload. + * + * @param comment Comment payload to inspect. + * @returns Resolved comment id or null when unavailable. + */ +function resolveCommentId(comment: CommentInput): string | null { + return comment.importedId ?? comment.id ?? comment.commentId ?? null; +} + +/** + * Returns the flattened comment text when the content is a paragraph. + * + * @param content Comment content payload. + * @returns Flattened text string. + */ +function getCommentText(content: NodeInfo | null): string { + if (!content) { + return ''; + } + if (content.node.type.name === 'paragraph') { + const paragraphContent = content as ParagraphNodeInfo; + return paragraphContent.fullText; + } + return ''; +} + +/** + * Tokenizes a comment body into inline tokens and a flattened text string. + * + * @param comment Comment payload containing `textJson`. + * @param schema Schema used to build ProseMirror nodes. + * @returns Tokenization output for the comment body. 
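+ *
+ * @example
+ * // Sketch (assumes a schema with doc/paragraph/text nodes): paragraph bodies become
+ * // paragraph snapshots; non-paragraph roots fall back to { node, pos: 0, depth: 0 }.
+ * const info = tokenizeCommentText(
+ *   { commentId: 'c-1', textJson: { type: 'paragraph', content: [{ type: 'text', text: 'Hi' }] } },
+ *   schema,
+ * );
+ * // info?.node.type.name === 'paragraph'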
+ */ +function tokenizeCommentText(comment: CommentInput, schema: Schema): NodeInfo | null { + if (!comment.textJson) { + return null; + } + + const node = schema.nodeFromJSON(comment.textJson as Record); + if (node.type.name !== 'paragraph') { + return { + node, + pos: 0, + depth: 0, + }; + } + + return createParagraphSnapshot(node, 0, 0); +} diff --git a/packages/super-editor/src/extensions/diffing/algorithm/diff-utils.test.ts b/packages/super-editor/src/extensions/diffing/algorithm/diff-utils.test.ts new file mode 100644 index 000000000..63ef88bc2 --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/diff-utils.test.ts @@ -0,0 +1,29 @@ +import { describe, it, expect } from 'vitest'; +import { getInsertionPos } from './diff-utils.ts'; + +const createNodeInfo = ({ pos = 0, depth = 0, nodeSize = 1 } = {}) => ({ + pos, + depth, + node: { nodeSize }, +}); + +describe('getInsertionPos', () => { + it('positions after previous node when depth matches', () => { + const previous = createNodeInfo({ pos: 10, depth: 2, nodeSize: 5 }); + expect(getInsertionPos(2, previous)).toBe(15); + }); + + it('falls back to previous position plus one when depth differs', () => { + const previous = createNodeInfo({ pos: 10, depth: 1, nodeSize: 3 }); + expect(getInsertionPos(2, previous)).toBe(11); + }); + + it('returns zero when there is no previous node info', () => { + expect(getInsertionPos(0, undefined)).toBe(0); + }); + + it('handles previous nodes lacking nodeSize safely', () => { + const previous = { pos: 5, depth: 1, node: {} } as any; + expect(getInsertionPos(1, previous)).toBe(5); + }); +}); diff --git a/packages/super-editor/src/extensions/diffing/algorithm/diff-utils.ts b/packages/super-editor/src/extensions/diffing/algorithm/diff-utils.ts new file mode 100644 index 000000000..0276dc42b --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/diff-utils.ts @@ -0,0 +1,30 @@ +import type { Node as PMNode } from 'prosemirror-model'; + +interface NodePositionInfo { + /** ProseMirror node reference. */ + node: PMNode; + /** Absolute position of the node in the document. */ + pos: number; + /** Depth of the node within the document tree. */ + depth: number; +} + +/** + * Computes the insertion point for a node relative to the previous node in the old document tree. + * + * When the previous node shares the same depth, the insertion + * is placed right after the previous node's position. Otherwise, the insertion + * is placed just after the previous node's opening position. + * + * @param currentDepth Depth of the node being inserted. + * @param previousNode Optional info about the preceding node from the old document. + * @returns Absolute document position where the new node should be inserted. + */ +export function getInsertionPos(currentDepth: number, previousNode?: NodePositionInfo): number { + if (currentDepth === previousNode?.depth) { + const previousPos = previousNode?.pos ?? -1; + const previousSize = previousNode?.node.nodeSize ?? 0; + return previousPos >= 0 ? previousPos + previousSize : 0; + } + return (previousNode?.pos ?? 
-1) + 1; +} diff --git a/packages/super-editor/src/extensions/diffing/algorithm/generic-diffing.test.js b/packages/super-editor/src/extensions/diffing/algorithm/generic-diffing.test.js new file mode 100644 index 000000000..f57bd11d9 --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/generic-diffing.test.js @@ -0,0 +1,235 @@ +import { describe, it, expect } from 'vitest'; +import { diffNodes, normalizeNodes } from './generic-diffing.ts'; + +const createDocFromNodes = (nodes = []) => { + const docNode = { + type: { name: 'doc', spec: {} }, + descendants(callback) { + const childIndexMap = new WeakMap(); + const depthStack = [docNode]; + for (const entry of nodes) { + const { node, pos, depth = 1 } = entry; + depthStack.length = depth; + const parentNode = depthStack[depth - 1] ?? docNode; + const currentIndex = childIndexMap.get(parentNode) ?? 0; + childIndexMap.set(parentNode, currentIndex + 1); + callback(node, pos, parentNode, currentIndex); + depthStack[depth] = node; + } + }, + }; + + return docNode; +}; + +const buildSimpleNode = (typeName, attrs = {}, options = {}) => { + const { nodeSize = 2, children = [] } = options; + const node = { + attrs, + type: { name: typeName, spec: {} }, + nodeSize, + descendants(cb) { + children.forEach((child, index) => { + cb(child, index + 1); + if (typeof child.descendants === 'function') { + child.descendants(cb); + } + }); + }, + }; + node.toJSON = () => ({ type: node.type.name, attrs: node.attrs }); + return node; +}; + +const createParagraph = (text, attrs = {}, options = {}) => { + const { pos = 0, textAttrs = {}, depth = 1 } = options; + const paragraphNode = { + attrs, + type: { name: 'paragraph', spec: {} }, + nodeSize: text.length + 2, + content: { size: text.length }, + nodesBetween(_from, _to, callback) { + if (!text.length) { + return; + } + callback( + { + isText: true, + text, + type: { name: 'text', spec: {} }, + isLeaf: false, + isInline: true, + }, + 1, + ); + }, + nodeAt() { + return { attrs: textAttrs }; + }, + }; + paragraphNode.toJSON = () => ({ type: paragraphNode.type.name, attrs: paragraphNode.attrs }); + + return { node: paragraphNode, pos, depth }; +}; + +describe('diffParagraphs', () => { + it('treats similar paragraphs without IDs as modifications', () => { + const oldParagraphs = [createParagraph('Hello world from ProseMirror.')]; + const newParagraphs = [createParagraph('Hello brave new world from ProseMirror.')]; + const oldRoot = createDocFromNodes(oldParagraphs); + const newRoot = createDocFromNodes(newParagraphs); + + const diffs = diffNodes(normalizeNodes(oldRoot), normalizeNodes(newRoot)); + + expect(diffs).toHaveLength(1); + expect(diffs[0].action).toBe('modified'); + expect(diffs[0].contentDiff.length).toBeGreaterThan(0); + }); + + it('keeps unrelated paragraphs as deletion + addition', () => { + const oldParagraphs = [createParagraph('Alpha paragraph with some text.')]; + const newParagraphs = [createParagraph('Zephyr quickly jinxed the new passage.')]; + const oldRoot = createDocFromNodes(oldParagraphs); + const newRoot = createDocFromNodes(newParagraphs); + + const diffs = diffNodes(normalizeNodes(oldRoot), normalizeNodes(newRoot)); + + expect(diffs).toHaveLength(2); + expect(diffs[0].action).toBe('deleted'); + expect(diffs[1].action).toBe('added'); + }); + + it('detects modifications even when Myers emits grouped deletes and inserts', () => { + const oldParagraphs = [ + createParagraph('Original introduction paragraph that needs tweaks.'), + createParagraph('Paragraph that will be 
removed.'), + ]; + const newParagraphs = [ + createParagraph('Original introduction paragraph that now has tweaks.'), + createParagraph('Completely different replacement paragraph.'), + ]; + const oldRoot = createDocFromNodes(oldParagraphs); + const newRoot = createDocFromNodes(newParagraphs); + + const diffs = diffNodes(normalizeNodes(oldRoot), normalizeNodes(newRoot)); + + expect(diffs).toHaveLength(3); + expect(diffs[0].action).toBe('modified'); + expect(diffs[0].contentDiff.length).toBeGreaterThan(0); + expect(diffs[1].action).toBe('deleted'); + expect(diffs[2].action).toBe('added'); + }); + + it('treats paragraph attribute-only changes as modifications', () => { + const oldParagraph = createParagraph('Consistent text', { align: 'left' }); + const newParagraph = createParagraph('Consistent text', { align: 'right' }); + const diffs = diffNodes( + normalizeNodes(createDocFromNodes([oldParagraph])), + normalizeNodes(createDocFromNodes([newParagraph])), + ); + + expect(diffs).toHaveLength(1); + expect(diffs[0].action).toBe('modified'); + expect(diffs[0].contentDiff).toEqual([]); + expect(diffs[0].attrsDiff?.modified?.align).toEqual({ from: 'left', to: 'right' }); + }); + + it('emits attribute diffs for non-paragraph nodes', () => { + const oldHeading = { node: buildSimpleNode('heading', { level: 1 }), pos: 0, depth: 1 }; + const newHeading = { node: buildSimpleNode('heading', { level: 2 }), pos: 0, depth: 1 }; + const diffs = diffNodes( + normalizeNodes(createDocFromNodes([oldHeading])), + normalizeNodes(createDocFromNodes([newHeading])), + ); + + expect(diffs).toHaveLength(1); + expect(diffs[0]).toMatchObject({ + action: 'modified', + nodeType: 'heading', + }); + expect(diffs[0].attrsDiff?.modified?.level).toEqual({ from: 1, to: 2 }); + }); + + it('deduplicates added nodes and their descendants', () => { + const childNode = buildSimpleNode('image'); + const parentNode = buildSimpleNode('figure', {}, { children: [childNode] }); + const oldParagraph = createParagraph('Base paragraph', {}, { pos: 0 }); + const newParagraph = createParagraph('Base paragraph', {}, { pos: 0 }); + const insertionPos = oldParagraph.pos + oldParagraph.node.nodeSize; + const diffs = diffNodes( + normalizeNodes(createDocFromNodes([oldParagraph])), + normalizeNodes( + createDocFromNodes([ + newParagraph, + { node: parentNode, pos: insertionPos, depth: 1 }, + { node: childNode, pos: insertionPos + 1, depth: 2 }, + ]), + ), + ); + + const additions = diffs.filter((diff) => diff.action === 'added'); + expect(additions).toHaveLength(1); + expect(additions[0].nodeType).toBe('figure'); + }); + + it('deduplicates deleted nodes and their descendants', () => { + const childNode = buildSimpleNode('image'); + const parentNode = buildSimpleNode('figure', {}, { children: [childNode] }); + const paragraph = createParagraph('Base paragraph', {}, { pos: 0 }); + const figurePos = paragraph.pos + paragraph.node.nodeSize; + + const diffs = diffNodes( + normalizeNodes( + createDocFromNodes([ + paragraph, + { node: parentNode, pos: figurePos, depth: 1 }, + { node: childNode, pos: figurePos + 1, depth: 2 }, + ]), + ), + normalizeNodes(createDocFromNodes([paragraph])), + ); + + const deletions = diffs.filter((diff) => diff.action === 'deleted'); + expect(deletions).toHaveLength(1); + expect(deletions[0].nodeType).toBe('figure'); + }); + + it('computes insertion position for nodes added to the beginning of a container', () => { + const oldRow = buildSimpleNode('tableRow', { paraId: 'row-1' }, { nodeSize: 4 }); + const oldTable = 
buildSimpleNode('table', {}, { nodeSize: 10, children: [oldRow] }); + const oldDoc = createDocFromNodes([ + { node: oldTable, pos: 0, depth: 1 }, + { node: oldRow, pos: 1, depth: 2 }, + ]); + + const insertedRow = buildSimpleNode('tableRow', { paraId: 'row-2' }, { nodeSize: 4 }); + const persistedRow = buildSimpleNode('tableRow', { paraId: 'row-1' }, { nodeSize: 4 }); + const newTable = buildSimpleNode('table', {}, { nodeSize: 14, children: [insertedRow, persistedRow] }); + const newDoc = createDocFromNodes([ + { node: newTable, pos: 0, depth: 1 }, + { node: insertedRow, pos: 1, depth: 2 }, + { node: persistedRow, pos: 1 + insertedRow.nodeSize, depth: 2 }, + ]); + + const diffs = diffNodes(normalizeNodes(oldDoc), normalizeNodes(newDoc)); + + const addition = diffs.find((diff) => diff.action === 'added' && diff.nodeType === 'tableRow'); + expect(addition).toBeDefined(); + expect(addition.pos).toBe(1); + }); + + it('computes insertion position based on the previous old node', () => { + const oldParagraph = createParagraph('Hello!', {}, { pos: 0 }); + const newParagraph = createParagraph('Hello!', {}, { pos: 0 }); + const headingNode = buildSimpleNode('heading', { level: 1 }, { nodeSize: 3 }); + const expectedPos = oldParagraph.pos + oldParagraph.node.nodeSize; + + const diffs = diffNodes( + normalizeNodes(createDocFromNodes([oldParagraph])), + normalizeNodes(createDocFromNodes([newParagraph, { node: headingNode, pos: expectedPos, depth: 1 }])), + ); + + const addition = diffs.find((diff) => diff.action === 'added' && diff.nodeType === 'heading'); + expect(addition?.pos).toBe(expectedPos); + }); +}); diff --git a/packages/super-editor/src/extensions/diffing/algorithm/generic-diffing.ts b/packages/super-editor/src/extensions/diffing/algorithm/generic-diffing.ts new file mode 100644 index 000000000..7af4418cb --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/generic-diffing.ts @@ -0,0 +1,239 @@ +import type { Node as PMNode } from 'prosemirror-model'; +import { + createParagraphSnapshot, + paragraphComparator, + canTreatAsModification as canTreatParagraphDeletionInsertionAsModification, + shouldProcessEqualAsModification as shouldProcessEqualParagraphsAsModification, + buildAddedParagraphDiff, + buildDeletedParagraphDiff, + buildModifiedParagraphDiff, + type ParagraphDiff, + type ParagraphNodeInfo, +} from './paragraph-diffing.ts'; +import { diffSequences, reorderDiffOperations } from './sequence-diffing.ts'; +import { getAttributesDiff, type AttributesDiff } from './attributes-diffing.ts'; +import { getInsertionPos } from './diff-utils.ts'; + +type NodeJSON = ReturnType; + +/** + * Minimal node metadata extracted during document traversal. + */ +export type BaseNodeInfo = { + /** ProseMirror node reference. */ + node: PMNode; + /** Absolute position of the node in the document. */ + pos: number; + /** Depth of the node within the document tree. */ + depth: number; +}; + +/** + * Union describing every node processed by the generic diff. + */ +export type NodeInfo = BaseNodeInfo | ParagraphNodeInfo; + +interface NodeDiffBase { + /** Change type for this node. */ + action: Action; + /** ProseMirror node type name. */ + nodeType: string; + /** Anchor position in the old document for replaying diffs. */ + pos: number; +} + +/** + * Diff payload describing an inserted non-paragraph node. + */ +interface NodeAddedDiff extends NodeDiffBase<'added'> { + /** Serialized node payload inserted into the document. 
*/ + nodeJSON: NodeJSON; +} + +/** + * Diff payload describing a deleted non-paragraph node. + */ +interface NodeDeletedDiff extends NodeDiffBase<'deleted'> { + /** Serialized node payload removed from the document. */ + nodeJSON: NodeJSON; +} + +/** + * Diff payload describing an attribute-only change on non-paragraph nodes. + */ +interface NodeModifiedDiff extends NodeDiffBase<'modified'> { + /** Serialized node payload before the change. */ + oldNodeJSON: NodeJSON; + /** Serialized node payload after the change. */ + newNodeJSON: NodeJSON; + /** Attribute-level diff for the node. */ + attrsDiff: AttributesDiff; +} + +/** + * Union of every diff type emitted by the generic diffing layer. + */ +export type NodeDiff = ParagraphDiff | NodeAddedDiff | NodeDeletedDiff | NodeModifiedDiff; + +/** + * Produces a sequence diff between two normalized node lists. + * + * @param oldNodes Normalized nodes from the old document. + * @param newNodes Normalized nodes from the new document. + * @returns List of node diffs describing the changes. + */ +export function diffNodes(oldNodes: NodeInfo[], newNodes: NodeInfo[]): NodeDiff[] { + const addedNodesSet = new Set(); + const deletedNodesSet = new Set(); + return diffSequences(oldNodes, newNodes, { + comparator: nodeComparator, + reorderOperations: reorderDiffOperations, + shouldProcessEqualAsModification, + canTreatAsModification, + buildAdded: (nodeInfo, _oldIdx, previousOldNodeInfo) => + buildAddedDiff(nodeInfo, previousOldNodeInfo, addedNodesSet), + buildDeleted: (nodeInfo) => buildDeletedDiff(nodeInfo, deletedNodesSet), + buildModified: buildModifiedDiff, + }); +} + +/** + * Traverses a ProseMirror document and converts paragraphs to richer node info objects. + */ +export function normalizeNodes(pmDoc: PMNode): NodeInfo[] { + const nodes: NodeInfo[] = []; + const depthMap = new WeakMap(); + depthMap.set(pmDoc, -1); + + pmDoc.descendants((node, pos, parent) => { + const parentDepth = parent ? (depthMap.get(parent) ?? -1) : -1; + const depth = parentDepth + 1; + depthMap.set(node, depth); + if (node.type.name === 'paragraph') { + nodes.push(createParagraphSnapshot(node, pos, depth)); + return false; + } + nodes.push({ node, pos, depth }); + return undefined; + }); + return nodes; +} + +/** + * Compares two node infos to determine if they correspond to the same logical node. + * Paragraphs are compared with `paragraphComparator`, while other nodes are matched by type name. + */ +function nodeComparator(oldNodeInfo: NodeInfo, newNodeInfo: NodeInfo): boolean { + if (oldNodeInfo.node.type.name !== newNodeInfo.node.type.name) { + return false; + } + if (isParagraphNodeInfo(oldNodeInfo) && isParagraphNodeInfo(newNodeInfo)) { + return paragraphComparator(oldNodeInfo, newNodeInfo); + } else if ( + oldNodeInfo.node.type.name === 'tableRow' && + newNodeInfo.node.type.name === 'tableRow' && + oldNodeInfo.node.attrs.paraId && + newNodeInfo.node.attrs.paraId + ) { + return oldNodeInfo.node.attrs.paraId === newNodeInfo.node.attrs.paraId; + } + return true; +} + +/** + * Decides whether nodes deemed equal by the diff should still be emitted as modifications. + * Paragraph nodes leverage their specialized handler, while other nodes compare attribute JSON. 
+ */ +function shouldProcessEqualAsModification(oldNodeInfo: NodeInfo, newNodeInfo: NodeInfo): boolean { + if (isParagraphNodeInfo(oldNodeInfo) && isParagraphNodeInfo(newNodeInfo)) { + return shouldProcessEqualParagraphsAsModification(oldNodeInfo, newNodeInfo); + } + return JSON.stringify(oldNodeInfo.node.attrs) !== JSON.stringify(newNodeInfo.node.attrs); +} + +/** + * Determines whether a delete/insert pair should instead be surfaced as a modification. + * Only paragraphs qualify because we can measure textual similarity; other nodes remain as-is. + */ +function canTreatAsModification(deletedNodeInfo: NodeInfo, insertedNodeInfo: NodeInfo): boolean { + if (isParagraphNodeInfo(deletedNodeInfo) && isParagraphNodeInfo(insertedNodeInfo)) { + return canTreatParagraphDeletionInsertionAsModification(deletedNodeInfo, insertedNodeInfo); + } + return false; +} + +/** + * Builds the diff payload for an inserted node and tracks descendants to avoid duplicates. + */ +function buildAddedDiff( + nodeInfo: NodeInfo, + previousOldNodeInfo: NodeInfo | undefined, + addedNodesSet: Set, +): NodeDiff | null { + if (addedNodesSet.has(nodeInfo.node)) { + return null; + } + addedNodesSet.add(nodeInfo.node); + if (isParagraphNodeInfo(nodeInfo)) { + return buildAddedParagraphDiff(nodeInfo, previousOldNodeInfo); + } + nodeInfo.node.descendants((childNode) => { + addedNodesSet.add(childNode); + }); + + return { + action: 'added', + nodeType: nodeInfo.node.type.name, + nodeJSON: nodeInfo.node.toJSON(), + pos: getInsertionPos(nodeInfo.depth, previousOldNodeInfo), + }; +} + +/** + * Builds the diff payload for a deleted node. + */ +function buildDeletedDiff(nodeInfo: NodeInfo, deletedNodesSet: Set): NodeDiff | null { + if (deletedNodesSet.has(nodeInfo.node)) { + return null; + } + deletedNodesSet.add(nodeInfo.node); + if (isParagraphNodeInfo(nodeInfo)) { + return buildDeletedParagraphDiff(nodeInfo); + } + nodeInfo.node.descendants((childNode) => { + deletedNodesSet.add(childNode); + }); + return { + action: 'deleted', + nodeType: nodeInfo.node.type.name, + nodeJSON: nodeInfo.node.toJSON(), + pos: nodeInfo.pos, + }; +} + +/** + * Builds the diff payload for a modified node. + * Paragraphs delegate to their inline-aware builder, while other nodes report attribute diffs. 
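+ *
+ * @example
+ * // Sketch of a non-paragraph result (mirrors the heading test): only attrs changed.
+ * // {
+ * //   action: 'modified',
+ * //   nodeType: 'heading',
+ * //   pos: 0,
+ * //   oldNodeJSON: { type: 'heading', attrs: { level: 1 } },
+ * //   newNodeJSON: { type: 'heading', attrs: { level: 2 } },
+ * //   attrsDiff: { added: {}, deleted: {}, modified: { level: { from: 1, to: 2 } } },
+ * // }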
+ */ +function buildModifiedDiff(oldNodeInfo: NodeInfo, newNodeInfo: NodeInfo): NodeDiff | null { + if (isParagraphNodeInfo(oldNodeInfo) && isParagraphNodeInfo(newNodeInfo)) { + return buildModifiedParagraphDiff(oldNodeInfo, newNodeInfo); + } + + const attrsDiff = getAttributesDiff(oldNodeInfo.node.attrs, newNodeInfo.node.attrs); + if (!attrsDiff) { + return null; + } + return { + action: 'modified', + nodeType: oldNodeInfo.node.type.name, + oldNodeJSON: oldNodeInfo.node.toJSON(), + newNodeJSON: newNodeInfo.node.toJSON(), + pos: oldNodeInfo.pos, + attrsDiff, + }; +} + +function isParagraphNodeInfo(nodeInfo: NodeInfo): nodeInfo is ParagraphNodeInfo { + return nodeInfo.node.type.name === 'paragraph'; +} diff --git a/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.test.js b/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.test.js new file mode 100644 index 000000000..e237fc99e --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.test.js @@ -0,0 +1,419 @@ +import { describe, it, expect, vi } from 'vitest'; +vi.mock('./myers-diff.ts', async () => { + const actual = await vi.importActual('./myers-diff.ts'); + return { + myersDiff: vi.fn(actual.myersDiff), + }; +}); +import { getInlineDiff, tokenizeInlineContent } from './inline-diffing.ts'; + +/** + * Builds text tokens with offsets for inline diff tests. + * + * @param {string} text Text content to tokenize. + * @param {Record} runAttrs Run attributes to attach. + * @param {number} offsetStart Offset base for the first token. + * @returns {import('./inline-diffing.ts').InlineTextToken[]} + */ +const buildTextRuns = (text, runAttrs = {}, offsetStart = 0) => + text.split('').map((char, index) => ({ + char, + runAttrs: { ...runAttrs }, + kind: 'text', + offset: offsetStart + index, + })); + +/** + * Builds marked text tokens with offsets for inline diff tests. + * + * @param {string} text Text content to tokenize. + * @param {Array>} marks Marks to attach. + * @param {Record} runAttrs Run attributes to attach. + * @param {number} offsetStart Offset base for the first token. + * @returns {import('./inline-diffing.ts').InlineTextToken[]} + */ +const buildMarkedTextRuns = (text, marks, runAttrs = {}, offsetStart = 0) => + text.split('').map((char, index) => ({ + char, + runAttrs: { ...runAttrs }, + kind: 'text', + offset: offsetStart + index, + marks, + })); + +/** + * Builds a mock inline-node token for diff tests. + * + * @param {Record} attrs Node attributes. + * @param {{ name: string }} type Node type descriptor. + * @param {number} pos Position offset for the inline node. + * @returns {import('./inline-diffing.ts').InlineNodeToken} + */ +const buildInlineNodeToken = (attrs = {}, type = { name: 'link' }, pos = 0) => { + const nodeAttrs = { ...attrs }; + return { + kind: 'inlineNode', + nodeType: 'link', + node: { + type, + attrs: nodeAttrs, + toJSON: () => ({ type: 'link', attrs: nodeAttrs }), + }, + nodeJSON: { type: 'link', attrs: nodeAttrs }, + pos, + }; +}; + +/** + * Builds text tokens without offsets for tokenizer assertions. + * + * @param {string} text Text content to tokenize. + * @param {Record} runAttrs Run attributes to attach. + * @param {Array>} marks Marks to attach. 
+ * @returns {import('./inline-diffing.ts').InlineTextToken[]} + */ +const buildTextTokens = (text, runAttrs = {}, marks = []) => + text.split('').map((char) => ({ + char, + runAttrs, + kind: 'text', + marks, + })); + +/** + * Creates a mock inline container with configurable segments for tokenizer tests. + * + * @param {Array>} segments Inline segments to emit. + * @param {number | null} contentSize Optional content size override. + * @returns {import('prosemirror-model').Node} + */ +const createInlineContainer = (segments, contentSize) => { + const computedSegments = segments.map((segment) => { + if (segment.inlineNode) { + return { + ...segment, + kind: 'inline', + length: segment.length ?? 1, + start: segment.start ?? 0, + attrs: segment.attrs ?? segment.inlineNode.attrs ?? {}, + inlineNode: { + typeName: segment.inlineNode.typeName ?? 'inline', + attrs: segment.inlineNode.attrs ?? {}, + isLeaf: segment.inlineNode.isLeaf ?? true, + toJSON: + segment.inlineNode.toJSON ?? + (() => ({ + type: segment.inlineNode.typeName ?? 'inline', + attrs: segment.inlineNode.attrs ?? {}, + })), + }, + }; + } + + const segmentText = segment.text ?? segment.leafText(); + const length = segmentText.length; + return { + ...segment, + kind: segment.text != null ? 'text' : 'leaf', + length, + start: segment.start ?? 0, + attrs: segment.attrs ?? {}, + }; + }); + const size = + contentSize ?? computedSegments.reduce((max, segment) => Math.max(max, segment.start + segment.length), 0); + const attrsMap = new Map(); + computedSegments.forEach((segment) => { + const key = segment.kind === 'inline' ? segment.start : segment.start - 1; + attrsMap.set(key, segment.attrs); + }); + + return { + content: { size }, + nodesBetween: (from, to, callback) => { + computedSegments.forEach((segment) => { + if (segment.kind === 'text') { + callback({ isText: true, text: segment.text, marks: segment.marks ?? [] }, segment.start); + } else if (segment.kind === 'leaf') { + callback({ isLeaf: true, type: { spec: { leafText: segment.leafText } } }, segment.start); + } else { + callback( + { + isInline: true, + isLeaf: segment.inlineNode.isLeaf, + type: { name: segment.inlineNode.typeName, spec: {} }, + attrs: segment.inlineNode.attrs, + toJSON: () => ({ + type: segment.inlineNode.typeName, + attrs: segment.inlineNode.attrs, + }), + }, + segment.start, + ); + } + }); + }, + nodeAt: (pos) => ({ attrs: attrsMap.get(pos) ?? {} }), + }; +}; + +/** + * Strips positional fields from tokens for assertions. + * + * @param {import('./inline-diffing.ts').InlineDiffToken[]} tokens Tokens to normalize. 
+ * @returns {Array>} + */ +const stripTokenOffsets = (tokens) => + tokens.map((token) => { + if (token.kind === 'text') { + return { + kind: token.kind, + char: token.char, + runAttrs: token.runAttrs, + marks: token.marks, + }; + } + return { + kind: token.kind, + nodeType: token.nodeType, + nodeJSON: token.nodeJSON, + }; + }); + +describe('getInlineDiff', () => { + it('returns an empty diff list when both strings are identical', () => { + const oldRuns = buildTextRuns('unchanged'); + const diffs = getInlineDiff(oldRuns, buildTextRuns('unchanged'), oldRuns.length); + + expect(diffs).toEqual([]); + }); + + it('detects text insertions and maps them to resolver positions', () => { + const startOffset = 10; + const oldRuns = buildTextRuns('abc', {}, startOffset); + const diffs = getInlineDiff(oldRuns, buildTextRuns('abXc', {}, startOffset), startOffset + oldRuns.length); + + expect(diffs).toEqual([ + { + action: 'added', + kind: 'text', + startPos: 12, + endPos: 12, + text: 'X', + runAttrs: {}, + }, + ]); + }); + + it('detects deletions and additions in the same diff sequence', () => { + const startOffset = 5; + const oldRuns = buildTextRuns('abcd', {}, startOffset); + const diffs = getInlineDiff(oldRuns, buildTextRuns('abXYd', {}, startOffset), startOffset + oldRuns.length); + + expect(diffs).toEqual([ + { + action: 'deleted', + kind: 'text', + startPos: 7, + endPos: 7, + text: 'c', + runAttrs: {}, + }, + { + action: 'added', + kind: 'text', + startPos: 8, + endPos: 8, + text: 'XY', + runAttrs: {}, + }, + ]); + }); + + it('marks attribute-only changes as modifications and surfaces attribute diffs', () => { + const oldRuns = buildTextRuns('a', { bold: true }, 0); + const diffs = getInlineDiff(oldRuns, buildTextRuns('a', { italic: true }), oldRuns.length); + + expect(diffs).toEqual([ + { + action: 'modified', + kind: 'text', + startPos: 0, + endPos: 0, + oldText: 'a', + newText: 'a', + runAttrsDiff: { + added: { italic: true }, + deleted: { bold: true }, + modified: {}, + }, + marksDiff: null, + }, + ]); + }); + + it('merges contiguous attribute edits that share the same diff metadata', () => { + const startOffset = 5; + const oldRuns = buildTextRuns('ab', { bold: true }, startOffset); + const diffs = getInlineDiff( + oldRuns, + buildTextRuns('ab', { bold: false }, startOffset), + startOffset + oldRuns.length, + ); + + expect(diffs).toEqual([ + { + action: 'modified', + kind: 'text', + startPos: 5, + endPos: 6, + oldText: 'ab', + newText: 'ab', + runAttrsDiff: { + added: {}, + deleted: {}, + modified: { + bold: { from: true, to: false }, + }, + }, + marksDiff: null, + }, + ]); + }); + + it('treats mark-only changes as modifications and surfaces marks diffs', () => { + const oldRuns = buildMarkedTextRuns('a', [{ type: 'bold', attrs: { level: 1 } }]); + const newRuns = buildMarkedTextRuns('a', [{ type: 'italic', attrs: {} }]); + + const diffs = getInlineDiff(oldRuns, newRuns, oldRuns.length); + + expect(diffs).toEqual([ + { + action: 'modified', + kind: 'text', + startPos: 0, + endPos: 0, + oldText: 'a', + newText: 'a', + runAttrsDiff: null, + marksDiff: { + added: [{ name: 'italic', attrs: {} }], + deleted: [{ name: 'bold', attrs: { level: 1 } }], + modified: [], + }, + }, + ]); + }); + + it('surfaces attribute diffs for inline node modifications', () => { + const sharedType = { name: 'link' }; + const oldNode = buildInlineNodeToken({ href: 'https://old.example', label: 'Example' }, sharedType, 3); + const newNode = buildInlineNodeToken({ href: 'https://new.example', label: 'Example' }, 
sharedType, 3); + + const diffs = getInlineDiff([oldNode], [newNode], 4); + + expect(diffs).toEqual([ + { + action: 'modified', + kind: 'inlineNode', + nodeType: 'link', + startPos: 3, + endPos: 3, + oldNodeJSON: oldNode.nodeJSON, + newNodeJSON: newNode.nodeJSON, + attrsDiff: { + added: {}, + deleted: {}, + modified: { + href: { + from: 'https://old.example', + to: 'https://new.example', + }, + }, + }, + }, + ]); + }); +}); + +describe('tokenizeInlineContent', () => { + it('handles basic text nodes', () => { + const mockParagraph = createInlineContainer([{ text: 'Hello', start: 1, attrs: { bold: true } }], 6); + + const tokens = tokenizeInlineContent(mockParagraph, 0); + expect(stripTokenOffsets(tokens)).toEqual(buildTextTokens('Hello', { bold: true }, [])); + expect(tokens[0]?.offset).toBe(1); + expect(tokens[4]?.offset).toBe(5); + }); + + it('handles leaf nodes with leafText', () => { + const mockParagraph = createInlineContainer([{ leafText: () => 'Leaf', start: 1, attrs: { type: 'leaf' } }], 5); + + const tokens = tokenizeInlineContent(mockParagraph, 0); + expect(stripTokenOffsets(tokens)).toEqual(buildTextTokens('Leaf', { type: 'leaf' }, [])); + expect(tokens[0]?.offset).toBe(1); + expect(tokens[3]?.offset).toBe(4); + }); + + it('handles mixed content', () => { + const mockParagraph = createInlineContainer([ + { text: 'Hello', start: 1, attrs: { bold: true } }, + { leafText: () => 'Leaf', start: 6, attrs: { italic: true } }, + ]); + + const tokens = tokenizeInlineContent(mockParagraph, 0); + expect(stripTokenOffsets(tokens)).toEqual([ + ...buildTextTokens('Hello', { bold: true }, []), + ...buildTextTokens('Leaf', { italic: true }, []), + ]); + expect(tokens[0]?.offset).toBe(1); + expect(tokens[5]?.offset).toBe(6); + expect(tokens[tokens.length - 1]?.offset).toBe(9); + }); + + it('handles empty content', () => { + const mockParagraph = createInlineContainer([], 0); + + const tokens = tokenizeInlineContent(mockParagraph, 0); + expect(tokens).toEqual([]); + }); + + it('includes inline nodes that have no textual content', () => { + const inlineAttrs = { kind: 'tab', width: 120 }; + const mockParagraph = createInlineContainer([ + { inlineNode: { typeName: 'tab', attrs: inlineAttrs }, start: 1 }, + { text: 'Text', start: 2, attrs: { bold: false } }, + ]); + + const tokens = tokenizeInlineContent(mockParagraph, 0); + expect(tokens[0]).toMatchObject({ + kind: 'inlineNode', + nodeType: 'tab', + nodeJSON: { + type: 'tab', + attrs: inlineAttrs, + }, + pos: 1, + }); + expect(stripTokenOffsets(tokens.slice(1))).toEqual(buildTextTokens('Text', { bold: false }, [])); + expect(tokens[1]?.offset).toBe(2); + }); + + it('captures marks from text nodes', () => { + const boldMark = { toJSON: () => ({ type: 'bold', attrs: { level: 2 } }) }; + const mockParagraph = createInlineContainer([{ text: 'Hi', start: 1, marks: [boldMark] }], 3); + + const tokens = tokenizeInlineContent(mockParagraph, 0); + expect(tokens[0]?.marks).toEqual([{ type: 'bold', attrs: { level: 2 } }]); + expect(tokens[1]?.marks).toEqual([{ type: 'bold', attrs: { level: 2 } }]); + }); + + it('applies the base offset to token positions', () => { + const mockParagraph = createInlineContainer([{ text: 'Nested', start: 1 }], 7); + + const tokens = tokenizeInlineContent(mockParagraph, 10); + expect(stripTokenOffsets(tokens)).toEqual(buildTextTokens('Nested', {}, [])); + expect(tokens[0]?.offset).toBe(11); + expect(tokens[5]?.offset).toBe(16); + }); +}); diff --git a/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.ts 
b/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.ts
new file mode 100644
index 000000000..d540ce8e7
--- /dev/null
+++ b/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.ts
@@ -0,0 +1,533 @@
+import type { Node as PMNode } from 'prosemirror-model';
+import { getAttributesDiff, getMarksDiff, type AttributesDiff, type MarksDiff } from './attributes-diffing.ts';
+import { diffSequences } from './sequence-diffing.ts';
+
+type NodeJSON = ReturnType<PMNode['toJSON']>;
+type MarkJSON = { type: string; attrs?: Record<string, unknown> };
+
+/**
+ * Supported diff operations for inline changes.
+ */
+type InlineAction = 'added' | 'deleted' | 'modified';
+
+/**
+ * Serialized representation of a single text character plus its run attributes.
+ */
+export type InlineTextToken = {
+  kind: 'text';
+  char: string;
+  runAttrs: Record<string, unknown>;
+  marks: MarkJSON[];
+  offset?: number | null;
+};
+
+/**
+ * Flattened inline node token treated as a single diff unit.
+ */
+export type InlineNodeToken = {
+  kind: 'inlineNode';
+  node: PMNode;
+  nodeType?: string;
+  toJSON?: () => unknown;
+  nodeJSON?: NodeJSON;
+  pos?: number | null;
+};
+
+/**
+ * Union of inline token kinds used as input for Myers diffing.
+ */
+export type InlineDiffToken = InlineTextToken | InlineNodeToken;
+
+/**
+ * Intermediate text diff emitted by `diffSequences`.
+ */
+type RawTextDiff =
+  | {
+      action: Exclude<InlineAction, 'modified'>;
+      idx: number;
+      kind: 'text';
+      text: string;
+      runAttrs: Record<string, unknown>;
+      marks: MarkJSON[];
+    }
+  | {
+      action: 'modified';
+      idx: number;
+      kind: 'text';
+      newText: string;
+      oldText: string;
+      oldAttrs: Record<string, unknown>;
+      newAttrs: Record<string, unknown>;
+      oldMarks: MarkJSON[];
+      newMarks: MarkJSON[];
+    };
+
+/**
+ * Intermediate inline node diff emitted by `diffSequences`.
+ */
+type RawInlineNodeDiff =
+  | {
+      action: Exclude<InlineAction, 'modified'>;
+      idx: number;
+      kind: 'inlineNode';
+      nodeJSON: NodeJSON;
+      nodeType?: string;
+    }
+  | {
+      action: 'modified';
+      idx: number;
+      kind: 'inlineNode';
+      nodeType?: string;
+      oldNodeJSON: NodeJSON;
+      newNodeJSON: NodeJSON;
+      attrsDiff: AttributesDiff | null;
+    };
+
+/**
+ * Combined raw diff union for text and inline node tokens.
+ */
+type RawDiff = RawTextDiff | RawInlineNodeDiff;
+
+/**
+ * Final grouped inline diff exposed to downstream consumers.
+ */
+export interface InlineDiffResult {
+  /** Change type for this inline segment. */
+  action: InlineAction;
+  /** Token kind associated with the diff. */
+  kind: 'text' | 'inlineNode';
+  /** Start position in the old document (or null when unknown). */
+  startPos: number | null;
+  /** End position in the old document (or null when unknown). */
+  endPos: number | null;
+  /** Inserted text for additions. */
+  text?: string;
+  /** Removed text for deletions/modifications. */
+  oldText?: string;
+  /** Inserted text for modifications. */
+  newText?: string;
+  /** Run attributes for added/deleted text. */
+  runAttrs?: Record<string, unknown>;
+  /** Attribute diff for modified runs. */
+  runAttrsDiff?: AttributesDiff | null;
+  /** Marks applied to added/deleted text. */
+  marks?: Record<string, unknown>[];
+  /** Mark diff for modified text. */
+  marksDiff?: MarksDiff | null;
+  /** Inline node type name for node diffs. */
+  nodeType?: string;
+  /** Serialized inline node payload for additions/deletions. */
+  nodeJSON?: NodeJSON;
+  /** Serialized inline node payload before the change. */
+  oldNodeJSON?: NodeJSON;
+  /** Serialized inline node payload after the change. */
+  newNodeJSON?: NodeJSON;
+  /** Attribute diff for modified inline nodes.
*/ + attrsDiff?: AttributesDiff | null; +} + +/** + * Tokenizes inline content into diffable text and inline-node tokens. + * + * @param pmNode ProseMirror node containing inline content. + * @param baseOffset Offset applied to every token position (default: 0). + * @returns Flattened inline tokens with offsets relative to the base offset. + */ +export function tokenizeInlineContent(pmNode: PMNode, baseOffset = 0): InlineDiffToken[] { + const content: InlineDiffToken[] = []; + pmNode.nodesBetween( + 0, + pmNode.content.size, + (node, pos) => { + let nodeText = ''; + + if (node.isText) { + nodeText = node.text ?? ''; + } else if (node.isLeaf) { + const leafTextFn = (node.type.spec as { leafText?: (node: PMNode) => string } | undefined)?.leafText; + if (leafTextFn) { + nodeText = leafTextFn(node); + } + } + + if (nodeText) { + const runNode = pos > 0 ? pmNode.nodeAt(pos - 1) : null; + const runAttrs = runNode?.attrs ?? {}; + const tokenOffset = baseOffset + pos; + for (let i = 0; i < nodeText.length; i += 1) { + content.push({ + kind: 'text', + char: nodeText[i] ?? '', + runAttrs, + offset: tokenOffset + i, + marks: node.marks?.map((mark) => mark.toJSON()) ?? [], + }); + } + return; + } + + if (node.type.name !== 'run' && node.isInline) { + content.push({ + kind: 'inlineNode', + node, + nodeType: node.type.name, + nodeJSON: node.toJSON(), + pos: baseOffset + pos, + }); + } + }, + 0, + ); + return content; +} + +/** + * Computes text-level additions and deletions between two sequences using the generic sequence diff, mapping back to document positions. + * + * @param oldContent Source tokens enriched with document offsets. + * @param newContent Target tokens. + * @param oldParagraphEndPos Absolute document position at the end of the old paragraph (used for trailing inserts). + * @returns List of grouped inline diffs with document positions and text content. + */ +export function getInlineDiff( + oldContent: InlineDiffToken[], + newContent: InlineDiffToken[], + oldParagraphEndPos: number, +): InlineDiffResult[] { + const buildInlineDiff = ( + action: Exclude, + token: InlineDiffToken, + oldIdx: number, + ): RawDiff => { + if (token.kind !== 'text') { + return { + action, + idx: oldIdx, + kind: 'inlineNode', + nodeJSON: token.nodeJSON ?? 
token.node.toJSON(), + nodeType: token.nodeType, + }; + } + return { + action, + idx: oldIdx, + kind: 'text', + text: token.char, + runAttrs: token.runAttrs, + marks: token.marks, + }; + }; + + const diffs = diffSequences(oldContent, newContent, { + comparator: inlineComparator, + shouldProcessEqualAsModification, + canTreatAsModification: (oldToken, newToken) => + oldToken.kind === newToken.kind && oldToken.kind !== 'text' && oldToken.node.type === newToken.node.type, + buildAdded: (token, oldIdx) => buildInlineDiff('added', token, oldIdx), + buildDeleted: (token, oldIdx) => buildInlineDiff('deleted', token, oldIdx), + buildModified: (oldToken, newToken, oldIdx) => { + if (oldToken.kind !== 'text' && newToken.kind !== 'text') { + const attrsDiff = getAttributesDiff(oldToken.node.attrs, newToken.node.attrs); + return { + action: 'modified', + idx: oldIdx, + kind: 'inlineNode', + oldNodeJSON: oldToken.node.toJSON(), + newNodeJSON: newToken.node.toJSON(), + nodeType: oldToken.nodeType, + attrsDiff, + }; + } + if (oldToken.kind === 'text' && newToken.kind === 'text') { + return { + action: 'modified', + idx: oldIdx, + kind: 'text', + newText: newToken.char, + oldText: oldToken.char, + oldAttrs: oldToken.runAttrs, + newAttrs: newToken.runAttrs, + oldMarks: oldToken.marks, + newMarks: newToken.marks, + }; + } + return null; + }, + }); + + return groupDiffs(diffs, oldContent, oldParagraphEndPos); +} + +/** + * Compares two inline tokens to decide if they can be considered equal for the Myers diff. + * Text tokens compare character equality while inline nodes compare their type. + */ +function inlineComparator(a: InlineDiffToken, b: InlineDiffToken): boolean { + if (a.kind !== b.kind) { + return false; + } + + if (a.kind === 'text' && b.kind === 'text') { + return a.char === b.char; + } + if (a.kind === 'inlineNode' && b.kind === 'inlineNode') { + return a.node.type === b.node.type; + } + return false; +} + +/** + * Determines whether equal tokens should still be treated as modifications, either because run attributes changed or the node payload differs. + */ +function shouldProcessEqualAsModification(oldToken: InlineDiffToken, newToken: InlineDiffToken): boolean { + if (oldToken.kind === 'text' && newToken.kind === 'text') { + return ( + Boolean(getAttributesDiff(oldToken.runAttrs, newToken.runAttrs)) || + oldToken.marks?.length !== newToken.marks?.length || + Boolean(getMarksDiff(oldToken.marks, newToken.marks)) + ); + } + + if (oldToken.kind === 'inlineNode' && newToken.kind === 'inlineNode') { + const oldJSON = oldToken.node.toJSON(); + const newJSON = newToken.node.toJSON(); + return JSON.stringify(oldJSON) !== JSON.stringify(newJSON); + } + + return false; +} + +/** + * Accumulator structure used while coalescing contiguous text diffs. + */ +type TextDiffGroup = + | { + action: Exclude; + kind: 'text'; + startPos: number | null; + endPos: number | null; + text: string; + runAttrs: Record; + marks: MarkJSON[]; + } + | { + action: 'modified'; + kind: 'text'; + startPos: number | null; + endPos: number | null; + newText: string; + oldText: string; + oldAttrs: Record; + newAttrs: Record; + oldMarks: MarkJSON[]; + newMarks: MarkJSON[]; + }; + +/** + * Groups raw diff operations into contiguous ranges. + * + * @param diffs Raw diff operations from the sequence diff. + * @param oldTokens Flattened tokens from the old paragraph, used to derive document positions. + * @param oldParagraphEndPos Absolute document position marking the paragraph boundary. 
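+ *
+ * For example, three consecutive 'added' character diffs that share run attributes and resolve
+ * to the same old-document position are coalesced into a single
+ * { action: 'added', kind: 'text', text: 'abc', ... } entry (the 'abc' payload here is illustrative),
+ * while an inline-node diff always flushes the current text group and is emitted on its own.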
+ * @returns Grouped inline diffs with start/end document positions. + */ +function groupDiffs(diffs: RawDiff[], oldTokens: InlineDiffToken[], oldParagraphEndPos: number): InlineDiffResult[] { + const grouped: InlineDiffResult[] = []; + let currentGroup: TextDiffGroup | null = null; + + const pushCurrentGroup = () => { + if (!currentGroup) { + return; + } + const result: InlineDiffResult = { + action: currentGroup.action, + kind: 'text', + startPos: currentGroup.startPos, + endPos: currentGroup.endPos, + }; + + if (currentGroup.action === 'modified') { + result.oldText = currentGroup.oldText; + result.newText = currentGroup.newText; + result.runAttrsDiff = getAttributesDiff(currentGroup.oldAttrs, currentGroup.newAttrs); + result.marksDiff = getMarksDiff(currentGroup.oldMarks, currentGroup.newMarks); + } else { + result.text = currentGroup.text; + result.runAttrs = currentGroup.runAttrs; + result.marks = currentGroup.marks; + } + + grouped.push(result); + currentGroup = null; + }; + + for (const diff of diffs) { + if (diff.kind !== 'text') { + pushCurrentGroup(); + grouped.push({ + action: diff.action, + kind: 'inlineNode', + startPos: resolveTokenPosition(oldTokens, diff.idx, oldParagraphEndPos), + endPos: resolveTokenPosition(oldTokens, diff.idx, oldParagraphEndPos), + nodeType: diff.nodeType, + ...(diff.action === 'modified' + ? { + oldNodeJSON: diff.oldNodeJSON, + newNodeJSON: diff.newNodeJSON, + attrsDiff: diff.attrsDiff ?? null, + } + : { nodeJSON: diff.nodeJSON }), + }); + continue; + } + + if (!currentGroup || !canExtendGroup(currentGroup, diff, oldTokens, oldParagraphEndPos)) { + pushCurrentGroup(); + currentGroup = createTextGroup(diff, oldTokens, oldParagraphEndPos); + } else { + extendTextGroup(currentGroup, diff, oldTokens, oldParagraphEndPos); + } + } + + pushCurrentGroup(); + return grouped; +} + +/** + * Builds a fresh text diff group seeded with the current diff token. + */ +function createTextGroup(diff: RawTextDiff, oldTokens: InlineDiffToken[], oldParagraphEndPos: number): TextDiffGroup { + const baseGroup = + diff.action === 'modified' + ? { + action: diff.action, + kind: 'text' as const, + startPos: resolveTokenPosition(oldTokens, diff.idx, oldParagraphEndPos), + endPos: resolveTokenPosition(oldTokens, diff.idx, oldParagraphEndPos), + newText: diff.newText, + oldText: diff.oldText, + oldAttrs: diff.oldAttrs, + newAttrs: diff.newAttrs, + oldMarks: diff.oldMarks, + newMarks: diff.newMarks, + } + : { + action: diff.action, + kind: 'text' as const, + startPos: resolveTokenPosition(oldTokens, diff.idx, oldParagraphEndPos), + endPos: resolveTokenPosition(oldTokens, diff.idx, oldParagraphEndPos), + text: diff.text, + runAttrs: diff.runAttrs, + marks: diff.marks, + }; + + return baseGroup; +} + +/** + * Expands the current text group with the incoming diff token. + * Keeps start/end positions updated while concatenating text payloads. + */ +function extendTextGroup( + group: TextDiffGroup, + diff: RawTextDiff, + oldTokens: InlineDiffToken[], + oldParagraphEndPos: number, +): void { + group.endPos = resolveTokenPosition(oldTokens, diff.idx, oldParagraphEndPos); + if (group.action === 'modified' && diff.action === 'modified') { + group.newText += diff.newText; + group.oldText += diff.oldText; + } else if (group.action !== 'modified' && diff.action !== 'modified') { + group.text += diff.text; + } +} + +/** + * Determines whether a text diff token can be merged into the current group. + * Checks action, attributes, and adjacency constraints required by the grouping heuristic. 
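+ *
+ * Roughly: an 'added' group only absorbs a diff that resolves to the same insertion point
+ * (group.startPos === diffPos), while 'deleted' and 'modified' groups require strict adjacency
+ * (group.endPos + 1 === diffPos) in addition to matching run attributes and marks.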
+ */ +function canExtendGroup( + group: TextDiffGroup, + diff: RawTextDiff, + oldTokens: InlineDiffToken[], + oldParagraphEndPos: number, +): boolean { + if (group.action !== diff.action) { + return false; + } + + if (group.action === 'modified' && diff.action === 'modified') { + if (!areInlineAttrsEqual(group.oldAttrs, diff.oldAttrs) || !areInlineAttrsEqual(group.newAttrs, diff.newAttrs)) { + return false; + } + if (!areInlineMarksEqual(group.oldMarks, diff.oldMarks) || !areInlineMarksEqual(group.newMarks, diff.newMarks)) { + return false; + } + } else if (group.action !== 'modified' && diff.action !== 'modified') { + if (!areInlineAttrsEqual(group.runAttrs, diff.runAttrs)) { + return false; + } + if (!areInlineMarksEqual(group.marks, diff.marks)) { + return false; + } + } else { + return false; + } + + const diffPos = resolveTokenPosition(oldTokens, diff.idx, oldParagraphEndPos); + if (group.action === 'added') { + return group.startPos === diffPos; + } + if (diffPos == null || group.endPos == null) { + return false; + } + return group.endPos + 1 === diffPos; +} + +/** + * Maps a raw diff index back to an absolute document position using the original token offsets. + * + * @param tokens Flattened tokens from the old paragraph. + * @param idx Index provided by the Myers diff output. + * @param paragraphEndPos Absolute document position marking the paragraph boundary; used when idx equals the token length. + * @returns Document position or null when the index is outside the known ranges. + */ +function resolveTokenPosition(tokens: InlineDiffToken[], idx: number, paragraphEndPos: number): number | null { + if (idx < 0) { + return null; + } + const token = tokens[idx]; + if (token) { + if (token.kind === 'text') { + return token.offset ?? null; + } + return token.pos ?? null; + } + if (idx === tokens.length) { + return paragraphEndPos; + } + return null; +} + +/** + * Compares two sets of inline attributes and determines if they are equal. + * + * @param a - The first set of attributes to compare. + * @param b - The second set of attributes to compare. + * @returns `true` if the attributes are equal, `false` otherwise. + */ +function areInlineAttrsEqual(a: Record | undefined, b: Record | undefined): boolean { + return !getAttributesDiff(a ?? {}, b ?? {}); +} + +/** + * Compares two sets of inline marks and determines if they are equal. + * + * @param a - The first set of marks to compare. + * @param b - The second set of marks to compare. + * @returns `true` if the marks are equal, `false` otherwise. + */ +function areInlineMarksEqual(a: MarkJSON[] | undefined, b: MarkJSON[] | undefined): boolean { + return !getMarksDiff(a ?? [], b ?? []); +} diff --git a/packages/super-editor/src/extensions/diffing/algorithm/myers-diff.ts b/packages/super-editor/src/extensions/diffing/algorithm/myers-diff.ts new file mode 100644 index 000000000..b90e709c7 --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/myers-diff.ts @@ -0,0 +1,133 @@ +/** + * A primitive Myers diff operation describing equality, insertion, or deletion. + */ +export type MyersOperation = 'equal' | 'insert' | 'delete'; + +/** + * Minimal read-only sequence abstraction required by the diff algorithm. + */ +type Sequence = ArrayLike; +/** + * Equality predicate applied while traversing sequences. + */ +type Comparator = (a: T, b: T) => boolean; + +/** + * Computes a Myers diff operation list for arbitrary sequences. + * + * @param oldSeq Original sequence to compare. + * @param newSeq Updated sequence to compare. 
+ * @param isEqual Equality predicate used to determine matching elements. + * @returns Ordered list of diff operations describing how to transform {@link oldSeq} into {@link newSeq}. + */ +export function myersDiff(oldSeq: Sequence, newSeq: Sequence, isEqual: Comparator): MyersOperation[] { + const oldLen = oldSeq.length; + const newLen = newSeq.length; + + if (oldLen === 0 && newLen === 0) { + return []; + } + + // Myers diff bookkeeping: +2 padding keeps diagonal lookups in bounds. + const max = oldLen + newLen; + const size = 2 * max + 3; + const offset = max + 1; + const v = new Array(size).fill(-1); + v[offset + 1] = 0; + + const trace: number[][] = []; + let foundPath = false; + + for (let d = 0; d <= max && !foundPath; d += 1) { + for (let k = -d; k <= d; k += 2) { + const index = offset + k; + let x: number; + + if (k === -d || (k !== d && v[index - 1] < v[index + 1])) { + x = v[index + 1]; + } else { + x = v[index - 1] + 1; + } + + let y = x - k; + while (x < oldLen && y < newLen && isEqual(oldSeq[x], newSeq[y])) { + x += 1; + y += 1; + } + + v[index] = x; + + if (x >= oldLen && y >= newLen) { + foundPath = true; + break; + } + } + trace.push(v.slice()); + } + + return backtrackMyers(trace, oldLen, newLen, offset); +} + +/** + * Reconstructs the shortest edit script by walking the previously recorded V vectors. + * + * @param trace Snapshot of diagonal furthest-reaching points per edit distance. + * @param oldLen Length of the original sequence. + * @param newLen Length of the target sequence. + * @param offset Offset applied to diagonal indexes to keep array lookups positive. + * @returns Concrete step-by-step operations transforming {@link oldLen} chars into {@link newLen} chars. + */ +function backtrackMyers(trace: number[][], oldLen: number, newLen: number, offset: number): MyersOperation[] { + const operations: MyersOperation[] = []; + let x = oldLen; + let y = newLen; + + for (let d = trace.length - 1; d > 0; d -= 1) { + const v = trace[d - 1]; + const k = x - y; + const index = offset + k; + + let prevK: number; + if (k === -d || (k !== d && v[index - 1] < v[index + 1])) { + prevK = k + 1; + } else { + prevK = k - 1; + } + + const prevIndex = offset + prevK; + const prevX = v[prevIndex]; + const prevY = prevX - prevK; + + while (x > prevX && y > prevY) { + x -= 1; + y -= 1; + operations.push('equal'); + } + + if (x === prevX) { + y -= 1; + operations.push('insert'); + } else { + x -= 1; + operations.push('delete'); + } + } + + while (x > 0 && y > 0) { + x -= 1; + y -= 1; + operations.push('equal'); + } + + while (x > 0) { + x -= 1; + operations.push('delete'); + } + + while (y > 0) { + y -= 1; + operations.push('insert'); + } + + return operations.reverse(); +} diff --git a/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.test.js b/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.test.js new file mode 100644 index 000000000..20db466b7 --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.test.js @@ -0,0 +1,272 @@ +import { describe, it, expect } from 'vitest'; +import { + shouldProcessEqualAsModification, + paragraphComparator, + buildAddedParagraphDiff, + buildDeletedParagraphDiff, + buildModifiedParagraphDiff, + canTreatAsModification, +} from './paragraph-diffing.ts'; + +/** + * Builds text tokens without offsets for paragraph diff tests. + * + * @param {string} text Text content to tokenize. + * @param {Record} attrs Run attributes to attach. 
+ * @returns {Array>} + */ +const buildRuns = (text, attrs = {}) => text.split('').map((char) => ({ char, runAttrs: attrs, kind: 'text' })); + +/** + * Builds marked text tokens with offsets for paragraph diff tests. + * + * @param {string} text Text content to tokenize. + * @param {Array>} marks Marks to attach. + * @param {Record} attrs Run attributes to attach. + * @param {number} offsetStart Offset base for the first token. + * @returns {Array>} + */ +const buildMarkedRuns = (text, marks, attrs = {}, offsetStart = 0) => + text.split('').map((char, index) => ({ + char, + runAttrs: attrs, + kind: 'text', + marks, + offset: offsetStart + index, + })); + +/** + * Creates a mock paragraph node with default attributes. + * + * @param {Record} overrides Overrides for the mock node. + * @returns {Record} + */ +const createParagraphNode = (overrides = {}) => { + const node = { + type: { name: 'paragraph', ...(overrides.type || {}) }, + attrs: {}, + nodeSize: 5, + ...overrides, + }; + if (typeof node.toJSON !== 'function') { + node.toJSON = () => ({ type: node.type.name, attrs: node.attrs }); + } + return node; +}; + +/** + * Creates a paragraph snapshot stub for diff builder tests. + * + * @param {Record} overrides Overrides for the snapshot. + * @returns {Record} + */ +const createParagraphInfo = (overrides = {}) => { + const fullText = overrides.fullText ?? 'text'; + const paragraphPos = overrides.pos ?? 0; + const baseTokens = + overrides.text ?? + buildRuns(fullText).map((token, index) => ({ + ...token, + offset: paragraphPos + 1 + index, + })); + const textTokens = baseTokens.map((token, index) => { + if (token.kind === 'text' && token.offset == null) { + return { ...token, offset: paragraphPos + 1 + index }; + } + if (token.kind === 'inlineNode' && token.pos == null) { + return { ...token, pos: paragraphPos + 1 + index }; + } + return token; + }); + + return { + node: createParagraphNode(overrides.node), + pos: paragraphPos, + depth: 0, + fullText, + text: textTokens, + endPos: overrides.endPos ?? 
paragraphPos + 1 + fullText.length, + ...overrides, + }; +}; + +describe('shouldProcessEqualAsModification', () => { + it('returns true when node JSON differs', () => { + const baseNode = { toJSON: () => ({ attrs: { bold: true } }) }; + const modifiedNode = { toJSON: () => ({ attrs: { bold: false } }) }; + + expect(shouldProcessEqualAsModification({ node: baseNode }, { node: modifiedNode })).toBe(true); + }); + + it('returns false when serialized nodes are identical', () => { + const node = { toJSON: () => ({ attrs: { bold: true } }) }; + expect(shouldProcessEqualAsModification({ node }, { node })).toBe(false); + }); +}); + +describe('paragraphComparator', () => { + it('treats paragraphs with the same paraId as equal', () => { + const makeInfo = (id) => ({ node: { attrs: { paraId: id } } }); + expect(paragraphComparator(makeInfo('123'), makeInfo('123'))).toBe(true); + }); + + it('falls back to comparing fullText when ids differ', () => { + const makeInfo = (text) => ({ node: { attrs: {} }, fullText: text }); + expect(paragraphComparator(makeInfo('same text'), makeInfo('same text'))).toBe(true); + }); + + it('returns false for paragraphs with different identity signals', () => { + expect(paragraphComparator({ fullText: 'one' }, { fullText: 'two' })).toBe(false); + }); +}); + +describe('paragraph diff builders', () => { + it('builds added paragraph payloads with consistent metadata', () => { + const paragraph = createParagraphInfo({ + node: createParagraphNode({ type: { name: 'paragraph' } }), + fullText: 'Hello', + }); + const previousNode = { pos: 10, depth: 0, node: { nodeSize: 4 } }; + + expect(buildAddedParagraphDiff(paragraph, previousNode)).toEqual({ + action: 'added', + nodeType: 'paragraph', + nodeJSON: paragraph.node.toJSON(), + text: 'Hello', + pos: 14, + }); + }); + + it('builds deletion payloads reflecting the original paragraph context', () => { + const paragraph = createParagraphInfo({ pos: 7, fullText: 'Old text' }); + + expect(buildDeletedParagraphDiff(paragraph)).toEqual({ + action: 'deleted', + nodeType: 'paragraph', + nodeJSON: paragraph.node.toJSON(), + oldText: 'Old text', + pos: 7, + }); + }); + + it('returns a diff with inline changes when content differs', () => { + const oldParagraph = createParagraphInfo({ + pos: 5, + fullText: 'foo', + text: buildRuns('foo'), + node: createParagraphNode({ attrs: { align: 'left' } }), + }); + const newParagraph = createParagraphInfo({ + pos: 5, + fullText: 'bar', + text: buildRuns('bar'), + node: createParagraphNode({ attrs: { align: 'left' } }), + }); + + const diff = buildModifiedParagraphDiff(oldParagraph, newParagraph); + expect(diff).not.toBeNull(); + expect(diff).toMatchObject({ + action: 'modified', + nodeType: 'paragraph', + oldNodeJSON: oldParagraph.node.toJSON(), + newNodeJSON: newParagraph.node.toJSON(), + oldText: 'foo', + newText: 'bar', + pos: 5, + attrsDiff: null, + }); + expect(diff.contentDiff.length).toBeGreaterThan(0); + }); + + it('returns a diff when only inline marks change', () => { + const oldParagraph = createParagraphInfo({ + fullText: 'a', + text: buildMarkedRuns('a', [{ type: 'bold', attrs: { level: 1 } }], {}, 1), + node: createParagraphNode({ attrs: { align: 'left' } }), + }); + const newParagraph = createParagraphInfo({ + fullText: 'a', + text: buildMarkedRuns('a', [{ type: 'bold', attrs: { level: 2 } }], {}, 1), + node: createParagraphNode({ attrs: { align: 'left' } }), + }); + + const diff = buildModifiedParagraphDiff(oldParagraph, newParagraph); + expect(diff).not.toBeNull(); + 
expect(diff?.attrsDiff).toBeNull(); + expect(diff?.contentDiff).toEqual([ + { + action: 'modified', + kind: 'text', + startPos: 1, + endPos: 1, + oldText: 'a', + newText: 'a', + runAttrsDiff: null, + marksDiff: { + added: [], + deleted: [], + modified: [ + { + name: 'bold', + oldAttrs: { level: 1 }, + newAttrs: { level: 2 }, + }, + ], + }, + }, + ]); + }); + + it('returns null when neither text nor attributes changed', () => { + const baseParagraph = createParagraphInfo({ + fullText: 'stable', + node: createParagraphNode({ attrs: { align: 'left' } }), + }); + + expect(buildModifiedParagraphDiff(baseParagraph, baseParagraph)).toBeNull(); + }); + + it('returns a diff when only the attributes change', () => { + const oldParagraph = createParagraphInfo({ + node: createParagraphNode({ attrs: { align: 'left' } }), + }); + const newParagraph = createParagraphInfo({ + node: createParagraphNode({ attrs: { align: 'right' } }), + }); + + const diff = buildModifiedParagraphDiff(oldParagraph, newParagraph); + expect(diff).not.toBeNull(); + expect(diff.contentDiff).toEqual([]); + expect(diff.attrsDiff?.modified).toHaveProperty('align'); + expect(diff.oldNodeJSON).toEqual(oldParagraph.node.toJSON()); + expect(diff.newNodeJSON).toEqual(newParagraph.node.toJSON()); + }); +}); + +describe('canTreatAsModification', () => { + it('returns true when paragraph comparator matches by paraId', () => { + const buildInfo = (paraId) => ({ + node: { attrs: { paraId } }, + fullText: 'abc', + }); + expect(canTreatAsModification(buildInfo('id'), buildInfo('id'))).toBe(true); + }); + + it('returns false for short paragraphs lacking identity signals', () => { + const a = { node: { attrs: {} }, fullText: 'abc' }; + const b = { node: { attrs: {} }, fullText: 'xyz' }; + expect(canTreatAsModification(a, b)).toBe(false); + }); + + it('returns true when textual similarity exceeds the threshold', () => { + const a = { node: { attrs: {} }, fullText: 'lorem' }; + const b = { node: { attrs: {} }, fullText: 'loren' }; + expect(canTreatAsModification(a, b)).toBe(true); + }); + + it('returns false when paragraphs are dissimilar', () => { + const a = { node: { attrs: {} }, fullText: 'lorem ipsum' }; + const b = { node: { attrs: {} }, fullText: 'dolor sit' }; + expect(canTreatAsModification(a, b)).toBe(false); + }); +}); diff --git a/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.ts b/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.ts new file mode 100644 index 000000000..20d30b876 --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.ts @@ -0,0 +1,215 @@ +import type { Node as PMNode } from 'prosemirror-model'; +import { getInlineDiff, tokenizeInlineContent, type InlineDiffToken, type InlineDiffResult } from './inline-diffing.ts'; +import { getAttributesDiff, type AttributesDiff } from './attributes-diffing.ts'; +import { getInsertionPos } from './diff-utils.ts'; +import { levenshteinDistance } from './similarity.ts'; + +// Heuristics that prevent unrelated paragraphs from being paired as modifications. +const SIMILARITY_THRESHOLD = 0.65; +const MIN_LENGTH_FOR_SIMILARITY = 4; + +type NodeJSON = ReturnType; + +export interface ParagraphNodeInfo { + /** ProseMirror paragraph node reference. */ + node: PMNode; + /** Absolute position of the paragraph in the document. */ + pos: number; + /** Depth of the paragraph within the document tree. */ + depth: number; + /** Flattened inline tokens for inline diffing. 
*/ + text: InlineDiffToken[]; + /** Absolute end position used for trailing inserts. */ + endPos: number; + /** Plain-text representation of the paragraph content. */ + fullText: string; +} + +/** + * Base shape shared by every paragraph diff payload. + */ +interface ParagraphDiffBase { + /** Change type for this paragraph. */ + action: Action; + /** Node type name (always `paragraph`). */ + nodeType: string; + /** Anchor position in the old document for replaying diffs. */ + pos: number; +} + +/** + * Diff payload produced when a paragraph is inserted. + */ +type AddedParagraphDiff = ParagraphDiffBase<'added'> & { + /** Serialized paragraph payload inserted into the document. */ + nodeJSON: NodeJSON; + /** Plain-text content of the inserted paragraph. */ + text: string; +}; + +/** + * Diff payload produced when a paragraph is deleted. + */ +type DeletedParagraphDiff = ParagraphDiffBase<'deleted'> & { + /** Serialized paragraph payload removed from the document. */ + nodeJSON: NodeJSON; + /** Plain-text content of the removed paragraph. */ + oldText: string; +}; + +/** + * Diff payload emitted when a paragraph changes, including inline edits. + */ +type ModifiedParagraphDiff = ParagraphDiffBase<'modified'> & { + /** Serialized paragraph payload before the change. */ + oldNodeJSON: NodeJSON; + /** Serialized paragraph payload after the change. */ + newNodeJSON: NodeJSON; + /** Plain-text content before the change. */ + oldText: string; + /** Plain-text content after the change. */ + newText: string; + /** Inline diff operations within the paragraph. */ + contentDiff: InlineDiffResult[]; + /** Attribute-level diff for the paragraph. */ + attrsDiff: AttributesDiff | null; +}; + +/** + * Union of every diff variant the paragraph diffing logic can produce. + */ +export type ParagraphDiff = AddedParagraphDiff | DeletedParagraphDiff | ModifiedParagraphDiff; + +/** + * Creates a reusable snapshot that stores flattened paragraph content plus position metadata. + * + * @param paragraph Paragraph node to flatten. + * @param paragraphPos Position of the paragraph in the document. + * @param depth Depth of the paragraph within the document tree. + * @returns Snapshot containing tokens (with offsets) and derived metadata. + */ +export function createParagraphSnapshot(paragraph: PMNode, paragraphPos: number, depth: number): ParagraphNodeInfo { + const text = tokenizeInlineContent(paragraph, paragraphPos + 1); + return { + node: paragraph, + pos: paragraphPos, + depth, + text, + endPos: paragraphPos + 1 + paragraph.content.size, + fullText: text.map((token) => (token.kind === 'text' ? token.char : '')).join(''), + }; +} + +/** + * Determines whether equal paragraph nodes should still be marked as modified because their serialized structure differs. + * + * @param oldParagraph Previous paragraph node reference. + * @param newParagraph Updated paragraph node reference. + * @returns True when the serialized JSON payload differs. + */ +export function shouldProcessEqualAsModification( + oldParagraph: ParagraphNodeInfo, + newParagraph: ParagraphNodeInfo, +): boolean { + return JSON.stringify(oldParagraph.node.toJSON()) !== JSON.stringify(newParagraph.node.toJSON()); +} + +/** + * Compares two paragraphs for identity based on paraId or text content. 
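+ *
+ * A minimal illustrative sketch (partial ParagraphNodeInfo objects shown for brevity; values are assumed):
+ * @example
+ * // A shared paraId matches even when the visible text changed.
+ * paragraphComparator(
+ *   { node: { attrs: { paraId: '42' } }, fullText: 'old wording' },
+ *   { node: { attrs: { paraId: '42' } }, fullText: 'new wording' },
+ * ); // => true
+ * // Without ids, plain-text equality decides.
+ * paragraphComparator({ node: { attrs: {} }, fullText: 'a' }, { node: { attrs: {} }, fullText: 'b' }); // => false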
+ */ +export function paragraphComparator(oldParagraph: ParagraphNodeInfo, newParagraph: ParagraphNodeInfo): boolean { + const oldId = oldParagraph?.node?.attrs?.paraId; + const newId = newParagraph?.node?.attrs?.paraId; + if (oldId && newId && oldId === newId) { + return true; + } + return oldParagraph?.fullText === newParagraph?.fullText; +} + +/** + * Builds a normalized payload describing a paragraph addition, ensuring all consumers receive the same metadata shape. + */ +export function buildAddedParagraphDiff( + paragraph: ParagraphNodeInfo, + previousOldNodeInfo?: Pick, +): AddedParagraphDiff { + return { + action: 'added', + nodeType: paragraph.node.type.name, + nodeJSON: paragraph.node.toJSON(), + text: paragraph.fullText, + pos: getInsertionPos(paragraph.depth, previousOldNodeInfo), + }; +} + +/** + * Builds a normalized payload describing a paragraph deletion so diff consumers can show removals with all context. + */ +export function buildDeletedParagraphDiff(paragraph: ParagraphNodeInfo): DeletedParagraphDiff { + return { + action: 'deleted', + nodeType: paragraph.node.type.name, + nodeJSON: paragraph.node.toJSON(), + oldText: paragraph.fullText, + pos: paragraph.pos, + }; +} + +/** + * Builds the payload for a paragraph modification, including text-level diffs, so renderers can highlight edits inline. + */ +export function buildModifiedParagraphDiff( + oldParagraph: ParagraphNodeInfo, + newParagraph: ParagraphNodeInfo, +): ModifiedParagraphDiff | null { + const contentDiff = getInlineDiff(oldParagraph.text, newParagraph.text, oldParagraph.endPos); + + const attrsDiff = getAttributesDiff(oldParagraph.node.attrs, newParagraph.node.attrs); + if (contentDiff.length === 0 && !attrsDiff) { + return null; + } + + return { + action: 'modified', + nodeType: oldParagraph.node.type.name, + oldNodeJSON: oldParagraph.node.toJSON(), + newNodeJSON: newParagraph.node.toJSON(), + oldText: oldParagraph.fullText, + newText: newParagraph.fullText, + pos: oldParagraph.pos, + contentDiff, + attrsDiff, + }; +} + +/** + * Decides whether a delete/insert pair should be reinterpreted as a modification to minimize noisy diff output. + */ +export function canTreatAsModification(oldParagraph: ParagraphNodeInfo, newParagraph: ParagraphNodeInfo): boolean { + if (paragraphComparator(oldParagraph, newParagraph)) { + return true; + } + + const oldText = oldParagraph.fullText; + const newText = newParagraph.fullText; + const maxLength = Math.max(oldText.length, newText.length); + if (maxLength < MIN_LENGTH_FOR_SIMILARITY) { + return false; + } + + const similarity = getTextSimilarityScore(oldText, newText); + return similarity >= SIMILARITY_THRESHOLD; +} + +/** + * Scores the similarity between two text strings so the diff can decide if they represent the same conceptual paragraph. 
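+ *
+ * A worked example (numbers follow directly from the formula below):
+ * @example
+ * // 'lorem' vs 'loren': Levenshtein distance 1 over a max length of 5 gives 1 - 1/5 = 0.8,
+ * // which clears SIMILARITY_THRESHOLD (0.65), so the pair is treated as a modification.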
+ */ +function getTextSimilarityScore(oldText: string, newText: string): number { + if (!oldText && !newText) { + return 1; + } + + const distance = levenshteinDistance(oldText, newText); + const maxLength = Math.max(oldText.length, newText.length) || 1; + return 1 - distance / maxLength; +} diff --git a/packages/super-editor/src/extensions/diffing/algorithm/sequence-diffing.test.js b/packages/super-editor/src/extensions/diffing/algorithm/sequence-diffing.test.js new file mode 100644 index 000000000..e953cb82a --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/sequence-diffing.test.js @@ -0,0 +1,73 @@ +import { describe, it, expect } from 'vitest'; +import { diffSequences } from './sequence-diffing.ts'; + +const buildAdded = (item) => ({ action: 'added', id: item.id }); +const buildDeleted = (item) => ({ action: 'deleted', id: item.id }); +const buildModified = (oldItem, newItem) => ({ + action: 'modified', + id: oldItem.id ?? newItem.id, + from: oldItem.value, + to: newItem.value, +}); + +describe('diffSequences', () => { + it('detects modifications for equal-aligned items when requested', () => { + const oldSeq = [ + { id: 'a', value: 'Hello' }, + { id: 'b', value: 'World' }, + ]; + const newSeq = [ + { id: 'a', value: 'Hello' }, + { id: 'b', value: 'World!!!' }, + ]; + + const diffs = diffSequences(oldSeq, newSeq, { + comparator: (a, b) => a.id === b.id, + shouldProcessEqualAsModification: (oldItem, newItem) => oldItem.value !== newItem.value, + buildAdded, + buildDeleted, + buildModified, + }); + + expect(diffs).toEqual([{ action: 'modified', id: 'b', from: 'World', to: 'World!!!' }]); + }); + + it('pairs delete/insert operations into modifications when allowed', () => { + const oldSeq = [ + { id: 'a', value: 'Alpha' }, + { id: 'b', value: 'Beta' }, + ]; + const newSeq = [ + { id: 'a', value: 'Alpha' }, + { id: 'c', value: 'Beta v2' }, + ]; + + const diffs = diffSequences(oldSeq, newSeq, { + comparator: (a, b) => a.id === b.id, + canTreatAsModification: (oldItem, newItem) => oldItem.value[0] === newItem.value[0], + shouldProcessEqualAsModification: () => false, + buildAdded, + buildDeleted, + buildModified, + }); + + expect(diffs).toEqual([{ action: 'modified', id: 'b', from: 'Beta', to: 'Beta v2' }]); + }); + + it('emits additions and deletions when items cannot be paired', () => { + const oldSeq = [{ id: 'a', value: 'Foo' }]; + const newSeq = [{ id: 'b', value: 'Bar' }]; + + const diffs = diffSequences(oldSeq, newSeq, { + comparator: (a, b) => a.id === b.id, + buildAdded, + buildDeleted, + buildModified, + }); + + expect(diffs).toEqual([ + { action: 'deleted', id: 'a' }, + { action: 'added', id: 'b' }, + ]); + }); +}); diff --git a/packages/super-editor/src/extensions/diffing/algorithm/sequence-diffing.ts b/packages/super-editor/src/extensions/diffing/algorithm/sequence-diffing.ts new file mode 100644 index 000000000..81e8b2099 --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/sequence-diffing.ts @@ -0,0 +1,197 @@ +import { myersDiff, type MyersOperation } from './myers-diff.ts'; + +/** + * Comparator used to determine whether two sequence values are equal. + */ +type Comparator = (a: T, b: T) => boolean; + +/** + * Discrete operation emitted by the Myers diff before higher-level mapping. 
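+ *
+ * For example, the Myers output ['equal', 'delete', 'insert'] maps to
+ * [{ type: 'equal', oldIdx: 0, newIdx: 0 }, { type: 'delete', oldIdx: 1, newIdx: 1 },
+ * { type: 'insert', oldIdx: 2, newIdx: 1 }]; indexes advance as implemented in `buildOperationSteps` below.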
+ */ +type OperationStep = + | { type: 'equal'; oldIdx: number; newIdx: number } + | { type: 'delete'; oldIdx: number; newIdx: number } + | { type: 'insert'; oldIdx: number; newIdx: number }; + +/** + * Hooks and comparators used to translate raw Myers operations into domain-specific diffs. + */ +export interface SequenceDiffOptions { + /** Comparator to determine whether two items are equivalent. */ + comparator?: Comparator; + /** Builder invoked for insertions in the new sequence. */ + buildAdded: (item: T, oldIdx: number, previousOldItem: T | undefined, newIdx: number) => Added | null | undefined; + /** Builder invoked for deletions in the old sequence. */ + buildDeleted: (item: T, oldIdx: number, newIdx: number) => Deleted | null | undefined; + /** Builder invoked for modifications between old and new items. */ + buildModified: (oldItem: T, newItem: T, oldIdx: number, newIdx: number) => Modified | null | undefined; + /** Predicate to emit modifications even when items compare equal. */ + shouldProcessEqualAsModification?: (oldItem: T, newItem: T, oldIdx: number, newIdx: number) => boolean; + /** Predicate to treat delete+insert pairs as a modification. */ + canTreatAsModification?: (deletedItem: T, insertedItem: T, oldIdx: number, newIdx: number) => boolean; + /** Optional reordering hook for Myers operations before mapping. */ + reorderOperations?: (operations: MyersOperation[]) => MyersOperation[]; +} + +/** + * Generic sequence diff helper built on top of Myers algorithm. + * Allows callers to provide custom comparators and payload builders that determine how + * additions, deletions, and modifications should be reported. + * + * @param oldSeq Original sequence to diff from. + * @param newSeq Target sequence to diff against. + * @param options Hook bundle that controls how additions/deletions/modifications are emitted. + * @returns Sequence of mapped diff payloads produced by the caller-provided builders. + */ +export function diffSequences( + oldSeq: T[], + newSeq: T[], + options: SequenceDiffOptions, +): Array { + if (!options) { + throw new Error('diffSequences requires an options object.'); + } + + const comparator: Comparator = options.comparator ?? ((a: T, b: T) => a === b); + const reorder = options.reorderOperations ?? 
((ops: MyersOperation[]) => ops); + const canTreatAsModification = options.canTreatAsModification; + const shouldProcessEqualAsModification = options.shouldProcessEqualAsModification; + + if (typeof options.buildAdded !== 'function') { + throw new Error('diffSequences requires a buildAdded option.'); + } + if (typeof options.buildDeleted !== 'function') { + throw new Error('diffSequences requires a buildDeleted option.'); + } + if (typeof options.buildModified !== 'function') { + throw new Error('diffSequences requires a buildModified option.'); + } + + const operations = reorder(myersDiff(oldSeq, newSeq, comparator)); + const steps = buildOperationSteps(operations); + + const diffs: Array = []; + for (let i = 0; i < steps.length; i += 1) { + const step = steps[i]; + + if (step.type === 'equal') { + if (!shouldProcessEqualAsModification) { + continue; + } + const oldItem = oldSeq[step.oldIdx]; + const newItem = newSeq[step.newIdx]; + if (!shouldProcessEqualAsModification(oldItem, newItem, step.oldIdx, step.newIdx)) { + continue; + } + const diff = options.buildModified(oldItem, newItem, step.oldIdx, step.newIdx); + if (diff != null) { + diffs.push(diff); + } + continue; + } + + if (step.type === 'delete') { + const nextStep = steps[i + 1]; + if ( + nextStep?.type === 'insert' && + typeof canTreatAsModification === 'function' && + canTreatAsModification(oldSeq[step.oldIdx], newSeq[nextStep.newIdx], step.oldIdx, nextStep.newIdx) + ) { + const diff = options.buildModified(oldSeq[step.oldIdx], newSeq[nextStep.newIdx], step.oldIdx, nextStep.newIdx); + if (diff != null) { + diffs.push(diff); + } + i += 1; + } else { + const diff = options.buildDeleted(oldSeq[step.oldIdx], step.oldIdx, step.newIdx); + if (diff != null) { + diffs.push(diff); + } + } + continue; + } + + if (step.type === 'insert') { + const diff = options.buildAdded(newSeq[step.newIdx], step.oldIdx, oldSeq[step.oldIdx - 1], step.newIdx); + if (diff != null) { + diffs.push(diff); + } + } + } + + return diffs; +} + +/** + * Translates the raw Myers operations into indexed steps so higher-level logic can reason about positions. + * + * @param operations Myers diff operations produced for the input sequences. + * @returns Indexed steps that reference the original `oldSeq` and `newSeq` positions. + */ +function buildOperationSteps(operations: MyersOperation[]): OperationStep[] { + let oldIdx = 0; + let newIdx = 0; + const steps: OperationStep[] = []; + + for (const op of operations) { + if (op === 'equal') { + steps.push({ type: 'equal', oldIdx, newIdx }); + oldIdx += 1; + newIdx += 1; + } else if (op === 'delete') { + steps.push({ type: 'delete', oldIdx, newIdx }); + oldIdx += 1; + } else if (op === 'insert') { + steps.push({ type: 'insert', oldIdx, newIdx }); + newIdx += 1; + } + } + + return steps; +} + +/** + * Normalizes interleaved delete/insert operations so consumers can treat replacements as paired steps. + * + * @param operations Raw Myers operations. + * @returns Normalized operation sequence with deletes and inserts paired. 
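+ *
+ * A small sketch of the pairing behaviour (operation arrays are illustrative):
+ * @example
+ * reorderDiffOperations(['delete', 'delete', 'insert', 'insert']);
+ * // => ['delete', 'insert', 'delete', 'insert'] (runs are interleaved into replacement pairs)
+ * reorderDiffOperations(['delete', 'delete', 'insert']);
+ * // => ['delete', 'insert', 'delete'] (unpaired deletes trail the pairs)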
+ */ +export function reorderDiffOperations(operations: MyersOperation[]): MyersOperation[] { + const normalized: MyersOperation[] = []; + + for (let i = 0; i < operations.length; i += 1) { + const op = operations[i]; + if (op !== 'delete') { + normalized.push(op); + continue; + } + + let deleteCount = 0; + while (i < operations.length && operations[i] === 'delete') { + deleteCount += 1; + i += 1; + } + + let insertCount = 0; + let insertCursor = i; + while (insertCursor < operations.length && operations[insertCursor] === 'insert') { + insertCount += 1; + insertCursor += 1; + } + + const pairCount = Math.min(deleteCount, insertCount); + for (let k = 0; k < pairCount; k += 1) { + normalized.push('delete', 'insert'); + } + for (let k = pairCount; k < deleteCount; k += 1) { + normalized.push('delete'); + } + for (let k = pairCount; k < insertCount; k += 1) { + normalized.push('insert'); + } + + i = insertCursor - 1; + } + + return normalized; +} diff --git a/packages/super-editor/src/extensions/diffing/algorithm/similarity.ts b/packages/super-editor/src/extensions/diffing/algorithm/similarity.ts new file mode 100644 index 000000000..60d9604e8 --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/algorithm/similarity.ts @@ -0,0 +1,43 @@ +/** + * Computes the Levenshtein edit distance between two strings. + * + * @param a First string. + * @param b Second string. + * @returns Minimum number of edits required to transform {@link a} into {@link b}. + */ +export function levenshteinDistance(a: string, b: string): number { + const lenA = a.length; + const lenB = b.length; + + if (lenA === 0) { + return lenB; + } + if (lenB === 0) { + return lenA; + } + + let previous = new Array(lenB + 1); + let current = new Array(lenB + 1); + + for (let j = 0; j <= lenB; j += 1) { + previous[j] = j; + } + + for (let i = 1; i <= lenA; i += 1) { + current[0] = i; + const charA = a[i - 1]; + + for (let j = 1; j <= lenB; j += 1) { + const charB = b[j - 1]; + const cost = charA === charB ? 0 : 1; + const deletion = previous[j] + 1; + const insertion = current[j - 1] + 1; + const substitution = previous[j - 1] + cost; + current[j] = Math.min(deletion, insertion, substitution); + } + + [previous, current] = [current, previous]; + } + + return previous[lenB]; +} diff --git a/packages/super-editor/src/extensions/diffing/computeDiff.test.js b/packages/super-editor/src/extensions/diffing/computeDiff.test.js new file mode 100644 index 000000000..605fef5ae --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/computeDiff.test.js @@ -0,0 +1,345 @@ +import { describe, it, expect } from 'vitest'; +import { computeDiff } from './computeDiff'; + +import { Editor } from '@core/Editor.js'; +import { getStarterExtensions } from '@extensions/index.js'; +import { getTestDataAsBuffer } from '@tests/export/export-helpers/export-helpers.js'; + +/** + * Loads a DOCX fixture and returns the ProseMirror document and schema. + * + * @param {string} name DOCX fixture filename. 
+ * @returns {Promise<{ doc: import('prosemirror-model').Node; schema: import('prosemirror-model').Schema; comments: Array> }>} + */ +const getDocument = async (name) => { + const buffer = await getTestDataAsBuffer(`diffing/${name}`); + const [docx, media, mediaFiles, fonts] = await Editor.loadXmlData(buffer, true); + + const editor = new Editor({ + isHeadless: true, + extensions: getStarterExtensions(), + documentId: 'test-doc', + content: docx, + mode: 'docx', + media, + mediaFiles, + fonts, + annotations: true, + }); + + return { doc: editor.state.doc, schema: editor.schema, comments: editor.converter.comments }; +}; + +/** + * Flattens a ProseMirror JSON node to its text content. + * + * @param {import('prosemirror-model').Node | import('prosemirror-model').Node['toJSON'] | null | undefined} nodeJSON + * @returns {string} + */ +const getNodeTextContent = (nodeJSON) => { + if (!nodeJSON) { + return ''; + } + if (typeof nodeJSON.text === 'string') { + return nodeJSON.text; + } + if (Array.isArray(nodeJSON.content)) { + return nodeJSON.content.map((child) => getNodeTextContent(child)).join(''); + } + return ''; +}; + +describe('Diff', () => { + it('Compares two documents and identifies added, deleted, and modified paragraphs', async () => { + const { doc: docBefore, schema } = await getDocument('diff_before.docx'); + const { doc: docAfter } = await getDocument('diff_after.docx'); + + const { docDiffs } = computeDiff(docBefore, docAfter, schema); + const diffs = docDiffs; + const getDiff = (action, predicate) => diffs.find((diff) => diff.action === action && predicate(diff)); + + const modifiedDiffs = diffs.filter((diff) => diff.action === 'modified'); + const addedDiffs = diffs.filter((diff) => diff.action === 'added'); + const deletedDiffs = diffs.filter((diff) => diff.action === 'deleted'); + const attrOnlyDiffs = modifiedDiffs.filter((diff) => diff.contentDiff.length === 0); + + expect(diffs).toHaveLength(19); + expect(modifiedDiffs).toHaveLength(9); + expect(addedDiffs).toHaveLength(5); + expect(deletedDiffs).toHaveLength(5); + expect(attrOnlyDiffs).toHaveLength(4); + + // Modified paragraph with multiple text diffs + let diff = getDiff( + 'modified', + (diff) => diff.oldText === 'Curabitur facilisis ligula suscipit enim pretium, sed porttitor augue consequat.', + ); + expect(diff?.newText).toBe( + 'Curabitur facilisis ligula suscipit enim pretium et nunc ligula, porttitor augue consequat maximus.', + ); + const textPropsChanges = diff?.contentDiff.filter((textDiff) => textDiff.action === 'modified'); + expect(textPropsChanges).toHaveLength(18); + expect(diff?.contentDiff).toHaveLength(24); + + // Deleted paragraph + diff = getDiff( + 'deleted', + (diff) => diff.oldText === 'Vestibulum gravida eros sed nulla malesuada, vel eleifend sapien bibendum.', + ); + expect(diff).toBeDefined(); + + // Added paragraph + diff = getDiff( + 'added', + (diff) => + diff.text === 'Lorem tempor velit eget lorem posuere, id luctus dolor ultricies, to track supplier risks.', + ); + expect(diff).toBeDefined(); + + // Another modified paragraph + diff = getDiff( + 'modified', + (diff) => diff.oldText === 'Quisque posuere risus a ligula cursus vulputate et vitae ipsum.', + ); + expect(diff?.newText).toBe( + 'Quisque dapibus risus convallis ligula cursus vulputate, ornare dictum ipsum et vehicula nisl.', + ); + + // Simple modified paragraph + diff = getDiff('modified', (diff) => diff.oldText === 'OK' && diff.newText === 'No'); + expect(diff).toBeDefined(); + + // Added, trimmed, merged, removed, and moved 
paragraphs + diff = getDiff('added', (diff) => diff.text === 'Sed et nibh in nulla blandit maximus et dapibus.'); + expect(diff).toBeDefined(); + + const trimmedParagraph = getDiff( + 'modified', + (diff) => + diff.oldText === + 'Sed et nibh in nulla blandit maximus et dapibus. Etiam egestas diam luctus sit amet gravida purus.' && + diff.newText === 'Etiam egestas diam luctus sit amet gravida purus.', + ); + expect(trimmedParagraph).toBeDefined(); + + const mergedParagraph = getDiff( + 'added', + (diff) => + diff.text === + 'Praesent dapibus lacus vitae tellus laoreet, eget facilisis mi facilisis, donec mollis lacus sed nisl posuere, nec feugiat massa fringilla.', + ); + expect(mergedParagraph).toBeDefined(); + + const removedParagraph = getDiff( + 'modified', + (diff) => + diff.oldText === 'Praesent dapibus lacus vitae tellus laoreet, eget facilisis mi facilisis.' && + diff.newText === '', + ); + expect(removedParagraph).toBeDefined(); + + const movedParagraph = getDiff( + 'added', + (diff) => diff.text === 'Aenean hendrerit elit vitae sem fermentum, vel sagittis erat gravida.', + ); + expect(movedParagraph).toBeDefined(); + + // Attribute-only paragraph change + const namParagraph = attrOnlyDiffs.find( + (diff) => diff.oldText === 'Nam ultricies velit vitae purus eleifend pellentesque.', + ); + expect(namParagraph?.attrsDiff?.modified).toBeDefined(); + }); + + it('Compare two documents with simple changes', async () => { + const { doc: docBefore, schema } = await getDocument('diff_before2.docx'); + const { doc: docAfter } = await getDocument('diff_after2.docx'); + + const { docDiffs } = computeDiff(docBefore, docAfter, schema); + const diffs = docDiffs; + expect(diffs).toHaveLength(4); + + let diff = diffs.find((diff) => diff.action === 'modified' && diff.oldText === 'Here’s some text.'); + + expect(diff.newText).toBe('Here’s some NEW text.'); + expect(diff.contentDiff).toHaveLength(3); + expect(diff.contentDiff[0].newText).toBe(' '); + expect(diff.contentDiff[1].text).toBe('NEW'); + expect(diff.contentDiff[2].text).toBe(' '); + expect(diff.attrsDiff?.modified?.textId).toBeDefined(); + + diff = diffs.find((diff) => diff.action === 'deleted' && diff.oldText === 'I deleted this sentence.'); + expect(diff).toBeDefined(); + + diff = diffs.find((diff) => diff.action === 'added' && diff.text === 'I added this sentence.'); + expect(diff).toBeDefined(); + + diff = diffs.find((diff) => diff.action === 'modified' && diff.oldText === 'We are not done yet.'); + expect(diff.newText).toBe('We are done now.'); + expect(diff.contentDiff).toHaveLength(3); + expect(diff.attrsDiff?.modified?.textId).toBeDefined(); + }); + + it('Compare another set of two documents with only formatting changes', async () => { + const { doc: docBefore, schema } = await getDocument('diff_before4.docx'); + const { doc: docAfter } = await getDocument('diff_after4.docx'); + + const { docDiffs } = computeDiff(docBefore, docAfter, schema); + const diffs = docDiffs; + + expect(diffs).toHaveLength(1); + const diff = diffs[0]; + expect(diff.action).toBe('modified'); + }); + + it('Compare another set of two documents with only formatting changes', async () => { + const { doc: docBefore, schema } = await getDocument('diff_before5.docx'); + const { doc: docAfter } = await getDocument('diff_after5.docx'); + + const { docDiffs } = computeDiff(docBefore, docAfter, schema); + const diffs = docDiffs; + + expect(diffs).toHaveLength(1); + const diff = diffs[0]; + expect(diff.action).toBe('modified'); + }); + + it('Compare another set of two 
documents where an image was added', async () => { + const { doc: docBefore, schema } = await getDocument('diff_before6.docx'); + const { doc: docAfter } = await getDocument('diff_after6.docx'); + + const { docDiffs } = computeDiff(docBefore, docAfter, schema); + const diffs = docDiffs; + expect(diffs).toHaveLength(1); + const diff = diffs[0]; + expect(diff.action).toBe('modified'); + expect(diff.contentDiff).toHaveLength(3); + expect(diff.contentDiff[0].action).toBe('modified'); + expect(diff.contentDiff[0].kind).toBe('text'); + expect(diff.contentDiff[1].action).toBe('added'); + expect(diff.contentDiff[1].kind).toBe('inlineNode'); + expect(diff.contentDiff[2].action).toBe('added'); + expect(diff.contentDiff[2].kind).toBe('text'); + }); + + it('Compare a complex document with table edits and tracked formatting', async () => { + const { doc: docBefore, schema } = await getDocument('diff_before7.docx'); + const { doc: docAfter } = await getDocument('diff_after7.docx'); + + const { docDiffs } = computeDiff(docBefore, docAfter, schema); + const diffs = docDiffs; + expect(diffs).toHaveLength(9); + expect(diffs.filter((diff) => diff.action === 'modified')).toHaveLength(6); + expect(diffs.filter((diff) => diff.action === 'added')).toHaveLength(2); + expect(diffs.filter((diff) => diff.action === 'deleted')).toHaveLength(1); + + const formattingDiff = diffs.find( + (diff) => diff.action === 'modified' && diff.oldText === 'This paragraph formatting will change.', + ); + expect(formattingDiff?.contentDiff?.[0]?.runAttrsDiff?.added).toHaveProperty('runProperties.bold', true); + + const upgradedParagraph = diffs.find( + (diff) => diff.action === 'modified' && diff.oldText === 'This paragraph will have words.', + ); + expect(upgradedParagraph?.newText).toBe('This paragraph will have NEW words.'); + expect( + upgradedParagraph?.contentDiff?.some( + (change) => change.action === 'added' && typeof change.text === 'string' && change.text.includes('NEW'), + ), + ).toBe(true); + + const deletion = diffs.find( + (diff) => diff.action === 'deleted' && diff.oldText === 'This paragraph will be deleted.', + ); + expect(deletion).toBeDefined(); + + const wordRemoval = diffs.find( + (diff) => diff.action === 'modified' && diff.oldText === 'This word will be deleted.', + ); + expect(wordRemoval?.newText).toBe('This will be deleted.'); + expect(wordRemoval?.contentDiff).toHaveLength(1); + expect(wordRemoval?.contentDiff?.[0].action).toBe('deleted'); + + const tableModification = diffs.find( + (diff) => diff.action === 'modified' && diff.nodeType === 'table' && diff.oldNodeJSON, + ); + expect(tableModification).toBeUndefined(); + + const tableAddition = diffs.find((diff) => diff.action === 'added' && diff.nodeType === 'table'); + expect(getNodeTextContent(tableAddition?.nodeJSON)?.trim()).toBe('New table'); + + const trailingParagraph = diffs.find( + (diff) => diff.action === 'added' && diff.nodeType === 'paragraph' && diff.text === '', + ); + expect(trailingParagraph).toBeDefined(); + + const thirdHeaderDiff = diffs.find( + (diff) => + diff.action === 'modified' && diff.oldText === 'Third header' && diff.newText === 'Third header modified', + ); + expect( + thirdHeaderDiff?.contentDiff?.some((change) => change.action === 'added' && change.text === ' modified'), + ).toBe(true); + + const firstCellDiff = diffs.find( + (diff) => diff.action === 'modified' && diff.oldText === 'First cell' && diff.newText === 'cell', + ); + expect(firstCellDiff?.contentDiff?.[0]?.text).toBe('First '); + }); + + it('Compare documents with 
comments and tracked changes', async () => { + const { doc: docBefore, schema, comments: commentsBefore } = await getDocument('diff_before8.docx'); + const { doc: docAfter, comments: commentsAfter } = await getDocument('diff_after8.docx'); + + const { docDiffs, commentDiffs } = computeDiff(docBefore, docAfter, schema, commentsBefore, commentsAfter); + + expect(docDiffs.length).toBeGreaterThan(0); + expect(docDiffs.filter((diff) => diff.action === 'modified')).toHaveLength(2); + expect(commentDiffs).toHaveLength(2); + + const commentAnchorDiff = docDiffs.find( + (diff) => diff.action === 'modified' && diff.oldText === 'Here’s some text. It has a comment.', + ); + expect(commentAnchorDiff).toBeDefined(); + expect(commentAnchorDiff?.contentDiff?.some((change) => change.kind === 'inlineNode')).toBe(true); + expect( + commentAnchorDiff?.contentDiff?.some( + (change) => change.kind === 'inlineNode' && change.nodeType === 'commentRangeStart', + ), + ).toBe(true); + expect( + commentAnchorDiff?.contentDiff?.some( + (change) => change.kind === 'text' && change.marksDiff?.deleted?.some((mark) => mark.name === 'commentMark'), + ), + ).toBe(true); + + const trackedChangeDiff = docDiffs.find( + (diff) => diff.action === 'modified' && diff.oldText === 'I will add a comment to this one too.', + ); + expect(trackedChangeDiff).toBeDefined(); + expect( + trackedChangeDiff?.contentDiff?.some( + (change) => change.kind === 'text' && change.marksDiff?.added?.some((mark) => mark.name === 'commentMark'), + ), + ).toBe(true); + expect( + trackedChangeDiff?.contentDiff?.some( + (change) => change.kind === 'text' && change.marksDiff?.added?.some((mark) => mark.name === 'trackDelete'), + ), + ).toBe(true); + + const modifiedComment = commentDiffs.find( + (diff) => diff.action === 'modified' && diff.nodeType === 'comment' && diff.commentId === '0', + ); + expect(modifiedComment).toBeDefined(); + expect(modifiedComment?.oldText).toBe('Old comment.'); + expect(modifiedComment?.newText).toBe('Old comment.'); + expect(modifiedComment?.attrsDiff?.modified?.isDone).toEqual({ from: false, to: true }); + + const addedComment = commentDiffs.find( + (diff) => diff.action === 'added' && diff.nodeType === 'comment' && diff.commentId === '1', + ); + expect(addedComment).toBeDefined(); + expect(addedComment?.text).toBe('New comment'); + }); +}); diff --git a/packages/super-editor/src/extensions/diffing/computeDiff.ts b/packages/super-editor/src/extensions/diffing/computeDiff.ts new file mode 100644 index 000000000..6b686905d --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/computeDiff.ts @@ -0,0 +1,43 @@ +import type { Node as PMNode, Schema } from 'prosemirror-model'; +import { diffComments, type CommentInput, type CommentDiff } from './algorithm/comment-diffing.ts'; +import { diffNodes, normalizeNodes, type NodeDiff } from './algorithm/generic-diffing.ts'; + +/** + * Result payload for document diffing. + */ +export interface DiffResult { + /** Diffs computed from the ProseMirror document structure. */ + docDiffs: NodeDiff[]; + /** Diffs computed from comment content and metadata. */ + commentDiffs: CommentDiff[]; +} + +/** + * Computes structural diffs between two ProseMirror documents, emitting insert/delete/modify operations for any block + * node (paragraphs, images, tables, etc.). Paragraph mutations include inline text and inline-node diffs so consumers + * can reflect character-level and formatting changes as well. 
+ * + * Diffs are intended to be replayed on top of the old document in reverse order: `pos` marks the cursor location + that should be used before applying the diff at that index. For example, consecutive additions that sit between the + same pair of old nodes will share the same `pos`, so applying them from the end of the list guarantees they appear + in the correct order in the reconstructed document. + * + * @param oldPmDoc The previous ProseMirror document. + * @param newPmDoc The updated ProseMirror document. + * @param schema The schema used to interpret document nodes. + * @param oldComments Comment list from the old document. + * @param newComments Comment list from the new document. + * @returns Object containing document and comment diffs. + */ +export function computeDiff( + oldPmDoc: PMNode, + newPmDoc: PMNode, + schema: Schema, + oldComments: CommentInput[] = [], + newComments: CommentInput[] = [], +): DiffResult { + return { + docDiffs: diffNodes(normalizeNodes(oldPmDoc), normalizeNodes(newPmDoc)), + commentDiffs: diffComments(oldComments, newComments, schema), + }; +} diff --git a/packages/super-editor/src/extensions/diffing/diffing.js b/packages/super-editor/src/extensions/diffing/diffing.js new file mode 100644 index 000000000..9317a7e38 --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/diffing.js @@ -0,0 +1,36 @@ +// @ts-nocheck +import { Extension } from '@core/Extension.js'; +import { computeDiff } from './computeDiff.ts'; + +export const Diffing = Extension.create({ + name: 'documentDiffing', + + addCommands() { + return { + /** + * Compares the current document against `updatedDocument` and returns the diffs required to + * transform the former into the latter. + * + * These diffs are intended to be replayed on top of the old document, so apply the + * returned list in reverse (last entry first) to keep insertions that share the same + * `pos` anchor in the correct order. + * + * @param {import('prosemirror-model').Node} updatedDocument + * @param {import('./algorithm/comment-diffing.ts').CommentInput[]} [updatedComments] + * @returns {import('./computeDiff.ts').DiffResult} + */ + compareDocuments: + (updatedDocument, updatedComments = []) => + ({ state }) => { + const diffs = computeDiff( + state.doc, + updatedDocument, + state.schema, + this.editor.converter?.comments ??
[], + updatedComments, + ); + return diffs; + }, + }; + }, +}); diff --git a/packages/super-editor/src/extensions/diffing/index.js b/packages/super-editor/src/extensions/diffing/index.js new file mode 100644 index 000000000..0a3aee23b --- /dev/null +++ b/packages/super-editor/src/extensions/diffing/index.js @@ -0,0 +1 @@ +export { Diffing } from './diffing.js'; diff --git a/packages/super-editor/src/extensions/index.js b/packages/super-editor/src/extensions/index.js index 0343bedf4..e5816d9df 100644 --- a/packages/super-editor/src/extensions/index.js +++ b/packages/super-editor/src/extensions/index.js @@ -69,6 +69,7 @@ import { CustomSelection } from './custom-selection/index.js'; // Helpers import { trackChangesHelpers } from './track-changes/index.js'; +import { Diffing } from './diffing/index.js'; const getRichTextExtensions = () => { return [ @@ -235,6 +236,7 @@ export { trackChangesHelpers, getStarterExtensions, getRichTextExtensions, + Diffing, AiMark, AiAnimationMark, AiLoaderNode, diff --git a/packages/super-editor/src/tests/data/diffing/diff_after.docx b/packages/super-editor/src/tests/data/diffing/diff_after.docx new file mode 100644 index 000000000..75c363abb Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_after.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_after2.docx b/packages/super-editor/src/tests/data/diffing/diff_after2.docx new file mode 100644 index 000000000..7f0426d99 Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_after2.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_after3.docx b/packages/super-editor/src/tests/data/diffing/diff_after3.docx new file mode 100644 index 000000000..df9a28ae2 Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_after3.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_after4.docx b/packages/super-editor/src/tests/data/diffing/diff_after4.docx new file mode 100644 index 000000000..bf426c169 Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_after4.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_after5.docx b/packages/super-editor/src/tests/data/diffing/diff_after5.docx new file mode 100644 index 000000000..44e2dcbb8 Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_after5.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_after6.docx b/packages/super-editor/src/tests/data/diffing/diff_after6.docx new file mode 100644 index 000000000..9fc16f085 Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_after6.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_after7.docx b/packages/super-editor/src/tests/data/diffing/diff_after7.docx new file mode 100644 index 000000000..db208187b Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_after7.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_after8.docx b/packages/super-editor/src/tests/data/diffing/diff_after8.docx new file mode 100644 index 000000000..1202888ef Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_after8.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_before.docx b/packages/super-editor/src/tests/data/diffing/diff_before.docx new file mode 100644 index 000000000..3fec392b9 Binary files /dev/null and 
b/packages/super-editor/src/tests/data/diffing/diff_before.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_before2.docx b/packages/super-editor/src/tests/data/diffing/diff_before2.docx new file mode 100644 index 000000000..6db7a2b99 Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_before2.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_before3.docx b/packages/super-editor/src/tests/data/diffing/diff_before3.docx new file mode 100644 index 000000000..b094d85c8 Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_before3.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_before4.docx b/packages/super-editor/src/tests/data/diffing/diff_before4.docx new file mode 100644 index 000000000..155ce140b Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_before4.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_before5.docx b/packages/super-editor/src/tests/data/diffing/diff_before5.docx new file mode 100644 index 000000000..155ce140b Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_before5.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_before6.docx b/packages/super-editor/src/tests/data/diffing/diff_before6.docx new file mode 100644 index 000000000..34f9c220c Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_before6.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_before7.docx b/packages/super-editor/src/tests/data/diffing/diff_before7.docx new file mode 100644 index 000000000..1d9cfa578 Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_before7.docx differ diff --git a/packages/super-editor/src/tests/data/diffing/diff_before8.docx b/packages/super-editor/src/tests/data/diffing/diff_before8.docx new file mode 100644 index 000000000..b4c85db33 Binary files /dev/null and b/packages/super-editor/src/tests/data/diffing/diff_before8.docx differ diff --git a/packages/super-editor/src/tests/export/export-helpers/export-helpers.js b/packages/super-editor/src/tests/export/export-helpers/export-helpers.js index 9ac0aeeb3..82cd13905 100644 --- a/packages/super-editor/src/tests/export/export-helpers/export-helpers.js +++ b/packages/super-editor/src/tests/export/export-helpers/export-helpers.js @@ -37,7 +37,7 @@ export const getTextFromNode = (node) => { * @param {string} name The name of the file in the test data folder * @returns {Promise} The test data as abuffer */ -const getTestDataAsBuffer = async (name) => { +export const getTestDataAsBuffer = async (name) => { try { const basePath = join(__dirname, '../../data', name); return await readFile(basePath);
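The reverse-order replay described in the computeDiff and compareDocuments JSDoc above can be sketched as follows. This is an illustrative TypeScript sketch, not part of the changeset: the ReplayableDiff interface and the replayDocDiffs helper are hypothetical names, only the action, pos, and nodeJSON fields are exercised by the tests above, and oldNodeJSON (used here to size deletions and replacements) is an assumed field. For brevity a modified block is treated as a whole-node replacement, even though the real diffs also carry finer-grained contentDiff entries.

import type { Schema } from 'prosemirror-model';
import type { Transaction } from 'prosemirror-state';

// Assumed subset of a document diff needed for replay; fields beyond
// `action`, `pos`, and `nodeJSON` are hypothetical.
interface ReplayableDiff {
  action: 'added' | 'deleted' | 'modified';
  pos: number; // anchor position in the old document (see computeDiff JSDoc)
  nodeJSON?: unknown; // JSON of the node as it appears in the new document
  oldNodeJSON?: unknown; // JSON of the node in the old document (assumed field)
}

// Walks the diff list from the end so that earlier positions stay valid and
// consecutive additions sharing the same `pos` keep their relative order.
function replayDocDiffs(tr: Transaction, schema: Schema, diffs: ReplayableDiff[]): Transaction {
  for (let i = diffs.length - 1; i >= 0; i -= 1) {
    const { action, pos, nodeJSON, oldNodeJSON } = diffs[i];
    const oldSize = oldNodeJSON ? schema.nodeFromJSON(oldNodeJSON).nodeSize : 0;

    if (action === 'added' && nodeJSON) {
      tr.insert(pos, schema.nodeFromJSON(nodeJSON));
    } else if (action === 'deleted' && oldSize > 0) {
      tr.delete(pos, pos + oldSize);
    } else if (action === 'modified' && nodeJSON && oldSize > 0) {
      // Simplified: replaces the whole old node with its new counterpart.
      tr.replaceWith(pos, pos + oldSize, schema.nodeFromJSON(nodeJSON));
    }
  }
  return tr;
}

Applying the same list front-to-back would shift every later pos as soon as the first insertion or deletion landed, which is why both JSDoc blocks call for last-entry-first application.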