diff --git a/.gitignore b/.gitignore index 247e36b4..416d4bc1 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ playground/ build/ js/ .vscode +.idea coverage package-lock.json /examples/browserify/bundle.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 06fcded2..26da5504 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Unreleased - Fix garbled text copying in Chrome/Edge for PDFs with >256 unique characters (#1659) +- Fix Link accessibility issues by associating link annotations with their enclosing Link structure element (OBJR entry, StructParent and Contents keys) ### [v0.17.2] - 2025-08-30 diff --git a/examples/accessible-links.js b/examples/accessible-links.js new file mode 100644 index 00000000..bc4e2cbd --- /dev/null +++ b/examples/accessible-links.js @@ -0,0 +1,87 @@ +var PDFDocument = require('../'); +var fs = require('fs'); + +// Create a new PDFDocument +var doc = new PDFDocument({ + autoFirstPage: true, + bufferPages: true, + pdfVersion: '1.5', + // @ts-ignore PDF/UA needs to be enforced for PAC accessibility checker + subset: 'PDF/UA', + tagged: true, + displayTitle: true, + lang: 'en-US', + fontSize: 12, +}); + +doc.pipe(fs.createWriteStream('accessible-links.pdf')); + +// Set some metadata +doc.info['Title'] = 'Test Document'; +doc.info['Author'] = 'Devon Govett'; + +// Initialise document logical structure +var struct = doc.struct('Document'); +doc.addStructure(struct); + +// Register a font name for use later +doc.registerFont('Palatino', 'fonts/PalatinoBold.ttf'); + +// Set the font and draw some text +struct.add( + doc.struct('P', () => { + doc + .font('Palatino') + .fontSize(25) + .text('Some text with an embedded font! 
', 100, 100); + }), +); + +// Add another page +doc.addPage(); + +// Add some text with annotations +var linkSection = doc.struct('Sect'); +struct.add(linkSection); + +var paragraph = doc.struct('P'); +linkSection.add(paragraph); + +paragraph.add( + doc.struct('Span', () => { + doc + .font('Palatino') + .fillColor('black') + .text('This is some text before ', 100, 100, { + continued: true, + }); + }), +); + +paragraph.add( + doc.struct( + 'Link', + { + alt: 'Here is a link! ', + }, + () => { + doc.fillColor('blue').text('Here is a link!', { + link: 'http://google.com/', + underline: true, + continued: true, + }); + }, + ), +); + +paragraph.add( + doc.struct('Span', () => { + doc.fillColor('black').text(' and this is text after the link.'); + }), +); + +paragraph.end(); +linkSection.end(); + +// End and flush the document +doc.end(); diff --git a/examples/accessible-links.pdf b/examples/accessible-links.pdf new file mode 100644 index 00000000..fc9722a6 Binary files /dev/null and b/examples/accessible-links.pdf differ diff --git a/examples/kitchen-sink-accessible.js b/examples/kitchen-sink-accessible.js index bf0a824e..06b25d8e 100644 --- a/examples/kitchen-sink-accessible.js +++ b/examples/kitchen-sink-accessible.js @@ -7,7 +7,9 @@ var doc = new PDFDocument({ pdfVersion: '1.5', lang: 'en-US', tagged: true, - displayTitle: true + displayTitle: true, + // @ts-ignore PDF/UA needs to be enforced for PAC accessibility checker + subset: 'PDF/UA', }); doc.pipe(fs.createWriteStream('kitchen-sink-accessible.pdf')); diff --git a/examples/kitchen-sink-accessible.pdf b/examples/kitchen-sink-accessible.pdf index 002f9b52..91a2f54f 100644 Binary files a/examples/kitchen-sink-accessible.pdf and b/examples/kitchen-sink-accessible.pdf differ diff --git a/lib/mixins/annotations.js b/lib/mixins/annotations.js index 51a72c57..ad9b7047 100644 --- a/lib/mixins/annotations.js +++ b/lib/mixins/annotations.js @@ -1,3 +1,5 @@ +import PDFAnnotationReference from '../structure_annotation'; + 
export default { annotate(x, y, w, h, options) { options.Type = 'Annot'; @@ -19,6 +21,9 @@ export default { options.Dest = new String(options.Dest); } + const structParent = options.structParent; + delete options.structParent; + // Capitalize keys for (let key in options) { const val = options[key]; @@ -27,6 +32,12 @@ export default { const ref = this.ref(options); this.page.annotations.push(ref); + + if (structParent && typeof structParent.add === 'function') { + const annotRef = new PDFAnnotationReference(ref); + structParent.add(annotRef); + } + ref.end(); return this; }, @@ -77,6 +88,10 @@ export default { options.A.end(); } + if (options.structParent && !options.Contents) { + options.Contents = new String(''); + } + return this.annotate(x, y, w, h, options); }, diff --git a/lib/mixins/markings.js b/lib/mixins/markings.js index a0592e24..7a1c5f05 100644 --- a/lib/mixins/markings.js +++ b/lib/mixins/markings.js @@ -99,6 +99,13 @@ export default { endMarkedContent() { this.page.markings.pop(); this.addContent('EMC'); + if (this._textOptions) { + delete this._textOptions.link; + delete this._textOptions.goTo; + delete this._textOptions.destination; + delete this._textOptions.underline; + delete this._textOptions.strike; + } return this; }, diff --git a/lib/mixins/text.js b/lib/mixins/text.js index be34348b..82906531 100644 --- a/lib/mixins/text.js +++ b/lib/mixins/text.js @@ -531,7 +531,21 @@ export default { // create link annotations if the link option is given if (options.link != null) { - this.link(x, y, renderedWidth, this.currentLineHeight(), options.link); + const linkOptions = {}; + if ( + this._currentStructureElement && + this._currentStructureElement.dictionary.data.S === 'Link' + ) { + linkOptions.structParent = this._currentStructureElement; + } + this.link( + x, + y, + renderedWidth, + this.currentLineHeight(), + options.link, + linkOptions, + ); } if (options.goTo != null) { this.goTo(x, y, renderedWidth, this.currentLineHeight(), options.goTo); 
diff --git a/lib/structure_annotation.js b/lib/structure_annotation.js new file mode 100644 index 00000000..fe5ddbfd --- /dev/null +++ b/lib/structure_annotation.js @@ -0,0 +1,7 @@ +class PDFAnnotationReference { + constructor(annotationRef) { + this.annotationRef = annotationRef; + } +} + +export default PDFAnnotationReference; diff --git a/lib/structure_element.js b/lib/structure_element.js index 4f62d7ae..9f9cf19e 100644 --- a/lib/structure_element.js +++ b/lib/structure_element.js @@ -4,6 +4,7 @@ By Ben Schmidt */ import PDFStructureContent from './structure_content'; +import PDFAnnotationReference from './structure_annotation'; class PDFStructureElement { constructor(document, type, options = {}, children = null) { @@ -71,6 +72,10 @@ class PDFStructureElement { this._addContentToParentTree(child); } + if (child instanceof PDFAnnotationReference) { + this._addAnnotationToParentTree(child.annotationRef); + } + if (typeof child === 'function' && this._attached) { // _contentForClosure() adds the content to the parent tree child = this._contentForClosure(child); @@ -90,6 +95,15 @@ class PDFStructureElement { }); } + _addAnnotationToParentTree(annotRef) { + const parentTreeKey = this.document.createStructParentTreeNextKey(); + + annotRef.data.StructParent = parentTreeKey; + + const parentTree = this.document.getStructParentTree(); + parentTree.add(parentTreeKey, this.dictionary); + } + setParent(parentRef) { if (this.dictionary.data.P) { throw new Error(`Structure element added to more than one parent`); @@ -137,13 +151,25 @@ class PDFStructureElement { return ( child instanceof PDFStructureElement || child instanceof PDFStructureContent || + child instanceof PDFAnnotationReference || typeof child === 'function' ); } _contentForClosure(closure) { const content = this.document.markStructureContent(this.dictionary.data.S); + + const prevStructElement = this.document._currentStructureElement; + this.document._currentStructureElement = this; + + const wasEnded = 
this._ended; + this._ended = false; + closure(); + + this._ended = wasEnded; + + this.document._currentStructureElement = prevStructElement; this.document.endMarkedContent(); this._addContentToParentTree(content); @@ -209,6 +235,16 @@ class PDFStructureElement { } }); } + + if (child instanceof PDFAnnotationReference) { + const pageRef = this.document.page.dictionary; + const objr = { + Type: 'OBJR', + Obj: child.annotationRef, + Pg: pageRef, + }; + this.dictionary.data.K.push(objr); + } } } diff --git a/tests/unit/annotations.spec.js b/tests/unit/annotations.spec.js index 2210f7ca..34f2a97a 100644 --- a/tests/unit/annotations.spec.js +++ b/tests/unit/annotations.spec.js @@ -178,4 +178,41 @@ describe('Annotations', () => { ]); }); }); + + describe('annotations with structure parent', () => { + test('should add structParent to link annotations', () => { + document = new PDFDocument({ + info: { CreationDate: new Date(Date.UTC(2018, 1, 1)) }, + compress: false, + tagged: true, + }); + + const docData = logData(document); + + const linkElement = document.struct('Link'); + document.addStructure(linkElement); + + document.link(100, 100, 100, 20, 'http://example.com', { + structParent: linkElement, + }); + + linkElement.end(); + document.end(); + + const dataStr = docData.join('\n'); + expect(dataStr).toContain('/StructParent 0'); + expect(dataStr).toContain('/Contents ()'); + }); + + test('should work without structParent (backwards compatibility)', () => { + const docData = logData(document); + + document.link(100, 100, 100, 20, 'http://example.com'); + document.end(); + + const dataStr = docData.join('\n'); + expect(dataStr).toContain('/Subtype /Link'); + expect(dataStr).not.toContain('/StructParent'); + }); + }); }); diff --git a/tests/unit/markings.spec.js b/tests/unit/markings.spec.js index 6486c17d..c07fedf0 100644 --- a/tests/unit/markings.spec.js +++ b/tests/unit/markings.spec.js @@ -525,6 +525,23 @@ EMC document.struct('Foo', [1]); }).toThrow(); }); + + 
test('_currentStructureElement tracking with closures', () => { + const section = document.struct('Sect'); + document.addStructure(section); + + let capturedStructElement = null; + + const paragraph = document.struct('P', () => { + capturedStructElement = document._currentStructureElement; + }); + + section.add(paragraph); + section.end(); + document.end(); + + expect(capturedStructElement).toBe(paragraph); + }); }); describe('accessible document', () => { diff --git a/tests/unit/structure_annotation.spec.js b/tests/unit/structure_annotation.spec.js new file mode 100644 index 00000000..28faa82a --- /dev/null +++ b/tests/unit/structure_annotation.spec.js @@ -0,0 +1,66 @@ +import PDFDocument from '../../lib/document'; +import PDFAnnotationReference from '../../lib/structure_annotation'; +import { logData } from './helpers'; + +describe('PDFAnnotationReference', () => { + let document; + + beforeEach(() => { + document = new PDFDocument({ + info: { CreationDate: new Date(Date.UTC(2018, 1, 1)) }, + compress: false, + tagged: true, + }); + }); + + test('should add annotation reference to structure element with StructParent', () => { + const docData = logData(document); + + const linkElement = document.struct('Link'); + document.addStructure(linkElement); + + const annotRef = document.ref({ + Type: 'Annot', + Subtype: 'Link', + Rect: [100, 100, 200, 120], + }); + + linkElement.add(new PDFAnnotationReference(annotRef)); + linkElement.end(); + annotRef.end(); + document.end(); + + const dataStr = docData.join('\n'); + expect(dataStr).toContain('/Type /OBJR'); + expect(dataStr).toContain('/StructParent 0'); + }); + + test('should handle multiple annotations with different StructParent values', () => { + const docData = logData(document); + + const section = document.struct('Sect'); + document.addStructure(section); + + const link1 = document.struct('Link'); + const link2 = document.struct('Link'); + section.add(link1); + section.add(link2); + + const annotRef1 = 
document.ref({ Type: 'Annot', Subtype: 'Link' }); + const annotRef2 = document.ref({ Type: 'Annot', Subtype: 'Link' }); + + link1.add(new PDFAnnotationReference(annotRef1)); + link2.add(new PDFAnnotationReference(annotRef2)); + + link1.end(); + link2.end(); + section.end(); + annotRef1.end(); + annotRef2.end(); + document.end(); + + const dataStr = docData.join('\n'); + expect(dataStr).toContain('/StructParent 0'); + expect(dataStr).toContain('/StructParent 1'); + }); +}); diff --git a/tests/unit/text.spec.js b/tests/unit/text.spec.js index 6a1153cc..8d6cc7e7 100644 --- a/tests/unit/text.spec.js +++ b/tests/unit/text.spec.js @@ -193,4 +193,90 @@ Q expect(docData).toContainText({ text }); }); }); + + describe('text with structure parent links', () => { + beforeEach(() => { + document = new PDFDocument({ + info: { CreationDate: new Date(Date.UTC(2018, 1, 1)) }, + compress: false, + tagged: true, + }); + }); + + test('should auto-link text inside Link structure element', () => { + const docData = logData(document); + + const linkElement = document.struct('Link', () => { + document.text('Click here', 100, 100, { + link: 'http://example.com', + }); + }); + + document.addStructure(linkElement); + linkElement.end(); + document.end(); + + const dataStr = docData.join('\n'); + expect(dataStr).toContain('/S /Link'); + expect(dataStr).toContain('/StructParent'); + }); + + test('should not add StructParent outside Link structure', () => { + const docData = logData(document); + + document.text('Click here', 100, 100, { + link: 'http://example.com', + }); + + document.end(); + + const dataStr = docData.join('\n'); + expect(dataStr).toContain('/Subtype /Link'); + expect(dataStr).not.toContain('/StructParent'); + }); + + test('should not leak link options to subsequent structure elements with continued text', () => { + const docData = logData(document); + + const paragraph = document.struct('P'); + document.addStructure(paragraph); + + paragraph.add( + document.struct('Span', () 
=> { + document.text('This is some text before ', 100, 100, { + continued: true, + }); + }), + ); + + paragraph.add( + document.struct('Link', () => { + document.text('Here is a link!', { + link: 'http://google.com/', + underline: true, + continued: true, + }); + }), + ); + + paragraph.add( + document.struct('Span', () => { + document.text(' and this is text after the link.'); + }), + ); + + paragraph.end(); + document.end(); + + const dataStr = docData.join('\n'); + + // Count how many link annotations exist - should be exactly 1 + const linkMatches = dataStr.match(/\/Subtype \/Link/g); + expect(linkMatches).toBeTruthy(); + expect(linkMatches.length).toBe(1); + + expect(dataStr).toContain('/S /Span'); + expect(dataStr).toContain('/S /Link'); + }); + }); });