diff --git a/README.md b/README.md
index f384299..a1a1ef6 100644
--- a/README.md
+++ b/README.md
@@ -154,6 +154,20 @@ pdftk
     });
 ```
 
+#### Get names and values of form fields ####
+
+```javascript
+pdftk
+    .input(fs.readFileSync('./file.pdf'))
+    .readFormFieldValuesAsJSON()
+    .then(json => Object.entries(json).forEach(([name, value]) => {
+        // print values or fill the form with modifications
+    }))
+    .catch(err => {
+        // handle errors
+    });
+```
+
 #### Useful chaining ####
 
 ```javascript
diff --git a/index.js b/index.js
index da0f359..f49d78b 100644
--- a/index.js
+++ b/index.js
@@ -177,6 +177,44 @@ class PdfTk {
         return new Buffer(str, encoding);
     }
 
+    /**
+     * Creates JSON data from fdf buffer output.
+     * Only fields that carry both a name (/T) and a value (/V) are included.
+     * @static
+     * @public
+     * @param {Buffer} fdfBuffer - fdf buffer to transform to JSON.
+     * @returns {object} Fdf field data as a JSON.
+     * @throws {TypeError} If the buffer does not contain a matching /Fields section.
+     */
+    static fdfToJSON(fdfBuffer) {
+        const fieldsRegExp = /\/Fields\s*\[\s*<<([\s\S]*?)>>\s*\]\s*>>/;
+        const singleFieldRegExp = /\/([VT])\s*[(]((?:\\[)]|[^)])*)[)]/g;
+
+        const fieldsMatch = fdfBuffer.toString().match(fieldsRegExp);
+        if (!fieldsMatch) {
+            throw new TypeError('Function must be called on generated FDF output');
+        }
+        // Field entries are separated by a closing `>>` followed by an opening `<<`.
+        const fieldEntries = fieldsMatch[1].split(/>>\s*<</);
+        const fdfJson = fieldEntries.reduce((json, field) => {
+            let fieldName = null;
+            let fieldValue = null;
+            // eslint-disable-next-line comma-dangle
+            for (const [, letter, contents] of field.matchAll(singleFieldRegExp)) {
+                switch (letter) {
+                    case 'V': fieldValue = contents; break;
+                    case 'T': fieldName = contents; break;
+                    default: break;
+                }
+            }
+            if (fieldValue !== null && fieldName !== null) {
+                json[fieldName] = fieldValue;
+            }
+            return json;
+        }, {});
+        return fdfJson;
+    }
+
     /**
      * Sanitizes fdf input
      * @statuc
@@ -496,6 +534,16 @@ class PdfTk {
         return this;
     }
 
+    /**
+     * Read PDF form fields into JSON data. Implies generateFdf().
+     * @public
+     * @returns {Promise} Promise that resolves the form fill data as JSON, field name as key and field value as value.
+     * @see {@link https://www.pdflabs.com/docs/pdftk-man-page/#dest-op-generate-fdf}
+     */
+    readFormFieldValuesAsJSON() {
+        return this.generateFdf().output().then(buffer => PdfTk.fdfToJSON(buffer));
+    }
+
     /**
      * Fill a PDF form from JSON data.
      * @public
diff --git a/test/files/emptyform.json b/test/files/emptyform.json
new file mode 100644
index 0000000..4b3487d
--- /dev/null
+++ b/test/files/emptyform.json
@@ -0,0 +1,4 @@
+{
+    "email": "",
+    "name": ""
+}
diff --git a/test/files/filledform.json b/test/files/filledform.json
new file mode 100644
index 0000000..b78ca03
--- /dev/null
+++ b/test/files/filledform.json
@@ -0,0 +1,4 @@
+{
+    "email": "test@email.com",
+    "name": "John Doe"
+}
diff --git a/test/files/filledformwithnumber.json b/test/files/filledformwithnumber.json
new file mode 100644
index 0000000..d19aab1
--- /dev/null
+++ b/test/files/filledformwithnumber.json
@@ -0,0 +1,4 @@
+{
+    "email": "test@email.com",
+    "name": "123"
+}
diff --git a/test/readFormFieldValuesAsJSON.spec.js b/test/readFormFieldValuesAsJSON.spec.js
new file mode 100644
index 0000000..74db61b
--- /dev/null
+++ b/test/readFormFieldValuesAsJSON.spec.js
@@ -0,0 +1,61 @@
+/* globals describe, it */
+const chai = require('chai');
+
+const { expect, } = chai;
+
+const pdftk = require('../');
+const fs = require('fs');
+const path = require('path');
+
+describe('readFormFieldValuesAsJSON', function () {
+
+    it('should generate a json file from the fdf of an unfilled pdf', function () {
+
+        const testFile = JSON.parse(fs.readFileSync(path.join(__dirname, './files/emptyform.json')));
+        const input = path.join(__dirname, './files/form.pdf');
+
+        return pdftk
+            .input(input)
+            .readFormFieldValuesAsJSON()
+            .then(json => expect(json).to.eql(testFile))
+            .catch(err => expect(err).to.be.null);
+    });
+
+    it('should generate an json file from the fdf of a filled pdf', function () {
+
+        const testFile = JSON.parse(fs.readFileSync(path.join(__dirname, './files/filledform.json')));
+        const input = fs.readFileSync(path.join(__dirname, './files/filledform.temp.pdf'));
+
+        return pdftk
+            .input(input)
+            .readFormFieldValuesAsJSON()
+            .then(json => expect(json).to.deep.equal(testFile))
+            .catch(err => expect(err).to.be.null);
+    });
+
+    it('should generate an json file from the fdf of a number filled pdf', function () {
+
+        const testFile = JSON.parse(fs.readFileSync(path.join(__dirname, './files/filledformwithnumber.json')));
+        const input = fs.readFileSync(path.join(__dirname, './files/filledformwithnumber.temp.pdf'));
+
+        return pdftk
+            .input(input)
+            .readFormFieldValuesAsJSON()
+            .then(json => expect(json).to.eql(testFile))
+            .catch(err => expect(err).to.be.null);
+    });
+
+    it('should throw and TypeError if the buffer is not an fdf', function () {
+
+        const input = path.join(__dirname, './files/form.pdf');
+
+        return pdftk
+            .input(input)
+            .readFormFieldValuesAsJSON()
+            .then(json => json)
+            .catch(err => {
+                expect(err.name).to.equal('TypeError');
+                expect(err.message).to.equal('Function must be called on generated FDF output');
+            });
+    });
+});