From 97546a9fdff39f317338f8d32ca3b9638ee9ad8b Mon Sep 17 00:00:00 2001 From: Rob O'Callaghan Date: Tue, 27 Aug 2019 17:09:52 +0100 Subject: [PATCH 1/2] support reading FDF data as JSON --- index.js | 34 ++++++++++++++ test/files/emptyform.json | 4 ++ test/files/filledform.json | 4 ++ test/files/filledformwithnumber.json | 4 ++ test/generateFormDataJson.spec.js | 66 ++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+) create mode 100644 test/files/emptyform.json create mode 100644 test/files/filledform.json create mode 100644 test/files/filledformwithnumber.json create mode 100644 test/generateFormDataJson.spec.js diff --git a/index.js b/index.js index da0f359..173ad5b 100644 --- a/index.js +++ b/index.js @@ -177,6 +177,39 @@ class PdfTk { return new Buffer(str, encoding); } + /** + * Creates JSON data from fdf buffer output. + * @static + * @public + * @param {Buffer} fdfBuffer - fdf buffer to transform to JSON. + * @returns {object} Fdf field data as a JSON. + */ + static fdfToJSON(fdfBuffer) { + try { + const fdfString = fdfBuffer.toString(); + const fieldsString = fdfString.substring( + fdfString.lastIndexOf('/Fields ['), + fdfString.lastIndexOf(']') + ); + const fdfJson = fieldsString.split('<<') + .filter(stringSplit => stringSplit.includes('/T') || stringSplit.includes('\V')) + .reduce((json, stringLine) => { + // eslint-disable-next-line array-bracket-spacing + const [, valueMatch, titleMatch] = stringLine.match(/\/V\s*([^\n\r]*)\s*\/T\s*([^\n\r]*)/); + titleMatch = titleMatch.startsWith('(') ? titleMatch.substring(1) : titleMatch; + titleMatch = titleMatch.endsWith(')') ? titleMatch.substring(0, titleMatch.length - 1) : titleMatch; + valueMatch = valueMatch.startsWith('(') ? valueMatch.substring(1) : valueMatch; + valueMatch = valueMatch.endsWith(')') ? 
valueMatch.substring(0, valueMatch.length - 1) : valueMatch; + json[titleMatch] = valueMatch; + return json; + }, {}); + return fdfJson; + } catch (err) { + if (err instanceof TypeError) err.message = 'Function must be called on generated FDF output'; + throw err; + } + } + /** * Sanitizes fdf input * @statuc @@ -1106,4 +1139,5 @@ module.exports = { }, }); }, + fdfToJSON: PdfTk.fdfToJSON, }; diff --git a/test/files/emptyform.json b/test/files/emptyform.json new file mode 100644 index 0000000..4b3487d --- /dev/null +++ b/test/files/emptyform.json @@ -0,0 +1,4 @@ +{ + "email": "", + "name": "" +} diff --git a/test/files/filledform.json b/test/files/filledform.json new file mode 100644 index 0000000..b78ca03 --- /dev/null +++ b/test/files/filledform.json @@ -0,0 +1,4 @@ +{ + "email": "test@email.com", + "name": "John Doe" +} diff --git a/test/files/filledformwithnumber.json b/test/files/filledformwithnumber.json new file mode 100644 index 0000000..d19aab1 --- /dev/null +++ b/test/files/filledformwithnumber.json @@ -0,0 +1,4 @@ +{ + "email": "test@email.com", + "name": "123" +} diff --git a/test/generateFormDataJson.spec.js b/test/generateFormDataJson.spec.js new file mode 100644 index 0000000..f729a9f --- /dev/null +++ b/test/generateFormDataJson.spec.js @@ -0,0 +1,66 @@ +/* globals describe, it */ +const chai = require('chai'); + +const { expect, } = chai; + +const pdftk = require('../'); +const fs = require('fs'); +const path = require('path'); + +describe('generateFormDataJson', function () { + + it('should generate a json file from the fdf of an unfilled pdf', function () { + + const testFile = JSON.parse(fs.readFileSync(path.join(__dirname, './files/emptyform.json'))); + const input = path.join(__dirname, './files/form.pdf'); + + return pdftk + .input(input) + .generateFdf() + .output() + .then(buffer => expect(pdftk.fdfToJSON(buffer)).to.eql(testFile)) + .catch(err => expect(err).to.be.null); + }); + + it('should generate an json file from the fdf of a filled 
pdf', function () { + + const testFile = JSON.parse(fs.readFileSync(path.join(__dirname, './files/filledform.json'))); + const input = fs.readFileSync(path.join(__dirname, './files/filledform.temp.pdf')); + + return pdftk + .input(input) + .generateFdf() + .output() + .then(buffer => { + expect(pdftk.fdfToJSON(buffer)).to.deep.equal(testFile); + }) + .catch(err => expect(err).to.be.null); + }); + + it('should generate an json file from the fdf of a number filled pdf', function () { + + const testFile = JSON.parse(fs.readFileSync(path.join(__dirname, './files/filledformwithnumber.json'))); + const input = fs.readFileSync(path.join(__dirname, './files/filledformwithnumber.temp.pdf')); + + return pdftk + .input(input) + .generateFdf() + .output() + .then(buffer => expect(pdftk.fdfToJSON(buffer)).to.eql(testFile)) + .catch(err => expect(err).to.be.null); + }); + + it('should throw and TypeError if the buffer is not an fdf', function () { + + const input = path.join(__dirname, './files/form.pdf'); + + return pdftk + .input(input) + .output() + .then(buffer => pdftk.fdfToJSON(buffer)) + .catch(err => { + expect(err.name).to.equal('TypeError'); + expect(err.message).to.equal('Function must be called on generated FDF output'); + }); + }); +}); From 0aee502edbdaa03f3ed7f56207a945d6c3c6c164 Mon Sep 17 00:00:00 2001 From: Jakob Petsovits Date: Sat, 4 Jul 2020 19:31:15 -0400 Subject: [PATCH 2/2] expose promisified readFormFieldValuesAsJSON(), keep fdfToJSON() private --- README.md | 14 +++++ index.js | 57 +++++++++++-------- ...c.js => readFormFieldValuesAsJSON.spec.js} | 23 +++----- 3 files changed, 57 insertions(+), 37 deletions(-) rename test/{generateFormDataJson.spec.js => readFormFieldValuesAsJSON.spec.js} (76%) diff --git a/README.md b/README.md index f384299..a1a1ef6 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,20 @@ pdftk }); ``` +#### Get names and values of form fields #### + +```javascript +pdftk + .input(fs.readFileSync('./file.pdf')) + 
.readFormFieldValuesAsJSON() + .then(json => Object.entries(json).forEach(([name, value]) => { + // print values or fill the form with modifications + })) + .catch(err => { + // handle errors + }); +``` + #### Useful chaining #### ```javascript diff --git a/index.js b/index.js index 173ad5b..f49d78b 100644 --- a/index.js +++ b/index.js @@ -185,29 +185,31 @@ class PdfTk { * @returns {object} Fdf field data as a JSON. */ static fdfToJSON(fdfBuffer) { - try { - const fdfString = fdfBuffer.toString(); - const fieldsString = fdfString.substring( - fdfString.lastIndexOf('/Fields ['), - fdfString.lastIndexOf(']') - ); - const fdfJson = fieldsString.split('<<') - .filter(stringSplit => stringSplit.includes('/T') || stringSplit.includes('\V')) - .reduce((json, stringLine) => { - // eslint-disable-next-line array-bracket-spacing - const [, valueMatch, titleMatch] = stringLine.match(/\/V\s*([^\n\r]*)\s*\/T\s*([^\n\r]*)/); - titleMatch = titleMatch.startsWith('(') ? titleMatch.substring(1) : titleMatch; - titleMatch = titleMatch.endsWith(')') ? titleMatch.substring(0, titleMatch.length - 1) : titleMatch; - valueMatch = valueMatch.startsWith('(') ? valueMatch.substring(1) : valueMatch; - valueMatch = valueMatch.endsWith(')') ? 
valueMatch.substring(0, valueMatch.length - 1) : valueMatch; - json[titleMatch] = valueMatch; - return json; - }, {}); - return fdfJson; - } catch (err) { - if (err instanceof TypeError) err.message = 'Function must be called on generated FDF output'; - throw err; + const fieldsRegExp = /\/Fields\s*\[\s*<<([\s\S]*?)>>\s*\]\s*>>/; + const singleFieldRegExp = /\/([VT])\s*[(]((?:\\[)]|[^)])*)[)]/g; + + const fieldsMatch = fdfBuffer.toString().match(fieldsRegExp); + if (!fieldsMatch) { + throw TypeError('Function must be called on generated FDF output'); } + const fdfJson = fieldsMatch[1].split(/>>\s*<</) + .reduce((json, field) => { + let fieldName = null; + let fieldValue = null; + // eslint-disable-next-line comma-dangle + for (const [, letter, contents] of field.matchAll(singleFieldRegExp)) { + switch (letter) { + case 'V': fieldValue = contents; break; + case 'T': fieldName = contents; break; + default: break; + } + } + if (fieldValue !== null && fieldName !== null) { + json[fieldName] = fieldValue; + } + return json; + }, {}); + return fdfJson; } /** @@ -529,6 +531,16 @@ class PdfTk { return this; } + /** + * Read PDF form fields into JSON data. Implies generateFdf(). + * @public + * @returns {Promise} Promise that resolves the form fill data as JSON, field name as key and field value as value. + * @see {@link https://www.pdflabs.com/docs/pdftk-man-page/#dest-op-generate-fdf} + */ + readFormFieldValuesAsJSON() { + return this.generateFdf().output().then(buffer => PdfTk.fdfToJSON(buffer)); + } + /** * Fill a PDF form from JSON data. 
* @public @@ -1139,5 +1151,4 @@ module.exports = { }, }); }, - fdfToJSON: PdfTk.fdfToJSON, }; diff --git a/test/generateFormDataJson.spec.js b/test/readFormFieldValuesAsJSON.spec.js similarity index 76% rename from test/generateFormDataJson.spec.js rename to test/readFormFieldValuesAsJSON.spec.js index f729a9f..74db61b 100644 --- a/test/generateFormDataJson.spec.js +++ b/test/readFormFieldValuesAsJSON.spec.js @@ -7,7 +7,7 @@ const pdftk = require('../'); const fs = require('fs'); const path = require('path'); -describe('generateFormDataJson', function () { +describe('readFormFieldValuesAsJSON', function () { it('should generate a json file from the fdf of an unfilled pdf', function () { @@ -16,9 +16,8 @@ describe('generateFormDataJson', function () { return pdftk .input(input) - .generateFdf() - .output() - .then(buffer => expect(pdftk.fdfToJSON(buffer)).to.eql(testFile)) + .readFormFieldValuesAsJSON() + .then(json => expect(json).to.eql(testFile)) .catch(err => expect(err).to.be.null); }); @@ -29,11 +28,8 @@ describe('generateFormDataJson', function () { return pdftk .input(input) - .generateFdf() - .output() - .then(buffer => { - expect(pdftk.fdfToJSON(buffer)).to.deep.equal(testFile); - }) + .readFormFieldValuesAsJSON() + .then(json => expect(json).to.deep.equal(testFile)) .catch(err => expect(err).to.be.null); }); @@ -44,9 +40,8 @@ describe('generateFormDataJson', function () { return pdftk .input(input) - .generateFdf() - .output() - .then(buffer => expect(pdftk.fdfToJSON(buffer)).to.eql(testFile)) + .readFormFieldValuesAsJSON() + .then(json => expect(json).to.eql(testFile)) .catch(err => expect(err).to.be.null); }); @@ -56,8 +51,8 @@ describe('generateFormDataJson', function () { return pdftk .input(input) - .output() - .then(buffer => pdftk.fdfToJSON(buffer)) + .readFormFieldValuesAsJSON() + .then(json => json) .catch(err => { expect(err.name).to.equal('TypeError'); expect(err.message).to.equal('Function must be called on generated FDF output');