diff --git a/src/core/core_utils.js b/src/core/core_utils.js
index f3218ac614053..0e4aa9dddc477 100644
--- a/src/core/core_utils.js
+++ b/src/core/core_utils.js
@@ -62,10 +62,13 @@ function getLookupTableFactory(initializer) {
 }
 
 class MissingDataException extends BaseException {
-  constructor(begin, end) {
+  constructor(begin, end, objStreamRefNum = 0) {
     super(`Missing data [${begin}, ${end})`, "MissingDataException");
     this.begin = begin;
     this.end = end;
+    // Number of the compressed object whose containing object stream was
+    // missing; 0 when the exception is not tied to an object stream.
+    this.objStreamRefNum = objStreamRefNum;
   }
 }
 
diff --git a/src/core/document.js b/src/core/document.js
index 9d65d04ac84ab..c1e9800e7553f 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -1038,8 +1038,8 @@ class PDFDocument {
     };
   }
 
-  parse(recoveryMode) {
-    this.xref.parse(recoveryMode);
+  async parse(recoveryMode) {
+    await this.xref.parse(recoveryMode);
     this.catalog = new Catalog(this.pdfManager, this.xref);
   }
 
diff --git a/src/core/pdf_manager.js b/src/core/pdf_manager.js
index 83ea377bc47a2..ec26a197cfbfb 100644
--- a/src/core/pdf_manager.js
+++ b/src/core/pdf_manager.js
@@ -191,7 +191,7 @@ class NetworkPdfManager extends BasePdfManager {
     try {
       const value = obj[prop];
       if (typeof value === "function") {
-        return value.apply(obj, args);
+        return await value.apply(obj, args);
       }
       return value;
     } catch (ex) {
@@ -199,6 +199,11 @@ class NetworkPdfManager extends BasePdfManager {
         throw ex;
       }
       await this.requestRange(ex.begin, ex.end);
+      if (ex.objStreamRefNum) {
+        // The missing range was an object stream: decompress it now that its
+        // data has been requested, so the retry below can use the cached copy.
+        await this.pdfDocument.xref.decompressObjectStreams(ex.objStreamRefNum);
+      }
       return this.ensure(obj, prop, args);
     }
   }
diff --git a/src/core/xref.js b/src/core/xref.js
index c13c08a7d3bde..7b3489cef3100 100644
--- a/src/core/xref.js
+++ b/src/core/xref.js
@@ -31,6 +31,7 @@ import {
 } from "./core_utils.js";
 import { BaseStream } from "./base_stream.js";
 import { CipherTransformFactory } from "./crypto.js";
+import { Stream } from "./stream.js";
 
 class XRef {
   constructor(stream, pdfManager) {
@@ -43,6 +44,7 @@ class XRef {
     this._newPersistentRefNum = null;
     this._newTemporaryRefNum = null;
     this._persistentRefsCache = null;
+    this._objectStreams = new Map();
   }
 
   getNewPersistentRef(obj) {
@@ -96,7 +98,7 @@ class XRef {
     this.startXRefQueue = [startXRef];
   }
 
-  parse(recoveryMode = false) {
+  async parse(recoveryMode = false) {
     let trailerDict;
     if (!recoveryMode) {
       trailerDict = this.readXRef();
@@ -107,6 +109,10 @@ class XRef {
     trailerDict.assignXref(this);
     this.trailer = trailerDict;
 
+    // NOTE(review): this runs before `Encrypt` is read below; confirm that
+    // object streams of encrypted documents are still decoded correctly.
+    await this.decompressObjectStreams();
+
     let encrypt;
     try {
       encrypt = trailerDict.get("Encrypt");
@@ -925,7 +931,42 @@ class XRef {
 
   fetchCompressed(ref, xrefEntry, suppressEncryption = false) {
     const tableOffset = xrefEntry.offset;
-    const stream = this.fetch(Ref.get(tableOffset, 0));
+    const objectStream = this._objectStreams.get(tableOffset);
+    let stream;
+    if (objectStream) {
+      // The object stream has already been parsed.
+      stream = objectStream;
+      this._objectStreams.delete(tableOffset);
+    } else {
+      try {
+        stream = this.fetch(Ref.get(tableOffset, 0));
+      } catch (ex) {
+        if (!(ex instanceof MissingDataException)) {
+          // Keep the exception type that the direct `fetch` call used to
+          // surface, rather than masking it as a FormatError.
+          throw ex;
+        }
+        // Widen the request to the whole object stream only when the next
+        // xref entry provides a usable upper bound; otherwise keep the range
+        // reported by the original exception.
+        const objStream = this.entries[tableOffset];
+        const nextEntry = this.entries[tableOffset + 1];
+        let { begin, end } = ex;
+        if (
+          objStream?.uncompressed &&
+          nextEntry?.uncompressed &&
+          nextEntry.offset > objStream.offset
+        ) {
+          begin = this.stream.start + objStream.offset;
+          end = this.stream.start + nextEntry.offset;
+        }
+        throw new MissingDataException(
+          begin,
+          end,
+          /* objStreamRefNum = */ ref.num
+        );
+      }
+    }
     if (!(stream instanceof BaseStream)) {
       throw new FormatError("bad ObjStm stream");
     }
@@ -1030,6 +1071,71 @@ class XRef {
   getCatalogObj() {
     return this.root;
   }
+
+  /**
+   * Pre-decode object streams that report `isAsync` and `isDataLoaded`, and
+   * store them in `_objectStreams`, keyed by object stream number, where
+   * `fetchCompressed` picks them up. Streams that cannot be fetched or
+   * decoded are skipped.
+   *
+   * @param {number|null} [entryOffset] - Index into `this.entries` of a single
+   *   compressed entry to handle; `null` scans every entry.
+   * @returns {Promise<undefined>}
+   */
+  async decompressObjectStreams(entryOffset = null) {
+    // Seeded with 0 so that entries whose offset is 0 are always skipped.
+    const done = new Set([0]);
+    const promises = [];
+    let entries = this.entries;
+    if (entryOffset !== null) {
+      entries = { [entryOffset]: this.entries[entryOffset] };
+    }
+    for (const num in entries) {
+      if (!Object.hasOwn(entries, num)) {
+        continue;
+      }
+      const entry = entries[num];
+      // `entry` is undefined when `entryOffset` has no xref entry.
+      if (!entry || entry.uncompressed) {
+        continue;
+      }
+      const tableOffset = entry.offset;
+      if (done.has(tableOffset)) {
+        continue;
+      }
+      done.add(tableOffset);
+      let stream;
+      try {
+        stream = this.fetch(Ref.get(tableOffset, 0));
+      } catch {
+        // Best effort: `fetchCompressed` fetches again and reports failures.
+      }
+
+      if (
+        !(stream instanceof BaseStream) ||
+        !stream.isAsync ||
+        !stream.isDataLoaded
+      ) {
+        continue;
+      }
+
+      promises.push(
+        stream
+          .asyncGetBytes()
+          .then(bytes => {
+            if (bytes) {
+              this._objectStreams.set(
+                tableOffset,
+                new Stream(bytes, 0, bytes.length, stream.dict)
+              );
+            }
+          })
+          .catch(() => {
+            /* no-op */
+          })
+      );
+    }
+    await Promise.all(promises);
+  }
 }
 
 export { XRef };