diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..a63e704e --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +version: 2 +updates: + # Enable version updates for GitHub action workflows + - package-ecosystem: "github-actions" + directory: "/" + # Check for updates to GitHub Actions once per week + schedule: + interval: "weekly" + open-pull-requests-limit: 10 + # Enable version updates for Go modules + - package-ecosystem: gomod + directory: / + # Check for updates to Go modules once per week + schedule: + interval: "weekly" + open-pull-requests-limit: 10 diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index a23013c4..5717e12f 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,8 +1,10 @@ ## Thank you for your contribution! -1. Please do not create a Pull Request without creating an issue first. +1. **Please do not create a Pull Request without creating an issue first.** -2. **Any** change needs to be discussed before proceeding. +2. **Any** change needs to be discussed within the issue before proceeding. + +3. Issue discussion will determine further steps like whether a PR is needed or not. 3. Please provide enough information for PR review. diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ed81be92..cb9650e6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,24 +14,29 @@ jobs: goarch: arm64 - goos: linux goarch: amd64 + - goos: linux + goarch: arm + goarm: 7 + - goos: linux + goarch: arm64 - goos: windows goarch: amd64 go: - - '1.20.x' - - '1.21.x' + - '1.24.x' + - '1.25.x' runs-on: ubuntu-latest steps: + - name: Checkout repo + uses: actions/checkout@v5 + - name: Set up Go ${{ matrix.go }} - uses: actions/setup-go@v4 + uses: actions/setup-go@v6 with: go-version: ${{ matrix.go }} - run: go version - - name: Checkout repo - uses: actions/checkout@v3 - - name: Go vet run: go vet -v ./... 
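The expanded test matrix above adds linux/arm (GOARM 7) and linux/arm64 targets alongside Go 1.24.x/1.25.x. As a rough local spot-check of such a target (a sketch using the standard Go cross-compilation variables, not part of the workflow itself):

```shell
# Cross-compile the CLI for the new linux/arm matrix entry (GOARM=7).
GOOS=linux GOARCH=arm GOARM=7 go build ./cmd/pdfcpu

# Run the same vet step the workflow executes.
go vet -v ./...
```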
@@ -40,7 +45,7 @@ jobs: with: flag-name: Go-${{ matrix.go }} parallel: true - + finish: needs: test runs-on: ubuntu-latest diff --git a/.goreleaser.yml b/.goreleaser.yml index 14178854..d2b9ffed 100644 --- a/.goreleaser.yml +++ b/.goreleaser.yml @@ -1,23 +1,35 @@ +version: 2 + builds: -- main: ./cmd/pdfcpu - env: - - CGO_ENABLED=0 - ldflags: - - '-s -w -X main.version={{.Version}} -X github.com/pdfcpu/pdfcpu/pkg/pdfcpu.VersionStr={{.Version}} -X main.commit={{.ShortCommit}} -X main.date={{.Date}} -X main.builtBy=goreleaser' - goos: - - js - - linux - - darwin - - windows - goarch: - - "386" - - arm64 - - wasm - - amd64 + - main: ./cmd/pdfcpu + env: + - CGO_ENABLED=0 + ldflags: + - '-s -w -X main.version={{.Version}} -X github.com/pdfcpu/pdfcpu/pkg/pdfcpu.VersionStr={{.Version}} -X main.commit={{.ShortCommit}} -X main.date={{.Date}} -X main.builtBy=goreleaser' + goos: + - linux + - darwin + - windows + - js + goarch: + - amd64 + - arm64 + - arm + - wasm + - "386" + goarm: + - 7 + ignore: + - goos: windows + goarch: arm + goarm: 7 + - goos: windows + goarch: arm64 + dist: ./dist + archives: - - - format: tar.xz + - format: tar.xz format_overrides: - goos: windows format: zip @@ -40,5 +52,13 @@ changelog: sort: asc filters: exclude: - - '^docs:' - - '^test:' + - '^docs:' + - '^test:' + +release: + github: + owner: pdfcpu + name: pdfcpu + draft: false # Optional: Set to true if you want to create drafts and not publish immediately + prerelease: false # Optional: Set to true if it's a prerelease + diff --git a/Dockerfile b/Dockerfile index 32e352e3..3a3fa0c1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ # /data # exit // exit container # Start from the latest golang base image -FROM golang:latest as builder +FROM golang:latest AS builder # install RUN go install github.com/pdfcpu/pdfcpu/cmd/pdfcpu@latest @@ -39,9 +39,10 @@ COPY --from=builder /go/bin ./ # Export path of executable ENV PATH="${PATH}:/root" -WORKDIR /data +VOLUME /app +WORKDIR /app -# Command to run executable -CMD pdfcpu && echo && pdfcpu version -v +# Entrypoint for container default executable +ENTRYPOINT ["pdfcpu"] diff --git a/README.md b/README.md index b59d0172..5abc6acc 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,21 @@ -# pdfcpu: a Go PDF processor +# pdfcpu: a Go PDF processor and CLI -[![Open in Visual Studio Code](https://img.shields.io/static/v1?logo=visualstudiocode&label=&message=Open%20in%20Visual%20Studio%20Code&labelColor=2c2c32&color=007acc&logoColor=007acc)](https://open.vscode.dev/pdfcpu/pdfcpu) [![Test](https://github.com/pdfcpu/pdfcpu/workflows/Test/badge.svg)](https://github.com/pdfcpu/pdfcpu/actions) [![Coverage Status](https://coveralls.io/repos/github/pdfcpu/pdfcpu/badge.svg?branch=master)](https://coveralls.io/github/pdfcpu/pdfcpu?branch=master) [![GoDoc](https://godoc.org/github.com/pdfcpu/pdfcpu?status.svg)](https://pkg.go.dev/github.com/pdfcpu/pdfcpu) [![Go Report Card](https://goreportcard.com/badge/github.com/pdfcpu/pdfcpu)](https://goreportcard.com/report/github.com/pdfcpu/pdfcpu) [![Hex.pm](https://img.shields.io/hexpm/l/plug.svg)](https://opensource.org/licenses/Apache-2.0) +[![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20pdfcpu%20Guru-006BFF)](https://gurubase.io/g/pdfcpu) + [![Latest release](https://img.shields.io/github/release/pdfcpu/pdfcpu.svg)](https://github.com/pdfcpu/pdfcpu/releases) +[![](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86)](https://github.com/sponsors/hhrutter) + -pdfcpu is a PDF processing library 
written in [Go](http://golang.org) supporting encryption. -It provides both an API and a CLI. Supported are all versions up to PDF 1.7 (ISO-32000). +pdfcpu is a PDF processing library written in [Go](https://go.dev/) that supports encryption and offers both an API and a command-line interface (CLI). It is compatible with all PDF versions with basic support and ongoing improvement for PDF 2.0 (ISO-32000-2). -Support for PDF 2.0 is basic and ongoing work. ## Motivation @@ -41,7 +42,8 @@ This is an effort to build a comprehensive PDF processing library from the groun ## Focus -The main focus lies on strong support for batch processing and scripting via a rich command line. At the same time pdfcpu wants to make it easy to integrate PDF processing into your Go based backend system by providing a robust command set. +The primary emphasis is on providing robust assistance for batch processing and scripting through a comprehensive command-line interface. +Simultaneously, pdfcpu aims to simplify the integration of PDF processing into your Go-based backend system by offering a versatile set of commands. ## Command Set @@ -50,10 +52,12 @@ The main focus lies on strong support for batch processing and scripting via a r * [booklet](https://pdfcpu.io/generate/booklet) * [bookmarks](https://pdfcpu.io/bookmarks/bookmarks) * [boxes](https://pdfcpu.io/boxes/boxes) +* [certificates](https://pdfcpu.io/core/certs) * [change owner password](https://pdfcpu.io/encrypt/change_opw) * [change user password](https://pdfcpu.io/encrypt/change_upw) * [collect](https://pdfcpu.io/core/collect) -* [create](https://pdfcpu.io/generate/create) +* [config](https://pdfcpu.io/config/config) +* [create](https://pdfcpu.io/create/create) * [crop](https://pdfcpu.io/core/crop) * [cut](https://pdfcpu.io/generate/cut) * [decrypt](https://pdfcpu.io/encrypt/decryptPDF) @@ -73,25 +77,27 @@ The main focus lies on strong support for batch processing and scripting via a r * [pagelayout](https://pdfcpu.io/pagelayout/pagelayout) * [pagemode](https://pdfcpu.io/pagemode/pagemode) * [pages](https://pdfcpu.io/pages/pages) -* [permissions](https://pdfcpu.io/encrypt/perm_add) +* [permissions](https://pdfcpu.io/encrypt/perm_set) * [portfolio](https://pdfcpu.io/portfolio/portfolio) * [poster](https://pdfcpu.io/generate/poster) * [properties](https://pdfcpu.io/properties/properties) * [resize](https://pdfcpu.io/core/resize) * [rotate](https://pdfcpu.io/core/rotate) +* [signatures](http://pdfcpu.io/core/sign) * [split](https://pdfcpu.io/core/split) * [stamp](https://pdfcpu.io/core/stamp) * [trim](https://pdfcpu.io/core/trim) -* [validate](https://pdfcpu.io/core/validate) 👉 now including rudimentory support for PDF 2.0 +* [validate](https://pdfcpu.io/core/validate) * [viewerpref](https://pdfcpu.io/viewerpref/viewerpref) * [watermark](https://pdfcpu.io/core/watermark) * [zoom](https://pdfcpu.io/core/zoom) ## Documentation -* The main entry point is [pdfcpu.io](https://pdfcpu.io). -* For CLI examples also go to [pdfcpu.io](https://pdfcpu.io). There you will find explanations of all the commands and their parameters. -* For API examples of all pdfcpu operations please refer to [GoDoc](https://pkg.go.dev/github.com/pdfcpu/pdfcpu/pkg/api). 
+* [pdfcpu.io](https://pdfcpu.io) +* [API tests](https://github.com/pdfcpu/pdfcpu/tree/master/pkg/api/test) +* [API samples](https://github.com/pdfcpu/pdfcpu/tree/master/pkg/samples) +* CLI usage: `$ pdfcpu help cmd` ### GoDoc @@ -147,10 +153,10 @@ $ pdfcpu version ### Run in a Docker container -``` +```shell $ docker build -t pdfcpu . -# mount current folder into container to process local files -$ docker run -it --mount type=bind,source="$(pwd)",target=/app pdfcpu ./pdfcpu validate /app/pdfs/a.pdf +# mount current host folder into container as /app to process files in the local host folder +$ docker run -it -v "$(pwd)":/app pdfcpu validate a.pdf ``` ## Contributing @@ -204,7 +210,8 @@ Thanks 💚 goes to these wonderful people: | [
Rafael Garcia Argente](https://github.com/rgargente) | [
truyet](https://github.com/truyet) | [
Christian Nicola](https://github.com/christiannicola) | [
Benjamin Krill](https://github.com/kben) | [
Peter Wyatt](https://github.com/petervwyatt) | [
Kroum Tzanev](https://github.com/kpym) | [
Stefan Huber](https://github.com/signalwerk) | | [
Juan Iscar](https://github.com/juaismar) | [
Eng Zer Jun](https://github.com/Juneezee) | [
Dmitry Ivanov](https://github.com/hant0508)|[
Rene Kaufmann](https://github.com/HeavyHorst)|[
Christian Heusel](https://github.com/christian-heusel) | [
Chris](https://github.com/freshteapot) | [
Lukasz Czaplinski](https://github.com/scoiatael) | [
Joel Silva Schutz](https://github.com/joelschutz) | [
semvis123](https://github.com/semvis123) | [
guangwu](https://github.com/testwill) | [
Yoshiki Nakagawa](https://github.com/yyoshiki41) | [
Steve van Loben Sels](https://github.com/stevevls) | [
Yaofu](https://github.com/mygityf) | [
vsenko](https://github.com/vsenko) | -[
Alexis Hildebrandt](https://github.com/afh) | [
Sivukhin Nikita](https://github.com/sivukhin) | [
Joachim Bauch](https://github.com/fancycode)| [
kalimit](https://github.com/kalimit) | | | +[
Alexis Hildebrandt](https://github.com/afh) | [
Sivukhin Nikita](https://github.com/sivukhin) | [
Joachim Bauch](https://github.com/fancycode) | [
kalimit](https://github.com/kalimit) | [
Andreas Erhard](https://github.com/xelan) | [
Matsumoto Toshi](https://github.com/toshi1127) | [
Carl Wilson](https://github.com/carlwilson) | +[
LNAhri](https://github.com/LNAhri) | [
vishal](https://github.com/vishal-at) | [
Andreas Deininger](https://github.com/deining) | [
Robert Raines](https://github.com/solintllc-robert) | [
Frank Anderson](https://github.com/frob) | | diff --git a/cmd/pdfcpu/cmd.go b/cmd/pdfcpu/cmd.go index 27441058..9bf3e6e2 100644 --- a/cmd/pdfcpu/cmd.go +++ b/cmd/pdfcpu/cmd.go @@ -75,6 +75,18 @@ func parseFlags(cmd *command) { } initLogging(verbose, veryVerbose) } + + flag.Visit(func(f *flag.Flag) { + if f.Name == "bookmarks" || f.Name == "b" { + bookmarksSet = true + } + if f.Name == "offline" || f.Name == "off" || f.Name == "o" { + offlineSet = true + } + if f.Name == "optimize" || f.Name == "opt" { + optimizeSet = true + } + }) } func validateConfigDirFlag() { @@ -103,7 +115,7 @@ func validateConfigDirFlag() { func ensureDefaultConfig() (*model.Configuration, error) { validateConfigDirFlag() if !types.MemberOf(model.ConfigPath, []string{"default", "disable"}) { - if err := model.EnsureDefaultConfigAt(model.ConfigPath); err != nil { + if err := model.EnsureDefaultConfigAt(model.ConfigPath, false); err != nil { return nil, err } } @@ -139,7 +151,16 @@ func (m commandMap) process(cmdPrefix string, command string) (string, error) { conf.OwnerPW = opw conf.UserPW = upw + if offlineSet { + conf.Offline = offline + } + if m[cmdStr].handler != nil { + + if conf.Version != model.VersionStr && cmdStr != "reset" { + model.CheckConfigVersion(conf.Version) + } + m[cmdStr].handler(conf) return command, nil } diff --git a/cmd/pdfcpu/init.go b/cmd/pdfcpu/init.go index 592c9e32..d626f7e8 100644 --- a/cmd/pdfcpu/init.go +++ b/cmd/pdfcpu/init.go @@ -71,6 +71,30 @@ func initBoxesCmdMap() commandMap { return m } +func initCertificatesCmdMap() commandMap { + m := newCommandMap() + for k, v := range map[string]command{ + "list": {processListCertificatesCommand, nil, "", ""}, + "inspect": {processInspectCertificatesCommand, nil, "", ""}, + "import": {processImportCertificatesCommand, nil, "", ""}, + "reset": {resetCertificates, nil, "", ""}, + } { + m.register(k, v) + } + return m +} + +func initConfigCmdMap() commandMap { + m := newCommandMap() + for k, v := range map[string]command{ + "list": {printConfiguration, nil, "", ""}, + "reset": {resetConfiguration, nil, "", ""}, + } { + m.register(k, v) + } + return m +} + func initFontsCmdMap() commandMap { m := newCommandMap() for k, v := range map[string]command{ @@ -103,7 +127,9 @@ func initFormCmdMap() commandMap { func initImagesCmdMap() commandMap { m := newCommandMap() for k, v := range map[string]command{ - "list": {processListImagesCommand, nil, "", ""}, + "list": {processListImagesCommand, nil, "", ""}, + "extract": {processExtractImagesCommand, nil, "", ""}, + "update": {processUpdateImagesCommand, nil, "", ""}, } { m.register(k, v) } @@ -217,6 +243,17 @@ func initPageLayoutCmdMap() commandMap { return m } +func initSignaturesCmdMap() commandMap { + m := newCommandMap() + for k, v := range map[string]command{ + "validate": {processValidateSignaturesCommand, nil, "", ""}, + //"add": {processAddSignatureCommand, nil, "", ""}, + } { + m.register(k, v) + } + return m +} + func initViewerPreferencesCmdMap() commandMap { m := newCommandMap() for k, v := range map[string]command{ @@ -234,6 +271,8 @@ func initCommandMap() { attachCmdMap := initAttachCmdMap() bookmarksCmdMap := initBookmarksCmdMap() boxesCmdMap := initBoxesCmdMap() + certificatesCmdMap := initCertificatesCmdMap() + configCmdMap := initConfigCmdMap() fontsCmdMap := initFontsCmdMap() formCmdMap := initFormCmdMap() imagesCmdMap := initImagesCmdMap() @@ -242,6 +281,7 @@ func initCommandMap() { permissionsCmdMap := initPermissionsCmdMap() portfolioCmdMap := initPortfolioCmdMap() propertiesCmdMap := 
initPropertiesCmdMap() + signaturesCmdMap := initSignaturesCmdMap() stampCmdMap := initStampCmdMap() watermarkCmdMap := initWatermarkCmdMap() pageModeCmdMap := initPageModeCmdMap() @@ -256,10 +296,11 @@ func initCommandMap() { "bookmarks": {nil, bookmarksCmdMap, usageBookmarks, usageLongBookmarks}, "booklet": {processBookletCommand, nil, usageBooklet, usageLongBooklet}, "boxes": {nil, boxesCmdMap, usageBoxes, usageLongBoxes}, + "certificates": {nil, certificatesCmdMap, usageCertificates, usageLongCertificates}, "changeopw": {processChangeOwnerPasswordCommand, nil, usageChangeOwnerPW, usageLongChangeOwnerPW}, "changeupw": {processChangeUserPasswordCommand, nil, usageChangeUserPW, usageLongChangeUserPW}, "collect": {processCollectCommand, nil, usageCollect, usageLongCollect}, - "config": {printConfiguration, nil, usageConfig, usageLongConfig}, + "config": {nil, configCmdMap, usageConfig, usageLongConfig}, "create": {processCreateCommand, nil, usageCreate, usageLongCreate}, "crop": {processCropCommand, nil, usageCrop, usageLongCrop}, "cut": {processCutCommand, nil, usageCut, usageLongCut}, @@ -290,6 +331,7 @@ func initCommandMap() { "resize": {processResizeCommand, nil, usageResize, usageLongResize}, "rotate": {processRotateCommand, nil, usageRotate, usageLongRotate}, "selectedpages": {printSelectedPages, nil, usageSelectedPages, usageLongSelectedPages}, + "signatures": {nil, signaturesCmdMap, usageSignatures, usageLongSignatures}, "split": {processSplitCommand, nil, usageSplit, usageLongSplit}, "stamp": {nil, stampCmdMap, usageStamp, usageLongStamp}, "trim": {processTrimCommand, nil, usageTrim, usageLongTrim}, @@ -308,8 +350,8 @@ func initFlags() { flag.BoolVar(&all, "a", false, "") bookmarksUsage := "create bookmarks while merging" - flag.BoolVar(&bookmarks, "bookmarks", true, bookmarksUsage) - flag.BoolVar(&bookmarks, "b", true, bookmarksUsage) + flag.BoolVar(&bookmarks, "bookmarks", false, bookmarksUsage) + flag.BoolVar(&bookmarks, "b", false, bookmarksUsage) confUsage := "the config directory path | skip | none" flag.StringVar(&conf, "config", "", confUsage) @@ -320,6 +362,12 @@ func initFlags() { flag.BoolVar(÷rPage, "dividerPage", false, dividerPageUsage) flag.BoolVar(÷rPage, "d", false, dividerPageUsage) + fontsUsage := "include font info" + flag.BoolVar(&fonts, "fonts", false, fontsUsage) + + flag.BoolVar(&full, "full", false, "") + flag.BoolVar(&full, "f", false, "") + jsonUsage := "produce JSON output" flag.BoolVar(&json, "json", false, jsonUsage) flag.BoolVar(&json, "j", false, jsonUsage) @@ -332,10 +380,18 @@ func initFlags() { flag.BoolVar(&links, "links", false, linksUsage) flag.BoolVar(&links, "l", false, linksUsage) - modeUsage := "validate: strict|relaxed; extract: image|font|content|page|meta; encrypt: rc4|aes, stamp:text|image/pdf" + modeUsage := "validate: strict|relaxed; extract: image|font|content|page|meta; encrypt: rc4|aes; stamp:text|image/pdf" flag.StringVar(&mode, "mode", "", modeUsage) flag.StringVar(&mode, "m", "", modeUsage) + flag.BoolVar(&offline, "offline", false, "") + flag.BoolVar(&offline, "off", false, "") + flag.BoolVar(&offline, "o", false, "") + + optimizeUsage := "merge: optimize before writing" + flag.BoolVar(&optimize, "optimize", false, optimizeUsage) + flag.BoolVar(&optimize, "opt", false, optimizeUsage) + selectedPagesUsage := "a comma separated list of pages or page ranges, see pdfcpu selectedpages" flag.StringVar(&selectedPages, "pages", "", selectedPagesUsage) flag.StringVar(&selectedPages, "p", "", selectedPagesUsage) @@ -372,6 +428,7 @@ func 
initFlags() { func initLogging(verbose, veryVerbose bool) { needStackTrace = verbose || veryVerbose if quiet { + // TODO Need separate logger for command result output. return } diff --git a/cmd/pdfcpu/main.go b/cmd/pdfcpu/main.go index dc9e643c..207ae2b2 100644 --- a/cmd/pdfcpu/main.go +++ b/cmd/pdfcpu/main.go @@ -20,22 +20,31 @@ package main import ( "fmt" "os" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" ) var ( fileStats, mode, selectedPages string upw, opw, key, perm, unit, conf string verbose, veryVerbose bool - links, quiet, sorted, bookmarks bool - all, dividerPage, json, replaceBookmarks bool + links, quiet, offline bool + replaceBookmarks bool // Import Bookmarks + all bool // List Viewer Preferences + full bool // eg. signature validation output + fonts bool // Info + json bool // List Viewer Preferences, Info + bookmarks, dividerPage, optimize, sorted bool // Merge + bookmarksSet, offlineSet, optimizeSet bool needStackTrace = true cmdMap commandMap ) // Set by Goreleaser. var ( - commit = "?" - date = "?" + version = model.VersionStr + commit = "?" + date = "?" ) func init() { diff --git a/cmd/pdfcpu/process.go b/cmd/pdfcpu/process.go index 538a121c..47032c46 100644 --- a/cmd/pdfcpu/process.go +++ b/cmd/pdfcpu/process.go @@ -17,6 +17,7 @@ limitations under the License. package main import ( + "bufio" "bytes" "flag" "fmt" @@ -32,6 +33,7 @@ import ( "github.com/angel-one/pdfcpu/pkg/api" "github.com/angel-one/pdfcpu/pkg/cli" + "github.com/angel-one/pdfcpu/pkg/log" "github.com/angel-one/pdfcpu/pkg/pdfcpu" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" @@ -39,6 +41,13 @@ import ( "github.com/pkg/errors" ) +func abs(i int) int { + if i < 0 { + return -i + } + return i +} + func hasPDFExtension(filename string) bool { return strings.HasSuffix(strings.ToLower(filename), ".pdf") } @@ -109,6 +118,62 @@ func printConfiguration(conf *model.Configuration) { fmt.Print(string(buf.String())) } +func confirmed() bool { + reader := bufio.NewReader(os.Stdin) + for { + fmt.Print("(yes/no): ") + input, err := reader.ReadString('\n') + if err != nil { + fmt.Println("Error reading input. Please try again.") + continue + } + + input = strings.TrimSpace(strings.ToLower(input)) + + switch input { + case "yes": + return true + case "no": + return false + default: + fmt.Println("Invalid input. 
Please type 'yes' or 'no'.") + } + } +} + +func resetConfiguration(conf *model.Configuration) { + fmt.Printf("Did you make a backup of %s ?\n", conf.Path) + if confirmed() { + fmt.Printf("Are you ready to reset your config.yml to %s ?\n", model.VersionStr) + if confirmed() { + fmt.Println("resetting..") + if err := model.ResetConfig(); err != nil { + fmt.Fprintf(os.Stderr, "pdfcpu: config problem: %v\n", err) + os.Exit(1) + } + fmt.Println("Finished - Don't forget to update config.yml with your modifications.") + } else { + fmt.Println("Operation canceled.") + } + } else { + fmt.Println("Operation canceled.") + } +} + +func resetCertificates(conf *model.Configuration) { + fmt.Println("Are you ready to reset your certificates to your system root certificates?") + if confirmed() { + fmt.Println("resetting..") + if err := model.ResetCertificates(); err != nil { + fmt.Fprintf(os.Stderr, "pdfcpu: config problem: %v\n", err) + os.Exit(1) + } + fmt.Println("Finished") + } else { + fmt.Println("Operation canceled") + } +} + func printPaperSizes(conf *model.Configuration) { fmt.Fprintln(os.Stderr, paperSizes) } @@ -123,7 +188,7 @@ func printVersion(conf *model.Configuration) { os.Exit(1) } - fmt.Fprintf(os.Stdout, "pdfcpu: %s\n", model.VersionStr) + fmt.Fprintf(os.Stdout, "pdfcpu: %s\n", version) if date == "?" { if info, ok := debug.ReadBuildInfo(); ok { @@ -165,45 +230,143 @@ func process(cmd *cli.Command) { //os.Exit(0) } -func processValidateCommand(conf *model.Configuration) { - if len(flag.Args()) == 0 || selectedPages != "" { - fmt.Fprintf(os.Stderr, "%s\n\n", usageValidate) - os.Exit(1) +func getBaseDir(path string) string { + i := strings.Index(path, "**") + basePath := path[:i] + basePath = filepath.Clean(basePath) + if basePath == "" { + return "." 
} + return basePath +} + +func isDir(path string) (bool, error) { + info, err := os.Stat(path) + if err != nil { + return false, err + } + return info.IsDir(), nil +} + +func expandWildcardsRec(s string, inFiles *[]string, conf *model.Configuration) error { + s = filepath.Clean(s) + wantsPdf := strings.HasSuffix(s, ".pdf") + return filepath.WalkDir(getBaseDir(s), func(path string, d os.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + if ok := hasPDFExtension(path); ok { + *inFiles = append(*inFiles, path) + return nil + } + if !wantsPdf && conf.CheckFileNameExt { + if !quiet { + fmt.Fprintf(os.Stderr, "%s needs extension \".pdf\".\n", path) + } + } + return nil + }) +} +func expandWildcards(s string, inFiles *[]string, conf *model.Configuration) error { + paths, err := filepath.Glob(s) + if err != nil { + return err + } + for _, path := range paths { + + if conf.CheckFileNameExt { + if !hasPDFExtension(path) { + if isDir, err := isDir(path); isDir && err == nil { + continue + } + if !quiet { + fmt.Fprintf(os.Stderr, "%s needs extension \".pdf\".\n", path) + } + continue + } + } + + *inFiles = append(*inFiles, path) + } + return nil +} + +func collectInFiles(conf *model.Configuration) []string { inFiles := []string{} + for _, arg := range flag.Args() { + + if strings.Contains(arg, "**") { + // **/ skips files w/o extension "pdf" + // **/*.pdf + if err := expandWildcardsRec(arg, &inFiles, conf); err != nil { + fmt.Fprintf(os.Stderr, "%s", err) + } + continue + } + if strings.Contains(arg, "*") { - matches, err := filepath.Glob(arg) - if err != nil { + // * skips files w/o extension "pdf" + // *.pdf + if err := expandWildcards(arg, &inFiles, conf); err != nil { fmt.Fprintf(os.Stderr, "%s", err) - os.Exit(1) } - inFiles = append(inFiles, matches...) continue } + if conf.CheckFileNameExt { - ensurePDFExtension(arg) + if !hasPDFExtension(arg) { + if isDir, err := isDir(arg); isDir && err == nil { + if err := expandWildcards(arg+"/*", &inFiles, conf); err != nil { + fmt.Fprintf(os.Stderr, "%s", err) + } + continue + } + if !quiet { + fmt.Fprintf(os.Stderr, "%s needs extension \".pdf\".\n", arg) + } + continue + } } + inFiles = append(inFiles, arg) } - if mode != "" && mode != "strict" && mode != "s" && mode != "relaxed" && mode != "r" { + return inFiles +} + +func processValidateCommand(conf *model.Configuration) { + if len(flag.Args()) == 0 || selectedPages != "" { fmt.Fprintf(os.Stderr, "%s\n\n", usageValidate) os.Exit(1) } + inFiles := collectInFiles(conf) + switch mode { case "strict", "s": conf.ValidationMode = model.ValidationStrict case "relaxed", "r": conf.ValidationMode = model.ValidationRelaxed + case "": + default: + fmt.Fprintf(os.Stderr, "%s\n\n", usageValidate) + os.Exit(1) } if links { conf.ValidateLinks = true } + conf.Optimize = false + if optimizeSet { + conf.Optimize = optimize + } + process(cli.ValidateCommand(inFiles, conf)) } @@ -335,6 +498,7 @@ func processArgsForMerge(conf *model.Configuration) ([]string, string) { fmt.Fprintf(os.Stderr, "%s", err) os.Exit(1) } + // TODO check extension inFiles = append(inFiles, matches...) 
continue } @@ -346,6 +510,23 @@ func processArgsForMerge(conf *model.Configuration) ([]string, string) { return inFiles, outFile } +func mergeCommandVariation(inFiles []string, outFile string, dividerPage bool, conf *model.Configuration) *cli.Command { + switch mode { + + case "create": + return cli.MergeCreateCommand(inFiles, outFile, dividerPage, conf) + + case "zip": + return cli.MergeCreateZipCommand(inFiles, outFile, conf) + + case "append": + return cli.MergeAppendCommand(inFiles, outFile, dividerPage, conf) + + } + + return nil +} + func processMergeCommand(conf *model.Configuration) { if mode == "" { mode = "create" @@ -378,24 +559,20 @@ func processMergeCommand(conf *model.Configuration) { if conf == nil { conf = model.NewDefaultConfiguration() - conf.CreateBookmarks = bookmarks } - conf.CreateBookmarks = bookmarks - - var cmd *cli.Command - - switch mode { - - case "create": - cmd = cli.MergeCreateCommand(inFiles, outFile, dividerPage, conf) - - case "zip": - cmd = cli.MergeCreateZipCommand(inFiles, outFile, conf) + if bookmarksSet { + conf.CreateBookmarks = bookmarks + } - case "append": - cmd = cli.MergeAppendCommand(inFiles, outFile, dividerPage, conf) + if optimizeSet { + conf.OptimizeBeforeWriting = optimize + } + cmd := mergeCommandVariation(inFiles, outFile, dividerPage, conf) + if cmd == nil { + fmt.Fprintf(os.Stderr, "%s\n\n", usageMerge) + os.Exit(1) } process(cmd) @@ -630,6 +807,7 @@ func processListPermissionsCommand(conf *model.Configuration) { fmt.Fprintf(os.Stderr, "%s", err) os.Exit(1) } + // TODO check extension inFiles = append(inFiles, matches...) continue } @@ -885,7 +1063,7 @@ func addWatermarks(conf *model.Configuration, onTop bool) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) var ( wm *model.Watermark @@ -954,7 +1132,7 @@ func updateWatermarks(conf *model.Configuration, onTop bool) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) var ( wm *model.Watermark @@ -964,10 +1142,8 @@ func updateWatermarks(conf *model.Configuration, onTop bool) { switch mode { case "text": wm, err = pdfcpu.ParseTextWatermarkDetails(flag.Arg(0), flag.Arg(1), onTop, conf.Unit) - case "image": wm, err = pdfcpu.ParseImageWatermarkDetails(flag.Arg(0), flag.Arg(1), onTop, conf.Unit) - case "pdf": wm, err = pdfcpu.ParsePDFWatermarkDetails(flag.Arg(0), flag.Arg(1), onTop, conf.Unit) default: @@ -1082,7 +1258,7 @@ func processImportImagesCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) var outFile string outFile = flag.Arg(0) @@ -1091,6 +1267,7 @@ func processImportImagesCommand(conf *model.Configuration) { imp := pdfcpu.DefaultImportConfig() imageFileNames := parseArgsForImageFileNames(1) process(cli.ImportImagesCommand(imageFileNames, outFile, imp, conf)) + return } // pdfcpu import description outFile imageFile... 
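For context, these are the two invocation forms handled by processImportImagesCommand above; the file names are placeholders and the description syntax is the one documented under `pdfcpu help import`:

```shell
# Simplest form: convert an image into a single-page PDF.
pdfcpu import img.pdf img.jpg

# With a description string, e.g. images centered on A4 pages.
pdfcpu import "f:A4, pos:c" out.pdf img1.jpg img2.png
```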
@@ -1111,21 +1288,11 @@ func processImportImagesCommand(conf *model.Configuration) { } func processInsertPagesCommand(conf *model.Configuration) { - if len(flag.Args()) == 0 || len(flag.Args()) > 2 { + if len(flag.Args()) == 0 || len(flag.Args()) > 3 { fmt.Fprintf(os.Stderr, "usage: %s\n\n", usagePagesInsert) os.Exit(1) } - inFile := flag.Arg(0) - if conf.CheckFileNameExt { - ensurePDFExtension(inFile) - } - outFile := "" - if len(flag.Args()) == 2 { - outFile = flag.Arg(1) - ensurePDFExtension(outFile) - } - pages, err := api.ParsePageSelection(selectedPages) if err != nil { fmt.Fprintf(os.Stderr, "problem with flag selectedPages: %v\n", err) @@ -1138,7 +1305,43 @@ func processInsertPagesCommand(conf *model.Configuration) { os.Exit(1) } - process(cli.InsertPagesCommand(inFile, outFile, pages, conf, mode)) + inFile := flag.Arg(0) + if hasPDFExtension(inFile) { + // pdfcpu pages insert inFile [outFile] + + outFile := "" + if len(flag.Args()) == 2 { + outFile = flag.Arg(1) + ensurePDFExtension(outFile) + } + + process(cli.InsertPagesCommand(inFile, outFile, pages, conf, mode, nil)) + + return + } + + // pdfcpu pages insert description inFile [outFile] + + pageConf, err := pdfcpu.ParsePageConfiguration(flag.Arg(0), conf.Unit) + if err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + os.Exit(1) + } + if pageConf == nil { + fmt.Fprintf(os.Stderr, "missing page configuration\n") + os.Exit(1) + } + + inFile = flag.Arg(1) + ensurePDFExtension(inFile) + + outFile := "" + if len(flag.Args()) == 3 { + outFile = flag.Arg(2) + ensurePDFExtension(outFile) + } + + process(cli.InsertPagesCommand(inFile, outFile, pages, conf, mode, pageConf)) } func processRemovePagesCommand(conf *model.Configuration) { @@ -1170,13 +1373,6 @@ func processRemovePagesCommand(conf *model.Configuration) { process(cli.RemovePagesCommand(inFile, outFile, pages, conf)) } -func abs(i int) int { - if i < 0 { - return -i - } - return i -} - func processRotateCommand(conf *model.Configuration) { if len(flag.Args()) < 2 || len(flag.Args()) > 3 { fmt.Fprintf(os.Stderr, "%s\n\n", usageRotate) @@ -1295,7 +1491,7 @@ func processNUpCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) pages, err := api.ParsePageSelection(selectedPages) if err != nil { @@ -1332,7 +1528,7 @@ func processGridCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) pages, err := api.ParsePageSelection(selectedPages) if err != nil { @@ -1370,7 +1566,7 @@ func processBookletCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) pages, err := api.ParsePageSelection(selectedPages) if err != nil { @@ -1402,7 +1598,7 @@ func processBookletCommand(conf *model.Configuration) { process(cli.BookletCommand(inFiles, outFile, pages, nup, conf)) } -func processDiplayUnit(conf *model.Configuration) { +func processDisplayUnit(conf *model.Configuration) { if !types.MemberOf(unit, []string{"", "points", "po", "inches", "in", "cm", "mm"}) { fmt.Fprintf(os.Stderr, "%s\n\n", "supported units: (po)ints, (in)ches, cm, mm") os.Exit(1) @@ -1434,6 +1630,7 @@ func processInfoCommand(conf *model.Configuration) { fmt.Fprintf(os.Stderr, "%s", err) os.Exit(1) } + // TODO check extension inFiles = append(inFiles, matches...) 
continue } @@ -1449,9 +1646,13 @@ func processInfoCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) + + if json { + log.SetCLILogger(nil) + } - process(cli.InfoCommand(inFiles, selectedPages, json, conf)) + process(cli.InfoCommand(inFiles, selectedPages, fonts, json, conf)) } func processListFontsCommand(conf *model.Configuration) { @@ -1575,7 +1776,7 @@ func processAddPropertiesCommand(conf *model.Configuration) { continue } // Ensure key value pair. - ss := strings.Split(arg, "=") + ss := strings.SplitN(arg, "=", 2) if len(ss) != 2 { fmt.Fprintf(os.Stderr, "keyValuePair = 'key = value'\n") fmt.Fprintf(os.Stderr, "usage: %s\n\n", usagePropertiesAdd) @@ -1611,6 +1812,13 @@ func processRemovePropertiesCommand(conf *model.Configuration) { } continue } + + if !validate.DocumentProperty(arg) { + fmt.Fprintf(os.Stderr, "property name \"%s\" not allowed!\n", arg) + fmt.Fprintf(os.Stderr, "usage: %s\n\n", usagePropertiesRemove) + os.Exit(1) + } + keys = append(keys, arg) } @@ -1649,7 +1857,7 @@ func processListBoxesCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) selectedPages, err := api.ParsePageSelection(selectedPages) if err != nil { @@ -1663,6 +1871,7 @@ func processListBoxesCommand(conf *model.Configuration) { ensurePDFExtension(inFile) } process(cli.ListBoxesCommand(inFile, selectedPages, nil, conf)) + return } pb, err := api.PageBoundariesFromBoxList(flag.Arg(0)) @@ -1685,7 +1894,7 @@ func processAddBoxesCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) pb, err := api.PageBoundaries(flag.Arg(0), conf.Unit) if err != nil { @@ -1760,7 +1969,7 @@ func processCropCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) box, err := api.Box(flag.Arg(0), conf.Unit) if err != nil { @@ -1807,6 +2016,7 @@ func processListAnnotationsCommand(conf *model.Configuration) { process(cli.ListAnnotationsCommand(inFile, selectedPages, conf)) } + func processRemoveAnnotationsCommand(conf *model.Configuration) { if len(flag.Args()) < 1 { fmt.Fprintf(os.Stderr, "usage: %s\n", usageAnnotsRemove) @@ -1867,6 +2077,7 @@ func processListImagesCommand(conf *model.Configuration) { fmt.Fprintf(os.Stderr, "%s", err) os.Exit(1) } + // TODO check extension inFiles = append(inFiles, matches...) 
continue } @@ -1885,6 +2096,75 @@ func processListImagesCommand(conf *model.Configuration) { process(cli.ListImagesCommand(inFiles, selectedPages, conf)) } +func processExtractImagesCommand(conf *model.Configuration) { + // See also processExtractCommand + if len(flag.Args()) != 2 { + fmt.Fprintf(os.Stderr, "%s\n\n", usageImagesExtract) + os.Exit(1) + } + + inFile := flag.Arg(0) + if conf.CheckFileNameExt { + ensurePDFExtension(inFile) + } + outDir := flag.Arg(1) + + pages, err := api.ParsePageSelection(selectedPages) + if err != nil { + fmt.Fprintf(os.Stderr, "problem with flag selectedPages: %v\n", err) + os.Exit(1) + } + + process(cli.ExtractImagesCommand(inFile, outDir, pages, conf)) +} + +func processUpdateImagesCommand(conf *model.Configuration) { + argCount := len(flag.Args()) + if argCount < 2 || argCount > 5 { + fmt.Fprintf(os.Stderr, "%s\n\n", usageImagesUpdate) + os.Exit(1) + } + + inFile := flag.Arg(0) + if conf.CheckFileNameExt { + ensurePDFExtension(inFile) + } + + imageFile := flag.Arg(1) + ensureImageExtension(imageFile) + + outFile := "" + objNrOrPageNr := 0 + id := "" + + if argCount > 2 { + c := 2 + if hasPDFExtension(flag.Arg(2)) { + outFile = flag.Arg(2) + c++ + } + if argCount > c { + i, err := strconv.Atoi(flag.Arg(c)) + if err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + os.Exit(1) + } + if i <= 0 { + fmt.Fprintln(os.Stderr, "objNr & pageNr must be > 0") + os.Exit(1) + } + objNrOrPageNr = i + if argCount == c+2 { + id = flag.Arg(c + 1) + } + } + } + + //fmt.Printf("inFile:%s imgFile:%s outFile:%s, objPageNr:%d, id:%s\n", inFile, imageFile, outFile, objNrOrPageNr, id) + + process(cli.UpdateImagesCommand(inFile, imageFile, outFile, objNrOrPageNr, id, conf)) +} + func processDumpCommand(conf *model.Configuration) { s := "No dump for you! - One year!\n\n" if len(flag.Args()) != 3 { @@ -1955,6 +2235,7 @@ func processListFormFieldsCommand(conf *model.Configuration) { fmt.Fprintf(os.Stderr, "%s", err) os.Exit(1) } + // TODO check extension inFiles = append(inFiles, matches...) 
continue } @@ -2187,7 +2468,7 @@ func processResizeCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) rc, err := pdfcpu.ParseResizeConfig(flag.Arg(0), conf.Unit) if err != nil { @@ -2221,7 +2502,7 @@ func processPosterCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) // formsize(=papersize) or dimensions, optionally: scalefactor, border, margin, bgcolor cut, err := pdfcpu.ParseCutConfigForPoster(flag.Arg(0), conf.Unit) @@ -2257,7 +2538,7 @@ func processNDownCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) selectedPages, err := api.ParsePageSelection(selectedPages) if err != nil { @@ -2289,6 +2570,7 @@ func processNDownCommand(conf *model.Configuration) { } process(cli.NDownCommand(inFile, outDir, outFile, selectedPages, n, cut, conf)) + return } // pdfcpu ndown description n inFile outDir outFile @@ -2326,7 +2608,7 @@ func processCutCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) // required: at least one of horizontalCut, verticalCut // optionally: border, margin, bgcolor @@ -2538,6 +2820,11 @@ func processListViewerPreferencesCommand(conf *model.Configuration) { if conf.CheckFileNameExt { ensurePDFExtension(inFile) } + + if json { + log.SetCLILogger(nil) + } + process(cli.ListViewerPreferencesCommand(inFile, all, json, conf)) } @@ -2583,7 +2870,7 @@ func processZoomCommand(conf *model.Configuration) { os.Exit(1) } - processDiplayUnit(conf) + processDisplayUnit(conf) zc, err := pdfcpu.ParseZoomConfig(flag.Arg(0), conf.Unit) if err != nil { @@ -2610,3 +2897,102 @@ func processZoomCommand(conf *model.Configuration) { process(cli.ZoomCommand(inFile, outFile, selectedPages, zc, conf)) } + +func processListCertificatesCommand(conf *model.Configuration) { + if len(flag.Args()) > 1 || selectedPages != "" { + fmt.Fprintf(os.Stderr, "%s\n\n", usageCertificatesList) + os.Exit(1) + } + if json { + log.SetCLILogger(nil) + } + process(cli.ListCertificatesCommand(json, conf)) +} + +func processInspectCertificatesCommand(conf *model.Configuration) { + if len(flag.Args()) < 1 || selectedPages != "" { + fmt.Fprintf(os.Stderr, "%s\n\n", usageCertificatesInspect) + os.Exit(1) + } + inFiles := []string{} + for _, arg := range flag.Args() { + if strings.Contains(arg, "*") { + matches, err := filepath.Glob(arg) + if err != nil { + fmt.Fprintf(os.Stderr, "%s", err) + os.Exit(1) + } + for _, inFile := range matches { + if !isCertificateFile(inFile) { + fmt.Fprintf(os.Stderr, "skipping %s - allowed extensions: .pem, .p7c, .cer, .crt\n", inFile) + } else { + inFiles = append(inFiles, inFile) + } + } + continue + } + if !isCertificateFile(arg) { + fmt.Fprintf(os.Stderr, "%s - allowed extensions: .pem, .p7c, .cer, .crt\n", arg) + os.Exit(1) + } + inFiles = append(inFiles, arg) + } + + process(cli.InspectCertificatesCommand(inFiles, conf)) +} + +func isCertificateFile(fName string) bool { + for _, ext := range []string{".p7c", ".pem", ".cer", ".crt"} { + if strings.HasSuffix(strings.ToLower(fName), ext) { + return true + } + } + return false +} + +func processImportCertificatesCommand(conf *model.Configuration) { + if len(flag.Args()) < 1 || selectedPages != "" { + fmt.Fprintf(os.Stderr, "%s\n\n", usageCertificatesImport) + os.Exit(1) + } + inFiles := []string{} + for _, arg := range flag.Args() { + if strings.Contains(arg, "*") { + matches, err := filepath.Glob(arg) + if err != nil { + 
fmt.Fprintf(os.Stderr, "%s", err) + os.Exit(1) + } + for _, inFile := range matches { + if !isCertificateFile(inFile) { + fmt.Fprintf(os.Stderr, "skipping %s - allowed extensions: .pem, .p7c, .cer, .crt\n", inFile) + } else { + inFiles = append(inFiles, inFile) + } + } + continue + } + if !isCertificateFile(arg) { + fmt.Fprintf(os.Stderr, "%s - allowed extensions: .pem, .p7c, .cer, .crt\n", arg) + os.Exit(1) + } + inFiles = append(inFiles, arg) + } + + process(cli.ImportCertificatesCommand(inFiles, conf)) +} + +func processValidateSignaturesCommand(conf *model.Configuration) { + if len(flag.Args()) > 1 || selectedPages != "" { + fmt.Fprintf(os.Stderr, "%s\n\n", usageSignaturesValidate) + os.Exit(1) + } + + inFile := flag.Arg(0) + + if conf.CheckFileNameExt { + ensurePDFExtension(inFile) + } + + process(cli.ValidateSignaturesCommand(inFile, all, full, conf)) +} diff --git a/cmd/pdfcpu/usage.go b/cmd/pdfcpu/usage.go index 8ed90ffd..3d0892a7 100644 --- a/cmd/pdfcpu/usage.go +++ b/cmd/pdfcpu/usage.go @@ -30,10 +30,11 @@ The commands are: booklet arrange pages onto larger sheets of paper to make a booklet or zine bookmarks list, import, export, remove bookmarks boxes list, add, remove page boundaries for selected pages + certificates list, inspect, import, reset certificates changeopw change owner password changeupw change user password collect create custom sequence of selected pages - config print configuration + config list, reset configuration create create PDF content including forms via JSON crop set cropbox for selected pages cut custom cut pages horizontally or vertically @@ -43,7 +44,7 @@ The commands are: fonts install, list supported fonts, create cheat sheets form list, remove fields, lock, unlock, reset, export, fill form via JSON or CSV grid rearrange pages or images for enhanced browsing experience - images list images for selected pages + images list, extract, update images import import/convert images to PDF info print file info keywords list, add, remove keywords @@ -62,6 +63,7 @@ The commands are: resize scale selected pages rotate rotate selected pages selectedpages print definition of the -pages flag + signatures validate signatures split split up a PDF by span or bookmark stamp add, remove, update Unicode text, image or PDF stamps for selected pages trim create trimmed version of selected pages @@ -81,6 +83,7 @@ Use "pdfcpu help [command]" for more information about a command.` common flags: -v(erbose) ... turn on logging -vv ... verbose logging -q(uiet) ... disable output + -o(ffline) ... disable http traffic -c(onf) ... set or disable config dir: $path|disable -opw ... owner password -upw ... user password @@ -89,20 +92,23 @@ common flags: -v(erbose) ... turn on logging cm ... centimetres mm ... millimetres` - usageValidate = "usage: pdfcpu validate [-m(ode) strict|relaxed] [-l(inks)] inFile..." + generalFlags + usageValidate = "usage: pdfcpu validate [-m(ode) strict|relaxed] [-l(inks) -opt(imize)] -- inFile..." + generalFlags usageLongValidate = `Check inFile for specification compliance. mode ... validation mode links ... check for broken links + optimize ... optimize resources (fonts, forms, images) inFile ... input PDF file The validation modes are: + strict ... validates against PDF 32000-1:2008 (PDF 1.7) and rudimentary against PDF 32000:2 (PDF 2.0) + relaxed ... (default) like strict but doesn't complain about common seen spec violations. - strict ... validates against PDF 32000-1:2008 (PDF 1.7) and rudimentary against PDF 32000:2 (PDF 2.0) -relaxed ... 
(default) like strict but doesn't complain about common seen spec violations.` +Validation turns off optimization unless in verbose mode. +You can enforce optimization using -opt=true.` - usageOptimize = "usage: pdfcpu optimize [-stats csvFile] inFile [outFile]" + generalFlags + usageOptimize = "usage: pdfcpu optimize [-stats csvFile] -- inFile [outFile]" + generalFlags usageLongOptimize = `Read inFile, remove redundant page resources like embedded fonts and images and write the result to outFile. stats ... appends a stats line to a csv file with information about the usage of root and page entries. @@ -110,7 +116,7 @@ relaxed ... (default) like strict but doesn't complain about common seen spec vi inFile ... input PDF file outFile ... output PDF file` - usageSplit = "usage: pdfcpu split [-m(ode) span|bookmark|page] inFile outDir [span|pageNr...]" + generalFlags + usageSplit = "usage: pdfcpu split [-m(ode) span|bookmark|page] -- inFile outDir [span|pageNr...]" + generalFlags usageLongSplit = `Generate a set of PDFs for the input file in outDir according to given span value or along bookmarks or page numbers. mode ... split mode (defaults to span) @@ -154,13 +160,14 @@ Eg. pdfcpu split test.pdf . (= pdfcpu split -m span test.pdf . 1) test_4-9.pdf test_10-20.pdf` - usageMerge = "usage: pdfcpu merge [-m(ode) create|append|zip] [ -s(ort) -b(ookmarks) -d(ivider)] outFile inFile..." + generalFlags + usageMerge = "usage: pdfcpu merge [-m(ode) create|append|zip] [ -s(ort) -b(ookmarks) -d(ivider) -opt(imize)] -- outFile inFile..." + generalFlags usageLongMerge = `Concatenate a sequence of PDFs/inFiles into outFile. mode ... merge mode (defaults to create) sort ... sort inFiles by file name bookmarks ... create bookmarks divider ... insert blank page between merged documents + optimize ... optimize before writing (default: true) outFile ... output PDF file inFile ... a list of PDF files subject to concatenation. @@ -173,7 +180,9 @@ The merge modes are: zip ... zip inFile1 and inFile2 into outFile (which will be created and possibly overwritten). -Skip bookmark creation like so: -bookmarks=false` +Skip bookmark creation: -b(ookmarks)=false + +Skip optimization before writing: -opt(imize)=false` usagePageSelection = `'-pages' selects pages for processing and is a comma separated list of expressions: @@ -195,7 +204,7 @@ Skip bookmark creation like so: -bookmarks=false` e.g. -3,5,7- or 4-7,!6 or 1-,!5 or odd,n1` - usageExtract = "usage: pdfcpu extract -m(ode) i(mage)|f(ont)|c(ontent)|p(age)|m(eta) [-p(ages) selectedPages] inFile outDir" + generalFlags + usageExtract = "usage: pdfcpu extract -m(ode) i(mage)|f(ont)|c(ontent)|p(age)|m(eta) [-p(ages) selectedPages] -- inFile outDir" + generalFlags usageLongExtract = `Export inFile's images, fonts, content or pages into outDir. mode ... extraction mode @@ -213,7 +222,7 @@ content ... extract raw page content ` - usageTrim = "usage: pdfcpu trim -p(ages) selectedPages inFile [outFile]" + generalFlags + usageTrim = "usage: pdfcpu trim -p(ages) selectedPages -- inFile [outFile]" + generalFlags usageLongTrim = `Generate a trimmed version of inFile for selected pages. pages ... Please refer to "pdfcpu selectedpages" @@ -265,8 +274,8 @@ content ... extract raw page content pdfcpu portfolio add test.pdf "test.mp3, Test sound file" "test.mkv, Test video file" ` - usagePermList = "pdfcpu permissions list [-upw userpw] [-opw ownerpw] inFile..." 
- usagePermSet = "pdfcpu permissions set [-perm none|print|all|max4Hex|max12Bits] [-upw userpw] -opw ownerpw inFile" + usagePermList = "pdfcpu permissions list [-upw userpw] [-opw ownerpw] -- inFile..." + usagePermSet = "pdfcpu permissions set [-perm none|print|all|max4Hex|max12Bits] [-upw userpw] -opw ownerpw -- inFile" usagePerm = "usage: " + usagePermList + "\n " + usagePermSet + generalFlags @@ -299,22 +308,24 @@ content ... extract raw page content 11: Assemble document (security handlers >= rev.3) 12: Print (security handlers >= rev.3)` - usageEncrypt = "usage: pdfcpu encrypt [-m(ode) rc4|aes] [-key 40|128|256] [-perm none|print|all] [-upw userpw] -opw ownerpw inFile [outFile]" + generalFlags + usageEncrypt = "usage: pdfcpu encrypt [-m(ode) rc4|aes] [-key 40|128|256] [-perm none|print|all] [-upw userpw] -opw ownerpw -- inFile [outFile]" + generalFlags usageLongEncrypt = `Setup password protection based on user and owner password. mode ... algorithm (default=aes) key ... key length in bits (default=256) perm ... user access permissions inFile ... input PDF file - outFile ... output PDF file` + outFile ... output PDF file + + PDF 2.0 files have to be encrypted using aes/256.` - usageDecrypt = "usage: pdfcpu decrypt [-upw userpw] [-opw ownerpw] inFile [outFile]" + generalFlags + usageDecrypt = "usage: pdfcpu decrypt [-upw userpw] [-opw ownerpw] -- inFile [outFile]" + generalFlags usageLongDecrypt = `Remove password protection and reset permissions. inFile ... input PDF file outFile ... output PDF file` - usageChangeUserPW = "usage: pdfcpu changeupw [-opw ownerpw] inFile upwOld upwNew" + generalFlags + usageChangeUserPW = "usage: pdfcpu changeupw [-opw ownerpw] -- inFile upwOld upwNew" + generalFlags usageLongChangeUserPW = `Change the user password also known as the open doc password. opw ... owner password, required unless = "" @@ -322,7 +333,7 @@ content ... extract raw page content upwOld ... old user password upwNew ... new user password` - usageChangeOwnerPW = "usage: pdfcpu changeopw [-upw userpw] inFile opwOld opwNew" + generalFlags + usageChangeOwnerPW = "usage: pdfcpu changeopw [-upw userpw] -- inFile opwOld opwNew" + generalFlags usageLongChangeOwnerPW = `Change the owner password also known as the set permissions password. upw ... user password, required unless = "" @@ -476,7 +487,7 @@ e.g. "pos:bl, off: 20 5" "rot:45" "op:0.5, scale:0.5 abs, rot: usageStampAdd = "pdfcpu stamp add [-p(ages) selectedPages] -m(ode) text|image|pdf -- string|file description inFile [outFile]" usageStampUpdate = "pdfcpu stamp update [-p(ages) selectedPages] -m(ode) text|image|pdf -- string|file description inFile [outFile]" - usageStampRemove = "pdfcpu stamp remove [-p(ages) selectedPages] inFile [outFile]" + usageStampRemove = "pdfcpu stamp remove [-p(ages) selectedPages] -- inFile [outFile]" usageStamp = "usage: " + usageStampAdd + "\n " + usageStampUpdate + @@ -499,7 +510,7 @@ description ... 
fontname, points, position, offset, scalefactor, aligntext, rota usageWatermarkAdd = "pdfcpu watermark add [-p(ages) selectedPages] -m(ode) text|image|pdf -- string|file description inFile [outFile]" usageWatermarkUpdate = "pdfcpu watermark update [-p(ages) selectedPages] -m(ode) text|image|pdf -- string|file description inFile [outFile]" - usageWatermarkRemove = "pdfcpu watermark remove [-p(ages) selectedPages] inFile [outFile]" + usageWatermarkRemove = "pdfcpu watermark remove [-p(ages) selectedPages] -- inFile [outFile]" usageWatermark = "usage: " + usageWatermarkAdd + "\n " + usageWatermarkUpdate + @@ -524,7 +535,7 @@ If outFile already exists the page sequence will be appended. Each imageFile will be rendered to a separate page. In its simplest form this converts an image into a PDF: "pdfcpu import img.pdf img.jpg" -description ... dimensions, format, position, offset, scale factor, boxes +description ... dimensions, formsize, position, offset, scale factor, boxes outFile ... output PDF file imageFile ... a list of image files @@ -532,7 +543,7 @@ description ... dimensions, format, position, offset, scale factor, boxes optional entries: - (defaults: "d:595 842, f:A4, pos:full, off:0 0, sc:0.5 rel, dpi:72, gray:off, sepia:off") + (defaults: "dim:595 842, f:A4, pos:full, off:0 0, sc:0.5 rel, dpi:72, gray:off, sepia:off") dimensions: (width height) in given display unit eg. '400 200' setting the media box @@ -560,19 +571,18 @@ description ... dimensions, format, position, offset, scale factor, boxes backgroundcolor: "bgcolor" is also accepted. - Only one of dimensions or format is allowed. + Only one of dimensions or formsize is allowed. position: full => image dimensions equal page dimensions. All configuration string parameters support completion. - e.g. "f:A5, pos:c" ... render the image centered on A5 with relative scaling 0.5.' - "d:300 600, pos:bl, off:20 20, sc:1.0 abs" ... render the image anchored to bottom left corner with offset 20,20 and abs. scaling 1.0. - "pos:full" ... render the image to a page with corresponding dimensions. - "f:A4, pos:c, dpi:300" ... render the image centered on A4 respecting a destination resolution of 300 dpi. - ` + e.g. "f:A5, pos:c" ... render the image centered on A5 with relative scaling 0.5.' + "dim:300 600, pos:bl, off:20 20, sc:1.0 abs" ... render the image anchored to bottom left corner with offset 20,20 and abs. scaling 1.0. + "pos:full" ... render the image to a page with corresponding dimensions. + "f:A4, pos:c, dpi:300" ... render the image centered on A4 respecting a destination resolution of 300 dpi.` - usagePagesInsert = "pdfcpu pages insert [-p(ages) selectedPages] [-m(ode) before|after] inFile [outFile]" - usagePagesRemove = "pdfcpu pages remove -p(ages) selectedPages inFile [outFile]" + usagePagesInsert = "pdfcpu pages insert [-p(ages) selectedPages] [-m(ode) before|after] -- [description] inFile [outFile]" + usagePagesRemove = "pdfcpu pages remove -p(ages) selectedPages -- inFile [outFile]" usagePages = "usage: " + usagePagesInsert + "\n " + usagePagesRemove + generalFlags @@ -580,12 +590,41 @@ description ... dimensions, format, position, offset, scale factor, boxes pages ... Please refer to "pdfcpu selectedpages" mode ... before, after (default: before) +description ... dimensions, formsize inFile ... input PDF file outFile ... output PDF file + is a comma separated configuration string containing: + + optional entries: + + (defaults: "dim:595 842, f:A4") + + dimensions: (width height) in given display unit eg. 
'400 200' setting the media box + + formsize: eg. A4, Letter, Legal... + Append 'L' to enforce landscape mode. (eg. A3L) + Append 'P' to enforce portrait mode. (eg. TabloidP) + Please refer to "pdfcpu paper" for a comprehensive list of defined paper sizes. + "papersize" is also accepted. + + All configuration string parameters support completion. + + Examples: pdfcpu pages insert in.pdf + Insert one blank page before each page using the form size imposed internally by the current media box. + + pdfcpu pages insert -pages 3 "f:A5L" in.pdf + Insert one blank A5 page in landscape mode before page 3. + + pdfcpu pages insert "dim: 10 5" -u cm in.pdf + Insert one blank 10 x 5 cm separator page for all pages. + + pdfcpu pages remove -p odd in.pdf out.pdf + pdfcpu pages remove -pages=odd in.pdf out.pdf + Remove all odd pages. ` - usageRotate = "usage: pdfcpu rotate [-p(ages) selectedPages] inFile rotation [outFile]" + generalFlags + usageRotate = "usage: pdfcpu rotate [-p(ages) selectedPages] -- inFile rotation [outFile]" + generalFlags usageLongRotate = `Rotate selected pages by a multiple of 90 degrees. pages ... Please refer to "pdfcpu selectedpages" @@ -601,7 +640,7 @@ This reduces the number of pages and therefore the required print time. If the input is one imageFile a single page n-up PDF gets generated. pages ... inFile only, please refer to "pdfcpu selectedpages" -description ... dimensions, format, orientation +description ... dimensions, formsize, orientation outFile ... output PDF file n ... the n-Up value (see below for details) inFile ... input PDF file @@ -620,23 +659,30 @@ description ... dimensions, format, orientation optional entries: - (defaults: "di:595 842, form:A4, or:rd, bo:on, ma:3") + (defaults: "di:595 842, form:A4, or:rd, bo:on, ma:3, enforce:on") dimensions: (width,height) in given display unit eg. '400 200' + formsize: The output sheet size, eg. A4, Letter, Legal... Append 'L' to enforce landscape mode. (eg. A3L) Append 'P' to enforce portrait mode. (eg. TabloidP) - Only one of dimensions or format is allowed. + Only one of dimensions or formsize is allowed. Please refer to "pdfcpu paper" for a comprehensive list of defined paper sizes. "papersize" is also accepted. + orientation: one of rd ... right down (=default) dr ... down right ld ... left down dl ... down left Orientation applies to PDF input files only. - border: Print border (on/off, true/false, t/f) + + enforce: enforce best-fit orientation of individual content (on/off, true/false, t/f). + + border: Print border (on/off, true/false, t/f) + margin: for n-up content: float >= 0 in given display unit - backgroundcolor: backgound color for margin > 0. + + backgroundcolor: background color for margin > 0. "bgcolor" is also accepted. All configuration string parameters support completion. @@ -651,10 +697,10 @@ Examples: pdfcpu nup out.pdf 4 in.pdf in.pdf's page size will be preserved. pdfcpu nup out.pdf 9 logo.jpg - Arrange instances of logo.jpg into a 3x3 grid and write result to out.pdf using the A4 default format. + Arrange instances of logo.jpg into a 3x3 grid and write result to out.pdf using the A4 default form size. pdfcpu nup -- "form:Tabloid" out.pdf 4 *.jpg - Rearrange all jpg files into 2x2 grids and write result to out.pdf using the Tabloid format + Rearrange all jpg files into 2x2 grids and write result to out.pdf using the Tabloid form size and the default orientation. ` @@ -679,11 +725,6 @@ n=2: This is the simplest case and the most common for those printing at home. 
Two of your pages fit on one side of a sheet (eg statement on letter, A5 on A4) Assemble by printing on both sides (odd pages on the front and even pages on the back) and folding down the middle. -A variant of n=2 is multifolio, a technique to bind your own hardback book. -This technique makes the most sense when your book has at least 128 pages. -For example, you can bind your paper in eight sheet folios (also known as signatures), with each folio containing 32 pages of your book. -For such a multi folio booklet set 'multifolio:on' and play around with 'foliosize' which defaults to 8. - n=4: Four of your pages fit on one side of a sheet (eg statement on ledger, A5 on A3, A6 on A4). When printing 4-up, your booklet can be bound either along the long-edge (for portrait this is the left side of the paper, for landscape the top) @@ -712,6 +753,14 @@ meaning that the pages are cut along the binding and not folded as in the other This results in a different page ordering on the sheet than the other methods. If you intend to perfect bind your booklet, use btype=perfectbound. +There is also an option to use signatures, a bookbinding method useful for books with higher page counts. +In this method of binding, you arrange your folios (sheets folded in half) in groups of 'foliosize'. +Each group is called a signature. You then stack the signatures together to form the book. +For example, you can bind your paper in groups of eight sheets (foliosize=8), so that each signature contains 32 pages of your book. +For such a multi folio booklet set 'multifolio:on' and 'foliosize', which defaults to 8. +The last signature may be shorter, e.g. a booklet of 120 pages with signature size=16 (foliosize=4) will have 7 complete signatures and a final signature of only 8 pages. + + portrait landscape Possible values for n: 2 ... 1x2 -- 4 ... 2x2 2x2 @@ -726,7 +775,7 @@ use btype=perfectbound. formsize: The output sheet size, eg. A4, Letter, Legal... Append 'L' to enforce landscape mode. (eg. A3L) Append 'P' to enforce portrait mode. (eg. TabloidP) - Only one of dimensions or format is allowed. + Only one of dimensions or formsize is allowed. Please refer to "pdfcpu paper" for a comprehensive list of defined paper sizes. "papersize" is also accepted. btype: The method for arranging pages into a booklet. (booklet, bookletadvanced, perfectbound) @@ -736,7 +785,7 @@ use btype=perfectbound. border: Print border (on/off, true/false, t/f) guides: Print folding and cutting lines (on/off, true/false, t/f) margin: Apply content margin (float >= 0 in given display unit) - backgroundcolor: sheet backgound color for margin > 0. + backgroundcolor: sheet background color for margin > 0. "bgcolor" is also accepted. All configuration string parameters support completion. @@ -776,7 +825,7 @@ For image inputfiles each output page shows all images laid out onto grids of gi This command produces poster like PDF pages convenient for page and image browsing. pages ... Please refer to "pdfcpu selectedpages" -description ... dimensions, format, orientation +description ... dimensions, formsize, orientation, enforce outFile ... output PDF file m ... grid lines n ... grid columns @@ -787,21 +836,27 @@ description ... dimensions, format, orientation optional entries: - (defaults: "d:595 842, form:A4, o:rd, bo:on, ma:3") + (defaults: "d:595 842, form:A4, o:rd, bo:on, ma:3, enforce:on") dimensions: (width height) in given display unit eg. '400 200' + formsize: The output sheet size, eg. A4, Letter, Legal... 
Append 'L' to enforce landscape mode. (eg. A3L) Append 'P' to enforce portrait mode. (eg. TabloidP) - Only one of dimensions or format is allowed. + Only one of dimensions or formsize is allowed. Please refer to "pdfcpu paper" for a comprehensive list of defined paper sizes. "papersize" is also accepted. + orientation: one of rd ... right down (=default) dr ... down right ld ... left down dl ... down left Orientation applies to PDF input files only. - border: Print border (on/off, true/false, t/f) + + enforce: enforce best-fit orientation of individual content (on/off, true/false, t/f). + + border: Print border (on/off, true/false, t/f) + margin: Apply content margin (float >= 0 in given display unit) All configuration string parameters support completion. @@ -890,16 +945,14 @@ Examples: pdfcpu grid out.pdf 1 10 in.pdf usagePaper = "usage: pdfcpu paper" usageLongPaper = "Print a list of supported paper sizes." - usageConfig = "usage: pdfcpu config" - usageLongConfig = "Print configuration." - usageSelectedPages = "usage: pdfcpu selectedpages" usageLongSelectedPages = "Print definition of the -pages flag." - usageInfo = "usage: pdfcpu info [-p(ages) selectedPages] [-j(son)] inFile..." + generalFlags + usageInfo = "usage: pdfcpu info [-p(ages) selectedPages] [-fonts -j(son)] -- inFile..." + generalFlags usageLongInfo = `Print info about a PDF file. pages ... Please refer to "pdfcpu selectedpages" + fonts ... include font info json ... output JSON inFile ... a list of PDF input files` @@ -953,7 +1006,7 @@ nameValuePair ... 'name = value' remove all properties: pdfcpu properties remove test.pdf ` - usageCollect = "usage: pdfcpu collect -p(ages) selectedPages inFile [outFile]" + generalFlags + usageCollect = "usage: pdfcpu collect -p(ages) selectedPages -- inFile [outFile]" + generalFlags usageLongCollect = `Create custom sequence of selected pages. pages ... Please refer to "pdfcpu selectedpages" @@ -1052,8 +1105,8 @@ Examples: ` + usageBoxDescription - usageAnnotsList = "pdfcpu annotations list [-p(ages) selectedPages] inFile" - usageAnnotsRemove = "pdfcpu annotations remove [-p(ages) selectedPages] inFile [outFile] [objNr|annotId|annotType]..." + usageAnnotsList = "pdfcpu annotations list [-p(ages) selectedPages] -- inFile" + usageAnnotsRemove = "pdfcpu annotations remove [-p(ages) selectedPages] -- inFile [outFile] [objNr|annotId|annotType]..." usageAnnots = "usage: " + usageAnnotsList + "\n " + usageAnnotsRemove + generalFlags @@ -1094,16 +1147,48 @@ Examples: pdfcpu annot remove in.pdf out.pdf Link 30 Text someId ` - usageImagesList = "pdfcpu images list [-p(ages) selectedPages] inFile..." + generalFlags + usageImagesList = "pdfcpu images list [-p(ages) selectedPages] -- inFile..." + usageImagesExtract = "pdfcpu images extract [-p(ages) selectedPages] -- inFile outDir" + usageImagesUpdate = "pdfcpu images update inFile imageFile [outFile] [ objNr | (pageNr Id) ]" - usageImages = "usage: " + usageImagesList + usageImages = "usage: " + usageImagesList + + "\n " + usageImagesExtract + + "\n " + usageImagesUpdate + generalFlags - usageLongImages = `Manage keywords. + usageLongImages = `Manage images. pages ... Please refer to "pdfcpu selectedpages" inFile ... input PDF file + imageFile ... image file + outFile ... output PDF file + objNr ... obj# from "pdfcpu images list" + pageNr ... Page from "pdfcpu images list" + Id ... 
Id from "pdfcpu images list" - Example: pdfcpu images list -p "1-5" gallery.pdf + Example: pdfcpu images list gallery.pdf + gallery.pdf: + 1 images available (1.8 MB) + Page Obj# │ Id │ Type SoftMask ImgMask │ Width │ Height │ ColorSpace Comp bpc Interp │ Size │ Filters + ━━━━━━━━━━┿━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━┿━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━┿━━━━━━━━━━━━ + 1 3 │ Im0 │ image │ 1268 │ 720 │ DeviceRGB 3 8 * │ 1.8 MB │ FlateDecode + + # Extract all images into the current dir + pdfcpu images extract gallery.pdf . + extracting images from gallery.pdf into ./ ... + optimizing... + writing gallery_1_Im0.png + + # Update image with Id=Im0 on page=1 with gallery_1_Im0.png + pdfcpu images update gallery.pdf gallery_1_Im0.png + pdfcpu images update gallery.pdf gallery_1_Im0.png out.pdf + + # Update image object 3 with logo.png + pdfcpu images update gallery.pdf logo.png 3 + pdfcpu images update gallery.pdf logo.png out.pdf 3 + + # update image with Id=Im0 on page=1 with logo.jpg + pdfcpu images update gallery.pdf logo.jpg 1 Im0 + pdfcpu images update gallery.pdf logo.jpg out.pdf 1 Im0 ` usageCreate = "usage: pdfcpu create inFileJSON [inFile] outFile" + generalFlags @@ -1146,7 +1231,7 @@ For more info on json syntax & samples please refer to : usageFormReset = "pdfcpu form reset inFile [outFile] [fieldID|fieldName]..." usageFormExport = "pdfcpu form export inFile [outFileJSON]" usageFormFill = "pdfcpu form fill inFile inFileJSON [outFile]" - usageFormMultiFill = "pdfcpu form multifill [-m(ode) single|merge] inFile inFileData outDir [outName]" + usageFormMultiFill = "pdfcpu form multifill [-m(ode) single|merge] -- inFile inFileData outDir [outName]" usageForm = "usage: " + usageFormListFields + "\n " + usageFormRemoveFields + @@ -1326,11 +1411,11 @@ description ... scalefactor, dimensions, formsize, enforce, border, bgcolor Examples: pdfcpu poster "f:A4" in.pdf outDir - Page format is A2, the printer supports A4. + Page form size is A2, the printer supports A4. Generate a poster(A2) via a corresponding 2x2 grid of A4 pages. pdfcpu poster "f:A4, scale:2.0" in.pdf outDir - Page format is A2, the printer supports A4. + Page form size is A2, the printer supports A4. Generate a poster(A0) via a corresponding 4x4 grid of A4 pages. pdfcpu poster -u cm -- "dim:15 10, margin:1, bgcol:DarkGray, border:on" in.pdf outDir @@ -1370,15 +1455,15 @@ description ... scalefactor, dimensions, formsize, enforce, border, bgcolor Examples: pdfcpu ndown 2 in.pdf outDir - Page format is A2, the printer supports A3. + Page form size is A2, the printer supports A3. Quick cut page into 2 equally sized pages. pdfcpu ndown 4 in.pdf outDir - Page format is A2, the printer supports A4. + Page form size is A2, the printer supports A4. Quick cut page into 4 equally (A4) sized pages. pdfcpu ndown -u cm -- "margin:1, bgcol:DarkGray, border:on" 4 in.pdf outDir - Page format is A2, the printer supports A4. + Page format size is A2, the printer supports A4. Quick cut page into 4 equally (A4) sized pages and provide a glue area of 1 cm. See also the related commands: poster, cut` @@ -1429,7 +1514,7 @@ description ... 
scalefactor, dimensions, formsize, enforce, border, bgcolor See also the related commands: poster, ndown` usageBookmarksList = "pdfcpu bookmarks list inFile" - usageBookmarksImport = "pdfcpu bookmarks import [-r(eplace)] inFile inFileJSON [outFile]" + usageBookmarksImport = "pdfcpu bookmarks import [-r(eplace)] -- inFile inFileJSON [outFile]" usageBookmarksExport = "pdfcpu bookmarks export inFile [outFileJSON]" usageBookmarksRemove = "pdfcpu bookmarks remove inFile [outFile]" @@ -1499,9 +1584,9 @@ description ... scalefactor, dimensions, formsize, enforce, border, bgcolor pdfcpu pagemode reset test.pdf ` - usageViewerPreferencesList = "pdfcpu viewerpref list [-a(ll)] [-j(son)] inFile" - usageViewerPreferencesSet = "pdfcpu viewerpref set inFile (inFileJSON | JSONstring)" - usageViewerPreferencesReset = "pdfcpu viewerpref reset inFile" + usageViewerPreferencesList = "pdfcpu viewerpref list [-a(ll) -j(son)] -- inFile" + usageViewerPreferencesSet = "pdfcpu viewerpref set inFile (inFileJSON | JSONstring)" + usageViewerPreferencesReset = "pdfcpu viewerpref reset inFile" usageViewerPreferences = "usage: " + usageViewerPreferencesList + "\n " + usageViewerPreferencesSet + @@ -1576,7 +1661,7 @@ description ... scalefactor, dimensions, formsize, enforce, border, bgcolor set viewer preferences via JSON file: pdfcpu viewerpref set test.pdf viewerpref.json - and eg. viewerpref.json (each preferences is optional!): + and eg. viewerpref.json (each preference is optional!): { "viewerPreferences": { @@ -1628,4 +1713,48 @@ Examples: pdfcpu zoom -unit cm -- "vmargin: -1" in.pdf out.pdf ... zoom in to vertical margin of -1 cm pdfcpu zoom -unit cm -- "vmargin: 1, border:true, bgcolor:lightgray" in.pdf out.pdf ... zoom out to vertical margin of 1 cm ` + + usageConfigList = "pdfcpu config list" + usageConfigReset = "pdfcpu config reset" + + usageConfig = "usage: " + usageConfigList + + "\n " + usageConfigReset + generalFlags + + usageLongConfig = `Manage your pdfcpu configuration.` + + usageCertificatesList = "pdfcpu certificates list" + usageCertificatesInspect = "pdfcpu certificates inspect inFile" + usageCertificatesImport = "pdfcpu certificates import inFile.." + usageCertificatesReset = "pdfcpu certificates reset" + + usageCertificates = "usage: " + usageCertificatesList + + "\n " + usageCertificatesInspect + + "\n " + usageCertificatesImport + + "\n " + usageCertificatesReset + generalFlags + + usageLongCertificates = `Manage certificates. + + inFile ... .pem, .p7c, .cer, .crt file + inFileJSON ... input JSON file + outFile ... output PDF file + outFileJSON ... output PDF file + + pdfcpu comes preloaded with certificates approved by the EU Trusted Lists. + + Please import any missing certificates. +` + + usageSignaturesValidate = "pdfcpu signatures validate [-a(ll) -f(ull)] -- inFile" + usageSignatures = "usage: " + usageSignaturesValidate + generalFlags + + usageLongSignatures = `Manage digital signatures. + + all ... validate all signatures (authoritative/certified, cosigners, usage rights, digital timestamps) + full ... comprehensive output including certificate chains, revocation status and any problems encountered. + inFile ... 
input PDF file + + Related configuration parameters: timeoutCRL, + timeoutOCSP, + preferredCertRevocationChecker +` ) diff --git a/go.mod b/go.mod index b46c24ad..6aa7a103 100644 --- a/go.mod +++ b/go.mod @@ -1,15 +1,19 @@ module github.com/angel-one/pdfcpu -go 1.20 +go 1.24.0 + +toolchain go1.24.2 require ( github.com/hhrutter/lzw v1.0.0 - github.com/hhrutter/tiff v1.0.1 - github.com/mattn/go-runewidth v0.0.15 + github.com/hhrutter/pkcs7 v0.2.0 + github.com/hhrutter/tiff v1.0.2 + github.com/mattn/go-runewidth v0.0.16 github.com/pkg/errors v0.9.1 - golang.org/x/image v0.12.0 - golang.org/x/text v0.13.0 + golang.org/x/crypto v0.42.0 + golang.org/x/image v0.31.0 + golang.org/x/text v0.29.0 gopkg.in/yaml.v2 v2.4.0 ) -require github.com/rivo/uniseg v0.4.4 // indirect +require github.com/rivo/uniseg v0.4.7 // indirect diff --git a/go.sum b/go.sum index 5d3f2e73..0b3a109e 100644 --- a/go.sum +++ b/go.sum @@ -1,48 +1,22 @@ github.com/hhrutter/lzw v1.0.0 h1:laL89Llp86W3rRs83LvKbwYRx6INE8gDn0XNb1oXtm0= github.com/hhrutter/lzw v1.0.0/go.mod h1:2HC6DJSn/n6iAZfgM3Pg+cP1KxeWc3ezG8bBqW5+WEo= -github.com/hhrutter/tiff v1.0.1 h1:MIus8caHU5U6823gx7C6jrfoEvfSTGtEFRiM8/LOzC0= -github.com/hhrutter/tiff v1.0.1/go.mod h1:zU/dNgDm0cMIa8y8YwcYBeuEEveI4B0owqHyiPpJPHc= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/hhrutter/pkcs7 v0.2.0 h1:i4HN2XMbGQpZRnKBLsUwO3dSckzgX142TNqY/KfXg+I= +github.com/hhrutter/pkcs7 v0.2.0/go.mod h1:aEzKz0+ZAlz7YaEMY47jDHL14hVWD6iXt0AgqgAvWgE= +github.com/hhrutter/tiff v1.0.2 h1:7H3FQQpKu/i5WaSChoD1nnJbGx4MxU5TlNqqpxw55z8= +github.com/hhrutter/tiff v1.0.2/go.mod h1:pcOeuK5loFUE7Y/WnzGw20YxUdnqjY1P0Jlcieb/cCw= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis= -github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/image v0.12.0 h1:w13vZbU4o5rKOFFR8y7M+c4A5jXDC0uXTdHYRP8X2DQ= -golang.org/x/image v0.12.0/go.mod h1:Lu90jvHG7GfemOIcldsh9A2hS01ocl6oNO7ype5mEnk= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 
-golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= -golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= +golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= +golang.org/x/image v0.31.0 h1:mLChjE2MV6g1S7oqbXC0/UcKijjm5fnJLUYKIYrLESA= +golang.org/x/image v0.31.0/go.mod h1:R9ec5Lcp96v9FTF+ajwaH3uGxPH4fKfHHAVbUILxghA= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= diff --git a/internal/corefont/metrics/gen.go b/internal/corefont/metrics/gen.go index 45163b9b..93f06379 100644 --- a/internal/corefont/metrics/gen.go +++ b/internal/corefont/metrics/gen.go @@ -186,7 +186,7 @@ const header = `// generated by "go run gen.go". DO NOT EDIT. 
package metrics import ( - "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" ) ` diff --git a/offline_coverage.sh b/offline_coverage.sh new file mode 100755 index 00000000..e7aab18b --- /dev/null +++ b/offline_coverage.sh @@ -0,0 +1,48 @@ +#!/bin/sh + +# Copyright 2018 The pdfcpu Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +rm c.out + +set -e + +echo mode: set > c.out + +function internalDeps { + + for p in $(go list -f '{{.Deps}}' $1) + do + if [[ $p == github.com/pdfcpu/pdfcpu* ]]; then + idep=$idep,$p + fi + done +} + +echo collecting coverage ... + +for q in $(go list ./...) +do + #echo collecting coverage for $q + idep=$q + internalDeps $idep + if [[ $q == */test ]]; then + idep=${idep%/test} + fi + GITHUB_ACTIONS="true" go test -coverprofile=c1.out -coverpkg=$idep $q && tail -n +2 c1.out >> c.out +done + +rm c1.out + +go tool cover -html=c.out \ No newline at end of file diff --git a/pkg/api/api.go b/pkg/api/api.go index 1bb50aaa..150cdbf2 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -51,10 +51,10 @@ func logDisclaimerPDF20() { ***************************** Disclaimer **************************** * PDF 2.0 features are supported on a need basis. * * (See ISO 32000:2 6.3.2 Conformance of PDF processors) * -* At the moment pdfcpu comes with basic PDF 2.0 support. * +* At the moment pdfcpu ships with basic PDF 2.0 support. * * Please let us know which feature you would like to see supported, * * provide a sample PDF file and create an issue: * -* https://github.com/pdfcpu/pdfcpu/issues/new/choose * +* https://github.com/angel-one/pdfcpu/issues/new/choose * * Thank you for using pdfcpu <3 * *********************************************************************` @@ -87,11 +87,15 @@ func ReadContextFile(inFile string) (*model.Context, error) { return nil, err } - if ctx.Version() == model.V20 { + if ctx.Conf.Version != model.VersionStr { + model.CheckConfigVersion(ctx.Conf.Version) + } + + if ctx.XRefTable.Version() == model.V20 { logDisclaimerPDF20() } - if err = validate.XRefTable(ctx.XRefTable); err != nil { + if err = validate.XRefTable(ctx); err != nil { return nil, err } @@ -100,10 +104,10 @@ func ReadContextFile(inFile string) (*model.Context, error) { // ValidateContext validates ctx. func ValidateContext(ctx *model.Context) error { - if ctx.Version() == model.V20 { + if ctx.XRefTable.Version() == model.V20 { logDisclaimerPDF20() } - return validate.XRefTable(ctx.XRefTable) + return validate.XRefTable(ctx) } // OptimizeContext optimizes ctx. @@ -122,7 +126,7 @@ func WriteContext(ctx *model.Context, w io.Writer) error { } ctx.Write.Writer = bufio.NewWriter(w) defer ctx.Write.Flush() - return pdfcpu.Write(ctx) + return pdfcpu.WriteContext(ctx) } // WriteIncrement writes a PDF increment for ctx to w. @@ -155,14 +159,39 @@ func ReadAndValidate(rs io.ReadSeeker, conf *model.Configuration) (ctx *model.Co return ctx, nil } -// ReadValidateAndOptimize returns an optimized model.Context of rs ready for processing. 
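The rewritten ReadValidateAndOptimize below only runs the optimization pass when the command requires it (see cmdAssumingOptimization) or when conf.Optimize is set. A minimal sketch of a caller opting out for a very large file, assuming the conf.Optimize flag referenced in that hunk; file name is a placeholder:

```go
package main

import (
	"log"
	"os"

	"github.com/angel-one/pdfcpu/pkg/api"
	"github.com/angel-one/pdfcpu/pkg/pdfcpu/model"
)

func main() {
	f, err := os.Open("large.pdf") // placeholder input file
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Skip the optional xref optimization pass for a large or complex file.
	// Commands covered by cmdAssumingOptimization still force the pass.
	conf := model.NewDefaultConfiguration()
	conf.Cmd = model.VALIDATE
	conf.Optimize = false

	if _, err := api.ReadValidateAndOptimize(f, conf); err != nil {
		log.Fatal(err)
	}
}
```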
+func cmdAssumingOptimization(cmd model.CommandMode) bool { + return cmd == model.OPTIMIZE || + cmd == model.FILLFORMFIELDS || + cmd == model.RESETFORMFIELDS || + cmd == model.LISTIMAGES || + cmd == model.UPDATEIMAGES || + cmd == model.EXTRACTIMAGES || + cmd == model.EXTRACTFONTS +} + +// ReadValidateAndOptimize returns an optimized model.Context of rs ready for processing a specific command. +// conf.Cmd is expected to be configured properly. func ReadValidateAndOptimize(rs io.ReadSeeker, conf *model.Configuration) (ctx *model.Context, err error) { + if conf == nil { + return nil, errors.New("pdfcpu: ReadValidateAndOptimize: missing conf") + } + ctx, err = ReadAndValidate(rs, conf) if err != nil { return nil, err } - if err = OptimizeContext(ctx); err != nil { + // With the exception of commands utilizing structs provided the Optimize step + // command optimization of the cross reference table is optional but usually recommended. + // For large or complex files it may make sense to skip optimization and set conf.Optimize = false. + if cmdAssumingOptimization(conf.Cmd) || conf.Optimize { + if err = OptimizeContext(ctx); err != nil { + return nil, err + } + } + + // TODO move to form related commands. + if err := pdfcpu.CacheFormFonts(ctx); err != nil { return nil, err } @@ -180,17 +209,17 @@ func Write(ctx *model.Context, w io.Writer, conf *model.Configuration) error { log.Stats.Printf("XRefTable:\n%s\n", ctx) } - if conf.PostProcessValidate { - if err := ValidateContext(ctx); err != nil { - return err - } - } + // Note side effects of validation before writing! + // if conf.PostProcessValidate { + // if err := ValidateContext(ctx); err != nil { + // return err + // } + // } return WriteContext(ctx, w) } func WriteIncr(ctx *model.Context, rws io.ReadWriteSeeker, conf *model.Configuration) error { - if log.StatsEnabled() { log.Stats.Printf("XRefTable:\n%s\n", ctx) } @@ -212,7 +241,7 @@ func WriteIncr(ctx *model.Context, rws io.ReadWriteSeeker, conf *model.Configura // If path/pdfcpu is not existent, it will be created including config.yml func EnsureDefaultConfigAt(path string) error { // Call if you have specific requirements regarding the location of the pdfcpu config dir. - return model.EnsureDefaultConfigAt(path) + return model.EnsureDefaultConfigAt(path, false) } var ( diff --git a/pkg/api/attach.go b/pkg/api/attach.go index db812573..11ed006d 100644 --- a/pkg/api/attach.go +++ b/pkg/api/attach.go @@ -107,7 +107,7 @@ func AddAttachments(rs io.ReadSeeker, w io.Writer, files []string, coll bool, co return errors.New("pdfcpu: AddAttachments: No attachment added") } - return WriteContext(ctx, w) + return Write(ctx, w, conf) } // AddAttachmentsFile embeds files into a PDF context read from inFile and writes the result to outFile. @@ -176,7 +176,7 @@ func RemoveAttachments(rs io.ReadSeeker, w io.Writer, files []string, conf *mode return errors.New("pdfcpu: RemoveAttachments: No attachment removed") } - return WriteContext(ctx, w) + return Write(ctx, w, conf) } // RemoveAttachmentsFile deletes embedded files from a PDF context read from inFile and writes the result to outFile. @@ -236,6 +236,36 @@ func ExtractAttachmentsRaw(rs io.ReadSeeker, outDir string, fileNames []string, return ctx.ExtractAttachments(fileNames) } +func SanitizePath(path string) string { + + // Do not process "'" and "..". + + if path == "" || path == "." || path == ".." 
{ + return "attachment" + } + + path = strings.TrimPrefix(path, string(filepath.Separator)) + + parts := strings.Split(path, string(filepath.Separator)) + + cleanParts := []string{} + for i := 0; i < len(parts); i++ { + if parts[i] != "" && parts[i] != "." && parts[i] != ".." { + cleanParts = append(cleanParts, parts[i]) + continue + } + if i == len(parts)-1 { + cleanParts = append(cleanParts, "attachment") + } + } + + if len(cleanParts) == 0 { + return "attachment" + } + + return filepath.Join(cleanParts...) +} + // ExtractAttachments extracts embedded files from a PDF context read from rs into outDir. func ExtractAttachments(rs io.ReadSeeker, outDir string, fileNames []string, conf *model.Configuration) error { aa, err := ExtractAttachmentsRaw(rs, outDir, fileNames, conf) @@ -244,12 +274,19 @@ func ExtractAttachments(rs io.ReadSeeker, outDir string, fileNames []string, con } for _, a := range aa { - fileName := filepath.Join(outDir, a.FileName) - logWritingTo(fileName) + + fn := SanitizePath(a.FileName) + fileName := filepath.Join(outDir, fn) + f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm) if err != nil { - return err + fileName = filepath.Base(a.FileName) + f, err = os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm) + if err != nil { + return err + } } + logWritingTo(fileName) if _, err = io.Copy(f, a); err != nil { return err } diff --git a/pkg/api/certificate.go b/pkg/api/certificate.go new file mode 100644 index 00000000..c5c79c75 --- /dev/null +++ b/pkg/api/certificate.go @@ -0,0 +1,103 @@ +/* + Copyright 2025 The pdfcpu Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package api + +import ( + "crypto/x509" + "fmt" + "os" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" +) + +func LoadCertificates() (int, error) { + + if model.UserCertPool != nil { + return 0, nil + } + + // if log.CLIEnabled() { + // log.CLI.Printf("certDir: %s\n", model.CertDir) + // } + + if err := os.MkdirAll(model.CertDir, os.ModePerm); err != nil { + return 0, err + } + + rootCAs := x509.NewCertPool() + + n, err := pdfcpu.LoadCertificatesToCertPool(model.CertDir, rootCAs) + if err != nil { + return 0, err + } + + model.UserCertPool = rootCAs + + return n, nil +} + +// ImportCertificates validates and imports found certificate files to pdfcpu config dir. 
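A minimal sketch of how the certificate helpers in this new file (LoadCertificates, ImportCertificates) might be used from application code; "root.pem" is a placeholder file name:

```go
package main

import (
	"fmt"
	"log"

	"github.com/angel-one/pdfcpu/pkg/api"
)

func main() {
	// Import certificates from a PEM file into the pdfcpu config dir.
	// "root.pem" is a placeholder; .pem, .p7c, .cer and .crt files are supported.
	msgs, err := api.ImportCertificates([]string{"root.pem"})
	if err != nil {
		log.Fatal(err)
	}
	for _, m := range msgs {
		fmt.Println(m)
	}

	// Load everything from the config dir into the user cert pool
	// (a no-op returning 0 if the pool has already been populated).
	n, err := api.LoadCertificates()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("loaded %d certificates\n", n)
}
```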
+func ImportCertificates(inFiles []string) ([]string, error) { + count := 0 + overwrite := true + ss := []string{} + for _, inFile := range inFiles { + n, ok, err := pdfcpu.ImportCertificate(inFile, overwrite) + if err != nil { + return nil, err + } + if !ok { + ss = append(ss, fmt.Sprintf("%s skipped (already imported)", inFile)) + continue + } + ss = append(ss, fmt.Sprintf("%s: %d certificates", inFile, n)) + count += n + } + + ss = append(ss, fmt.Sprintf("imported %d certificates", count)) + return ss, nil +} + +func InspectCertificates(inFiles []string) ([]string, error) { + count := 0 + ss := []string{} + + for _, inFile := range inFiles { + + certs, err := pdfcpu.LoadCertificates(inFile) + if err != nil { + return nil, err + } + + ss = append(ss, fmt.Sprintf("%s: %d certificates\n", inFile, len(certs))) + + for i, cert := range certs { + s, err := pdfcpu.InspectCertificate(cert) + if err != nil { + return nil, err + } + ss = append(ss, fmt.Sprintf("%d:", i+1)) + ss = append(ss, s) + count++ + } + + } + + ss = append(ss, fmt.Sprintf("inspected %d certificates", count)) + return ss, nil +} diff --git a/pkg/api/cut.go b/pkg/api/cut.go index bb7217a1..a6c5ffec 100644 --- a/pkg/api/cut.go +++ b/pkg/api/cut.go @@ -59,10 +59,6 @@ func Poster(rs io.ReadSeeker, outDir, fileName string, selectedPages []string, c return errors.Errorf("pdfcpu: invalid scale factor %.2f: i >= 1.0\n", cut.Scale) } - if rs == nil { - return errors.New("pdfcpu poster: Please provide rs") - } - if conf == nil { conf = model.NewDefaultConfiguration() } @@ -78,16 +74,16 @@ func Poster(rs io.ReadSeeker, outDir, fileName string, selectedPages []string, c return nil } - for i, v := range pages { + for pageNr, v := range pages { if !v { continue } - ctxDest, err := pdfcpu.PosterPage(ctxSrc, i, cut) + ctxDest, err := pdfcpu.PosterPage(ctxSrc, pageNr, cut) if err != nil { return err } - outFile := filepath.Join(outDir, fmt.Sprintf("%s_page_%d.pdf", fileName, i)) + outFile := filepath.Join(outDir, fmt.Sprintf("%s_page_%d.pdf", fileName, pageNr)) logWritingTo(outFile) if conf.PostProcessValidate { @@ -144,11 +140,11 @@ func NDown(rs io.ReadSeeker, outDir, fileName string, selectedPages []string, n return nil } - for i, v := range pages { + for pageNr, v := range pages { if !v { continue } - ctxDest, err := pdfcpu.NDownPage(ctxSrc, i, n, cut) + ctxDest, err := pdfcpu.NDownPage(ctxSrc, pageNr, n, cut) if err != nil { return err } @@ -159,7 +155,7 @@ func NDown(rs io.ReadSeeker, outDir, fileName string, selectedPages []string, n } } - outFile := filepath.Join(outDir, fmt.Sprintf("%s_page_%d.pdf", fileName, i)) + outFile := filepath.Join(outDir, fmt.Sprintf("%s_page_%d.pdf", fileName, pageNr)) if log.CLIEnabled() { log.CLI.Printf("writing %s\n", outFile) } @@ -229,10 +225,6 @@ func Cut(rs io.ReadSeeker, outDir, fileName string, selectedPages []string, cut return err } - if rs == nil { - return errors.New("pdfcpu cut: Please provide rs") - } - if conf == nil { conf = model.NewDefaultConfiguration() } @@ -248,11 +240,11 @@ func Cut(rs io.ReadSeeker, outDir, fileName string, selectedPages []string, cut return nil } - for i, v := range pages { + for pageNr, v := range pages { if !v { continue } - ctxDest, err := pdfcpu.CutPage(ctxSrc, i, cut) + ctxDest, err := pdfcpu.CutPage(ctxSrc, pageNr, cut) if err != nil { return err } @@ -263,7 +255,7 @@ func Cut(rs io.ReadSeeker, outDir, fileName string, selectedPages []string, cut } } - outFile := filepath.Join(outDir, fmt.Sprintf("%s_page_%d.pdf", fileName, i)) + outFile := 
filepath.Join(outDir, fmt.Sprintf("%s_page_%d.pdf", fileName, pageNr)) logWritingTo(outFile) if err := WriteContextFile(ctxDest, outFile); err != nil { diff --git a/pkg/api/example_test.go b/pkg/api/example_test.go index 1e3e4a05..474f6d6a 100644 --- a/pkg/api/example_test.go +++ b/pkg/api/example_test.go @@ -65,7 +65,7 @@ func ExampleSplitFile() { // Create dual page PDFs for in.pdf in outDir using the default configuration. SplitFile("in.pdf", "outDir", 2, nil) - // Create a sequence of PDFs representing bookmark secions. + // Create a sequence of PDFs representing bookmark sections. SplitFile("in.pdf", "outDir", 0, nil) } @@ -98,10 +98,10 @@ func ExampleMergeAppendFile() { func ExampleInsertPagesFile() { // Insert a blank page into in.pdf before page #3. - InsertPagesFile("in.pdf", "", []string{"3"}, true, nil) + InsertPagesFile("in.pdf", "", []string{"3"}, true, nil, nil) // Insert a blank page into in.pdf after every page. - InsertPagesFile("in.pdf", "", nil, false, nil) + InsertPagesFile("in.pdf", "", nil, false, nil, nil) } func ExampleRemovePagesFile() { diff --git a/pkg/api/extract.go b/pkg/api/extract.go index c0495bf5..868441e1 100644 --- a/pkg/api/extract.go +++ b/pkg/api/extract.go @@ -17,6 +17,7 @@ package api import ( + "bytes" "fmt" "io" "os" @@ -28,6 +29,7 @@ import ( "github.com/angel-one/pdfcpu/pkg/log" "github.com/angel-one/pdfcpu/pkg/pdfcpu" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" ) @@ -174,11 +176,13 @@ func ExtractFonts(rs io.ReadSeeker, outDir, fileName string, selectedPages []str fileName = strings.TrimSuffix(filepath.Base(fileName), ".pdf") + objNrs, skipped := types.IntSet{}, types.IntSet{} + for i, v := range pages { if !v { continue } - ff, err := pdfcpu.ExtractPageFonts(ctx, i) + ff, err := pdfcpu.ExtractPageFonts(ctx, i, objNrs, skipped) if err != nil { return err } @@ -210,6 +214,35 @@ func ExtractFontsFile(inFile, outDir string, selectedPages []string, conf *model return ExtractFonts(f, outDir, filepath.Base(inFile), selectedPages, conf) } +// WritePage consumes an io.Reader containing some PDF bytes and writes to outDir/fileName. +func WritePage(r io.Reader, outDir, fileName string, pageNr int) error { + outFile := filepath.Join(outDir, fmt.Sprintf("%s_page_%d.pdf", fileName, pageNr)) + logWritingTo(outFile) + w, err := os.Create(outFile) + if err != nil { + return err + } + if _, err = io.Copy(w, r); err != nil { + return err + } + return w.Close() +} + +// ExtractPage extracts the page with pageNr out of ctx into an io.Reader. +func ExtractPage(ctx *model.Context, pageNr int) (io.Reader, error) { + ctxNew, err := pdfcpu.ExtractPages(ctx, []int{pageNr}, false) + if err != nil { + return nil, err + } + + var b bytes.Buffer + if err := WriteContext(ctxNew, &b); err != nil { + return nil, err + } + + return &b, nil +} + // ExtractPages generates single page PDF files from rs in outDir for selected pages. 
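A minimal sketch combining the new ExtractPage and WritePage helpers above to pull a single page out of an open document; file names are placeholders and the output directory ./out is assumed to exist:

```go
package main

import (
	"log"
	"os"

	"github.com/angel-one/pdfcpu/pkg/api"
	"github.com/angel-one/pdfcpu/pkg/pdfcpu/model"
)

func main() {
	f, err := os.Open("in.pdf") // placeholder input file
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	conf := model.NewDefaultConfiguration()
	conf.Cmd = model.EXTRACTPAGES

	ctx, err := api.ReadValidateAndOptimize(f, conf)
	if err != nil {
		log.Fatal(err)
	}

	// Render page 3 into an io.Reader and write it as out/in_page_3.pdf.
	r, err := api.ExtractPage(ctx, 3)
	if err != nil {
		log.Fatal(err)
	}
	if err := api.WritePage(r, "out", "in", 3); err != nil {
		log.Fatal(err)
	}
}
```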
func ExtractPages(rs io.ReadSeeker, outDir, fileName string, selectedPages []string, conf *model.Configuration) error { if rs == nil { @@ -218,8 +251,8 @@ func ExtractPages(rs io.ReadSeeker, outDir, fileName string, selectedPages []str if conf == nil { conf = model.NewDefaultConfiguration() - conf.Cmd = model.EXTRACTPAGES } + conf.Cmd = model.EXTRACTPAGES ctx, err := ReadValidateAndOptimize(rs, conf) if err != nil { @@ -240,17 +273,12 @@ func ExtractPages(rs io.ReadSeeker, outDir, fileName string, selectedPages []str fileName = strings.TrimSuffix(filepath.Base(fileName), ".pdf") - for i, v := range pages { - if !v { - continue - } - ctxNew, err := pdfcpu.ExtractPage(ctx, i) + for _, i := range sortedPages(pages) { + r, err := ExtractPage(ctx, i) if err != nil { return err } - outFile := filepath.Join(outDir, fmt.Sprintf("%s_page_%d.pdf", fileName, i)) - logWritingTo(outFile) - if err := WriteContextFile(ctxNew, outFile); err != nil { + if err := WritePage(r, outDir, fileName, i); err != nil { return err } } diff --git a/pkg/api/form.go b/pkg/api/form.go index e157f579..06d9f678 100644 --- a/pkg/api/form.go +++ b/pkg/api/form.go @@ -24,6 +24,7 @@ import ( "io" "os" "path/filepath" + "strconv" "strings" "github.com/angel-one/pdfcpu/pkg/log" @@ -426,7 +427,11 @@ func validateComboBoxValues(f form.Form) error { } if len(cb.Options) > 0 { if !types.MemberOf(cb.Value, cb.Options) { - return errors.Errorf("pdfcpu: fill field name: \"%s\" unknown value: \"%s\" - options: %v\n", cb.Name, cb.Value, cb.Options) + i, err := strconv.Atoi(cb.Value) + if err == nil && i < len(cb.Options) { + return nil + } + return errors.Errorf("pdfcpu: fill field name: \"%s\" unknown value: \"%s\" - options: [%v]\n", cb.Name, cb.Value, strings.Join(cb.Options, ", ")) } } } @@ -441,7 +446,11 @@ func validateListBoxValues(f form.Form) error { if len(lb.Options) > 0 { for _, v := range lb.Values { if !types.MemberOf(v, lb.Options) { - return errors.Errorf("pdfcpu: fill field name: \"%s\" unknown value: \"%s\" - options: %v\n", lb.Name, v, lb.Options) + i, err := strconv.Atoi(v) + if err == nil && i < len(lb.Options) { + return nil + } + return errors.Errorf("pdfcpu: fill field name: \"%s\" unknown value: \"%s\" - options: [%v]\n", lb.Name, v, strings.Join(lb.Options, ", ")) } } } @@ -456,7 +465,11 @@ func validateRadioButtonGroupValues(f form.Form) error { } if len(rbg.Options) > 0 { if !types.MemberOf(rbg.Value, rbg.Options) { - return errors.Errorf("pdfcpu: fill field name: \"%s\" unknown value: \"%s\" - options: %v\n", rbg.Name, rbg.Value, rbg.Options) + i, err := strconv.Atoi(rbg.Value) + if err == nil && i < len(rbg.Options) { + return nil + } + return errors.Errorf("pdfcpu: fill field name: \"%s\" unknown value: \"%s\" - options: [%v]\n", rbg.Name, rbg.Value, strings.Join(rbg.Options, ", ")) } } } @@ -507,6 +520,7 @@ func FillForm(rs io.ReadSeeker, rd io.Reader, w io.Writer, conf *model.Configura return err } + // TODO not necessarily so ctx.RemoveSignature() var buf bytes.Buffer diff --git a/pkg/api/image.go b/pkg/api/image.go index 1d3a90fd..cbb8133c 100644 --- a/pkg/api/image.go +++ b/pkg/api/image.go @@ -18,6 +18,10 @@ package api import ( "io" + "os" + "path/filepath" + "strconv" + "strings" "github.com/angel-one/pdfcpu/pkg/pdfcpu" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" @@ -49,3 +53,121 @@ func Images(rs io.ReadSeeker, selectedPages []string, conf *model.Configuration) return ii, err } + +// UpdateImages replaces the XObject identified by objNr or (pageNr and resourceId). 
+func UpdateImages(rs io.ReadSeeker, rd io.Reader, w io.Writer, objNr, pageNr int, id string, conf *model.Configuration) error { + + if rs == nil { + return errors.New("pdfcpu: UpdateImages: missing rs") + } + + if conf == nil { + conf = model.NewDefaultConfiguration() + } + conf.Cmd = model.UPDATEIMAGES + + ctx, err := ReadValidateAndOptimize(rs, conf) + if err != nil { + return err + } + + if objNr > 0 { + if err := pdfcpu.UpdateImagesByObjNr(ctx, rd, objNr); err != nil { + return err + } + + return Write(ctx, w, conf) + } + + if pageNr == 0 || id == "" { + return errors.New("pdfcpu: UpdateImages: missing pageNr or id ") + } + + if err := pdfcpu.UpdateImagesByPageNrAndId(ctx, rd, pageNr, id); err != nil { + return err + } + + return Write(ctx, w, conf) +} + +func ensurePageNrAndId(pageNr *int, id *string, imageFile string) (err error) { + // If objNr and pageNr and id are not set, we assume an image filename produced by "pdfcpu image list" and parse this info. + // eg. mountain_1_Im0.png => pageNr:1, id:Im0 + + if *pageNr > 0 && *id != "" { + return nil + } + + s := strings.TrimSuffix(imageFile, filepath.Ext(imageFile)) + + ss := strings.Split(s, "_") + + if len(ss) < 3 { + return errors.Errorf("pdfcpu: invalid image filename:%s - must conform to output filename of \"pdfcpu extract\"", imageFile) + } + + *id = ss[len(ss)-1] + + *pageNr, err = strconv.Atoi(ss[len(ss)-2]) + if err != nil { + return err + } + + return nil +} + +// UpdateImagesFile replaces the XObject identified by objNr or (pageNr and resourceId). +func UpdateImagesFile(inFile, imageFile, outFile string, objNr, pageNr int, id string, conf *model.Configuration) (err error) { + + if objNr < 1 { + if err = ensurePageNrAndId(&pageNr, &id, imageFile); err != nil { + return err + } + } + + var f0, f1, f2 *os.File + + if f0, err = os.Open(inFile); err != nil { + return err + } + + if f1, err = os.Open(imageFile); err != nil { + return err + } + + tmpFile := inFile + ".tmp" + if outFile != "" && inFile != outFile { + tmpFile = outFile + logWritingTo(outFile) + } else { + logWritingTo(inFile) + } + if f2, err = os.Create(tmpFile); err != nil { + f1.Close() + return err + } + + defer func() { + if err != nil { + f2.Close() + f1.Close() + f0.Close() + os.Remove(tmpFile) + return + } + if err = f2.Close(); err != nil { + return + } + if err = f1.Close(); err != nil { + return + } + if err = f0.Close(); err != nil { + return + } + if outFile == "" || inFile == outFile { + err = os.Rename(tmpFile, inFile) + } + }() + + return UpdateImages(f0, f1, f2, objNr, pageNr, id, conf) +} diff --git a/pkg/api/importImage.go b/pkg/api/importImage.go index c6cabfdb..ff830cd3 100644 --- a/pkg/api/importImage.go +++ b/pkg/api/importImage.go @@ -63,7 +63,7 @@ func ImportImages(rs io.ReadSeeker, w io.Writer, imgs []io.Reader, imp *pdfcpu.I return err } - // This is the page tree root. + // Page tree root. 
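A minimal sketch of how the UpdateImagesFile helper above might be called, mirroring the CLI examples in the images usage text; file names are placeholders:

```go
package main

import (
	"log"

	"github.com/angel-one/pdfcpu/pkg/api"
)

func main() {
	// Replace the image with Id "Im0" on page 1 of gallery.pdf with logo.jpg
	// and write the result to out.pdf. objNr=0 means the image is addressed
	// by page number and resource Id rather than by object number.
	if err := api.UpdateImagesFile("gallery.pdf", "logo.jpg", "out.pdf", 0, 1, "Im0", nil); err != nil {
		log.Fatal(err)
	}

	// An image named like the output of "pdfcpu images extract"
	// (eg. gallery_1_Im0.png) lets pageNr and Id be derived from the file name.
	if err := api.UpdateImagesFile("gallery.pdf", "gallery_1_Im0.png", "out.pdf", 0, 0, "", nil); err != nil {
		log.Fatal(err)
	}
}
```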
pagesDict, err := ctx.DereferenceDict(*pagesIndRef) if err != nil { return err @@ -71,20 +71,20 @@ func ImportImages(rs io.ReadSeeker, w io.Writer, imgs []io.Reader, imp *pdfcpu.I for _, r := range imgs { - indRef, err := pdfcpu.NewPageForImage(ctx.XRefTable, r, pagesIndRef, imp) + indRefs, err := pdfcpu.NewPagesForImage(ctx.XRefTable, r, pagesIndRef, imp) if err != nil { return err } - if err := ctx.SetValid(*indRef); err != nil { - return err - } - - if err = model.AppendPageTree(indRef, 1, pagesDict); err != nil { - return err + for _, indRef := range indRefs { + if err := ctx.SetValid(*indRef); err != nil { + return err + } + if err = model.AppendPageTree(indRef, 1, pagesDict); err != nil { + return err + } + ctx.PageCount++ } - - ctx.PageCount++ } return Write(ctx, w, conf) diff --git a/pkg/api/info.go b/pkg/api/info.go index 3524580c..1488dd79 100644 --- a/pkg/api/info.go +++ b/pkg/api/info.go @@ -25,7 +25,7 @@ import ( ) // PDFInfo returns information about rs. -func PDFInfo(rs io.ReadSeeker, fileName string, selectedPages []string, conf *model.Configuration) (*pdfcpu.PDFInfo, error) { +func PDFInfo(rs io.ReadSeeker, fileName string, selectedPages []string, fonts bool, conf *model.Configuration) (*pdfcpu.PDFInfo, error) { if rs == nil { return nil, errors.New("pdfcpu: PDFInfo: missing rs") } @@ -42,6 +42,12 @@ func PDFInfo(rs io.ReadSeeker, fileName string, selectedPages []string, conf *mo return nil, err } + if fonts { + if err = OptimizeContext(ctx); err != nil { + return nil, err + } + } + pages, err := PagesForPageSelection(ctx.PageCount, selectedPages, false, true) if err != nil { return nil, err @@ -51,5 +57,5 @@ func PDFInfo(rs io.ReadSeeker, fileName string, selectedPages []string, conf *mo return nil, err } - return pdfcpu.Info(ctx, fileName, pages) + return pdfcpu.Info(ctx, fileName, pages, fonts) } diff --git a/pkg/api/keyword.go b/pkg/api/keyword.go index 25bb5a2e..f9e2e6e4 100644 --- a/pkg/api/keyword.go +++ b/pkg/api/keyword.go @@ -43,7 +43,7 @@ func Keywords(rs io.ReadSeeker, conf *model.Configuration) ([]string, error) { return nil, err } - return pdfcpu.KeywordsList(ctx.XRefTable) + return pdfcpu.KeywordsList(ctx) } // AddKeywords adds keywords to rs's infodict and writes the result to w. 
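For the PDFInfo change above (the new fonts parameter), a minimal sketch of a caller requesting font info for all pages; the file name is a placeholder:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/angel-one/pdfcpu/pkg/api"
)

func main() {
	f, err := os.Open("in.pdf") // placeholder input file
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// nil page selection means all pages; fonts=true also collects font info,
	// which is why the hunk above adds the extra OptimizeContext call.
	info, err := api.PDFInfo(f, "in.pdf", nil, true, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%+v\n", info)
}
```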
@@ -64,7 +64,7 @@ func AddKeywords(rs io.ReadSeeker, w io.Writer, files []string, conf *model.Conf return err } - if err = pdfcpu.KeywordsAdd(ctx.XRefTable, files); err != nil { + if err = pdfcpu.KeywordsAdd(ctx, files); err != nil { return err } @@ -128,7 +128,7 @@ func RemoveKeywords(rs io.ReadSeeker, w io.Writer, keywords []string, conf *mode } var ok bool - if ok, err = pdfcpu.KeywordsRemove(ctx.XRefTable, keywords); err != nil { + if ok, err = pdfcpu.KeywordsRemove(ctx, keywords); err != nil { return err } if !ok { diff --git a/pkg/api/merge.go b/pkg/api/merge.go index ff96b97a..9085276c 100644 --- a/pkg/api/merge.go +++ b/pkg/api/merge.go @@ -35,7 +35,7 @@ func appendTo(rs io.ReadSeeker, fName string, ctxDest *model.Context, dividerPag return err } - if ctxDest.Version() < model.V20 && ctxSource.Version() == model.V20 { + if ctxDest.XRefTable.Version() < model.V20 && ctxSource.XRefTable.Version() == model.V20 { return pdfcpu.ErrUnsupportedVersion } @@ -73,8 +73,10 @@ func MergeRaw(rsc []io.ReadSeeker, w io.Writer, dividerPage bool, conf *model.Co } } - if err = OptimizeContext(ctxDest); err != nil { - return err + if conf.OptimizeBeforeWriting { + if err = OptimizeContext(ctxDest); err != nil { + return err + } } return WriteContext(ctxDest, w) @@ -92,13 +94,26 @@ func prepDestContext(destFile string, rs io.ReadSeeker, conf *model.Configuratio } } - if ctxDest.Version() < model.V20 { + if ctxDest.XRefTable.Version() < model.V20 { ctxDest.EnsureVersionForWriting() } return ctxDest, nil } +func appendFile(fName string, ctxDest *model.Context, dividerPage bool) error { + f, err := os.Open(fName) + if err != nil { + return err + } + defer f.Close() + + if log.CLIEnabled() { + log.CLI.Println(fName) + } + return appendTo(f, filepath.Base(fName), ctxDest, dividerPage) +} + // Merge concatenates inFiles. // if destFile is supplied it appends the result to destfile (=MERGEAPPEND) // if no destFile supplied it writes the result to the first entry of inFiles (=MERGECREATE). 
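The merge hunks in this file gate the final optimization pass behind conf.OptimizeBeforeWriting and announce bookmark creation via conf.CreateBookmarks. A rough sketch of a caller using these flags; the MergeCreateFile wrapper and its (inFiles, outFile, dividerPage, conf) signature are assumed here rather than taken from this diff:

```go
package main

import (
	"log"

	"github.com/angel-one/pdfcpu/pkg/api"
	"github.com/angel-one/pdfcpu/pkg/pdfcpu/model"
)

func main() {
	conf := model.NewDefaultConfiguration()
	conf.CreateBookmarks = true        // create a bookmark for each merged input
	conf.OptimizeBeforeWriting = false // skip the final optimization pass

	// Assumed convenience wrapper: merge a.pdf and b.pdf into merged.pdf
	// without divider pages.
	if err := api.MergeCreateFile([]string{"a.pdf", "b.pdf"}, "merged.pdf", false, conf); err != nil {
		log.Fatal(err)
	}
}
```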
@@ -120,6 +135,10 @@ func Merge(destFile string, inFiles []string, w io.Writer, conf *model.Configura inFiles = inFiles[1:] } + if conf.CreateBookmarks && log.CLIEnabled() { + log.CLI.Println("creating bookmarks...") + } + f, err := os.Open(destFile) if err != nil { return err @@ -138,29 +157,15 @@ func Merge(destFile string, inFiles []string, w io.Writer, conf *model.Configura } for _, fName := range inFiles { - if err := func() error { - f, err := os.Open(fName) - if err != nil { - return err - } - defer f.Close() - - if log.CLIEnabled() { - log.CLI.Println(fName) - } - if err = appendTo(f, filepath.Base(fName), ctxDest, dividerPage); err != nil { - return err - } - - return nil - - }(); err != nil { + if err := appendFile(fName, ctxDest, dividerPage); err != nil { return err } } - if err := OptimizeContext(ctxDest); err != nil { - return err + if conf.OptimizeBeforeWriting { + if err := OptimizeContext(ctxDest); err != nil { + return err + } } return WriteContext(ctxDest, w) @@ -254,7 +259,7 @@ func MergeCreateZip(rs1, rs2 io.ReadSeeker, w io.Writer, conf *model.Configurati if err != nil { return err } - if ctxDest.Version() == model.V20 { + if ctxDest.XRefTable.Version() == model.V20 { return pdfcpu.ErrUnsupportedVersion } ctxDest.EnsureVersionForWriting() @@ -267,7 +272,7 @@ func MergeCreateZip(rs1, rs2 io.ReadSeeker, w io.Writer, conf *model.Configurati if err != nil { return err } - if ctxSrc.Version() == model.V20 { + if ctxSrc.XRefTable.Version() == model.V20 { return pdfcpu.ErrUnsupportedVersion } @@ -275,8 +280,10 @@ func MergeCreateZip(rs1, rs2 io.ReadSeeker, w io.Writer, conf *model.Configurati return err } - if err := OptimizeContext(ctxDest); err != nil { - return err + if conf.OptimizeBeforeWriting { + if err := OptimizeContext(ctxDest); err != nil { + return err + } } return WriteContext(ctxDest, w) diff --git a/pkg/api/optimize.go b/pkg/api/optimize.go index 1a50e932..15a71849 100644 --- a/pkg/api/optimize.go +++ b/pkg/api/optimize.go @@ -35,7 +35,6 @@ func Optimize(rs io.ReadSeeker, w io.Writer, conf *model.Configuration) error { if conf == nil { conf = model.NewDefaultConfiguration() } - //conf.Cmd = model.OPTIMIZE ctx, err := ReadValidateAndOptimize(rs, conf) if err != nil { diff --git a/pkg/api/page.go b/pkg/api/page.go index 6facee5e..3a18e47e 100644 --- a/pkg/api/page.go +++ b/pkg/api/page.go @@ -29,7 +29,7 @@ import ( ) // InsertPages inserts a blank page before or after every page selected of rs and writes the result to w. -func InsertPages(rs io.ReadSeeker, w io.Writer, selectedPages []string, before bool, conf *model.Configuration) error { +func InsertPages(rs io.ReadSeeker, w io.Writer, selectedPages []string, before bool, pageConf *pdfcpu.PageConfiguration, conf *model.Configuration) error { if rs == nil { return errors.New("pdfcpu: InsertPages: missing rs") } @@ -52,7 +52,12 @@ func InsertPages(rs io.ReadSeeker, w io.Writer, selectedPages []string, before b return err } - if err = ctx.InsertBlankPages(pages, before); err != nil { + var dim *types.Dim + if pageConf != nil { + dim = pageConf.PageDim + } + + if err = ctx.InsertBlankPages(pages, dim, before); err != nil { return err } @@ -60,7 +65,7 @@ func InsertPages(rs io.ReadSeeker, w io.Writer, selectedPages []string, before b } // InsertPagesFile inserts a blank page before or after every inFile page selected and writes the result to w. 
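InsertPagesFile now takes an optional *pdfcpu.PageConfiguration describing the inserted blank pages; a minimal sketch using the defaults (nil), matching the updated example_test.go above:

```go
package main

import (
	"log"

	"github.com/angel-one/pdfcpu/pkg/api"
)

func main() {
	// Insert a blank page before page 3 of in.pdf, rewriting in.pdf in place.
	// nil PageConfiguration: the inserted page inherits the current media box.
	// nil Configuration: default configuration.
	if err := api.InsertPagesFile("in.pdf", "", []string{"3"}, true, nil, nil); err != nil {
		log.Fatal(err)
	}
}
```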
-func InsertPagesFile(inFile, outFile string, selectedPages []string, before bool, conf *model.Configuration) (err error) { +func InsertPagesFile(inFile, outFile string, selectedPages []string, before bool, pageConf *pdfcpu.PageConfiguration, conf *model.Configuration) (err error) { var f1, f2 *os.File if f1, err = os.Open(inFile); err != nil { @@ -97,7 +102,7 @@ func InsertPagesFile(inFile, outFile string, selectedPages []string, before bool } }() - return InsertPages(f1, f2, selectedPages, before, conf) + return InsertPages(f1, f2, selectedPages, before, pageConf, conf) } // RemovePages removes selected pages from rs and writes the result to w. diff --git a/pkg/api/permission.go b/pkg/api/permission.go index b9dc3dfa..58378c9f 100644 --- a/pkg/api/permission.go +++ b/pkg/api/permission.go @@ -20,7 +20,6 @@ import ( "io" "os" - "github.com/angel-one/pdfcpu/pkg/pdfcpu" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/pkg/errors" ) @@ -67,10 +66,6 @@ func SetPermissions(rs io.ReadSeeker, w io.Writer, conf *model.Configuration) er return err } - if ctx.Version() == model.V20 { - return pdfcpu.ErrUnsupportedVersion - } - return WriteContext(ctx, w) } @@ -136,10 +131,6 @@ func GetPermissions(rs io.ReadSeeker, conf *model.Configuration) (*int16, error) return nil, err } - if ctx.Version() == model.V20 { - return nil, pdfcpu.ErrUnsupportedVersion - } - if ctx.E == nil { // Full access - permissions don't apply. return nil, nil diff --git a/pkg/api/selectPages.go b/pkg/api/selectPages.go index 2aa6133d..df46d572 100644 --- a/pkg/api/selectPages.go +++ b/pkg/api/selectPages.go @@ -601,10 +601,9 @@ func parsePageRangeForCollection(pr []string, pageCount int, negated bool, cp *[ return nil } -// PagesForPageCollection returns a slice of page numbers for a page collection. -// Any page number in any order any number of times allowed. -func PagesForPageCollection(pageCount int, pageSelection []string) ([]int, error) { +func calcPagesForPageCollection(pageCount int, pageSelection []string) ([]int, error) { collectedPages := []int{} + for _, v := range pageSelection { if v == "even" { @@ -670,6 +669,22 @@ func PagesForPageCollection(pageCount int, pageSelection []string) ([]int, error return nil, err } } + + return collectedPages, nil +} + +// PagesForPageCollection returns a slice of page numbers for a page collection. +// Any page number in any order any number of times allowed. +func PagesForPageCollection(pageCount int, pageSelection []string) ([]int, error) { + collectedPages, err := calcPagesForPageCollection(pageCount, pageSelection) + if err != nil { + return nil, err + } + + if len(collectedPages) == 0 { + return nil, errors.Errorf("pdfcpu: no page selected") + } + return collectedPages, nil } diff --git a/pkg/api/sign.go b/pkg/api/sign.go new file mode 100644 index 00000000..71d4d4d0 --- /dev/null +++ b/pkg/api/sign.go @@ -0,0 +1,180 @@ +/* +Copyright 2025 The pdf Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package api + +import ( + "fmt" + "os" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/pkg/errors" +) + +func signatureStats(signValidResults []*model.SignatureValidationResult) model.SignatureStats { + sigStats := model.SignatureStats{Total: len(signValidResults)} + for _, svr := range signValidResults { + signed, signedVisible, unsigned, unsignedVisible := sigStats.Counter(svr) + if svr.Signed { + *signed++ + if svr.Visible { + *signedVisible++ + } + continue + } + *unsigned++ + if svr.Visible { + *unsignedVisible++ + } + } + return sigStats +} + +func statsCounter(stats model.SignatureStats, ss *[]string) { + plural := func(count int) string { + if count == 1 { + return "" + } + return "s" + } + + if stats.FormSigned > 0 { + *ss = append(*ss, fmt.Sprintf("%d signed form signature%s (%d visible)", stats.FormSigned, plural(stats.FormSigned), stats.FormSignedVisible)) + } + if stats.FormUnsigned > 0 { + *ss = append(*ss, fmt.Sprintf("%d unsigned form signature%s (%d visible)", stats.FormUnsigned, plural(stats.FormUnsigned), stats.FormUnsignedVisible)) + } + + if stats.PageSigned > 0 { + *ss = append(*ss, fmt.Sprintf("%d signed page signature%s (%d visible)", stats.PageSigned, plural(stats.PageSigned), stats.PageSignedVisible)) + } + if stats.PageUnsigned > 0 { + *ss = append(*ss, fmt.Sprintf("%d unsigned page signature%s (%d visible)", stats.PageUnsigned, plural(stats.PageUnsigned), stats.PageUnsignedVisible)) + } + + if stats.URSigned > 0 { + *ss = append(*ss, fmt.Sprintf("%d signed usage rights signature%s (%d visible)", stats.URSigned, plural(stats.URSigned), stats.URSignedVisible)) + } + if stats.URUnsigned > 0 { + *ss = append(*ss, fmt.Sprintf("%d unsigned usage rights signature%s (%d visible)", stats.URUnsigned, plural(stats.URUnsigned), stats.URUnsignedVisible)) + } + + if stats.DTSSigned > 0 { + *ss = append(*ss, fmt.Sprintf("%d signed doc timestamp signature%s (%d visible)", stats.DTSSigned, plural(stats.DTSSigned), stats.DTSSignedVisible)) + } + if stats.DTSUnsigned > 0 { + *ss = append(*ss, fmt.Sprintf("%d unsigned doc timestamp signature%s (%d visible)", stats.DTSUnsigned, plural(stats.DTSUnsigned), stats.DTSUnsignedVisible)) + } +} + +func digest(signValidResults []*model.SignatureValidationResult, full bool) []string { + var ss []string + + if full { + ss = append(ss, "") + for i, r := range signValidResults { + //ss = append(ss, fmt.Sprintf("%d. 
Sisgnature:\n", i+1)) + ss = append(ss, fmt.Sprintf("%d:", i+1)) + ss = append(ss, r.String()+"\n") + } + return ss + } + + if len(signValidResults) == 1 { + svr := signValidResults[0] + ss = append(ss, "") + ss = append(ss, fmt.Sprintf("1 %s", svr.Signature.String(svr.Status))) + ss = append(ss, fmt.Sprintf(" Status: %s", svr.Status)) + s := svr.Reason.String() + if svr.Reason == model.SignatureReasonInternal { + if len(svr.Problems) > 0 { + s = svr.Problems[0] + } + } + ss = append(ss, fmt.Sprintf(" Reason: %s", s)) + ss = append(ss, fmt.Sprintf(" Signed: %s", svr.SigningTime())) + return ss + } + + stats := signatureStats(signValidResults) + + ss = append(ss, "") + ss = append(ss, fmt.Sprintf("%d signatures present:", stats.Total)) + + statsCounter(stats, &ss) + + for i, svr := range signValidResults { + ss = append(ss, fmt.Sprintf("\n%d:", i+1)) + ss = append(ss, fmt.Sprintf(" Type: %s", svr.Signature.String(svr.Status))) + ss = append(ss, fmt.Sprintf(" Status: %s", svr.Status.String())) + s := svr.Reason.String() + if svr.Reason == model.SignatureReasonInternal { + if len(svr.Problems) > 0 { + s = svr.Problems[0] + } + } + ss = append(ss, fmt.Sprintf(" Reason: %s", s)) + ss = append(ss, fmt.Sprintf(" Signed: %s", svr.SigningTime())) + } + + return ss +} + +// ValidateSignatures validates signatures of inFile and returns the signature validation results. +func ValidateSignatures(inFile string, all bool, conf *model.Configuration) ([]*model.SignatureValidationResult, error) { + + if conf == nil { + conf = model.NewDefaultConfiguration() + } + conf.Cmd = model.VALIDATESIGNATURE + + if _, err := LoadCertificates(); err != nil { + return nil, err + } + + f, err := os.Open(inFile) + if err != nil { + return nil, err + } + + ctx, err := ReadValidateAndOptimize(f, conf) + if err != nil { + return nil, err + } + + if len(ctx.Signatures) == 0 && !ctx.SignatureExist && !ctx.AppendOnly { + return nil, errors.New("pdfcpu: No signatures present.") + } + + return pdfcpu.ValidateSignatures(f, ctx, all) +} + +// ValidateSignaturesFile validates signatures of inFile. +// all: processes all signatures meaning not only the authoritative/certified signature.. +// full: verbose output including cert chain and problems encountered. 
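A minimal sketch of calling ValidateSignaturesFile below, the programmatic counterpart of "pdfcpu signatures validate -all -full"; the file name is a placeholder:

```go
package main

import (
	"fmt"
	"log"

	"github.com/angel-one/pdfcpu/pkg/api"
)

func main() {
	// Validate every signature (not just the authoritative one) and
	// request the comprehensive per-signature report.
	lines, err := api.ValidateSignaturesFile("signed.pdf", true, true, nil)
	if err != nil {
		log.Fatal(err)
	}
	for _, l := range lines {
		fmt.Println(l)
	}
}
```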
+func ValidateSignaturesFile(inFile string, all, full bool, conf *model.Configuration) ([]string, error) { + if conf == nil { + conf = model.NewDefaultConfiguration() + } + + signValidResults, err := ValidateSignatures(inFile, all, conf) + if err != nil { + return nil, err + } + + return digest(signValidResults, full), nil +} diff --git a/pkg/api/test/annotation_test.go b/pkg/api/test/annotation_test.go index 69af0dc3..c1397a31 100644 --- a/pkg/api/test/annotation_test.go +++ b/pkg/api/test/annotation_test.go @@ -29,55 +29,192 @@ import ( ) var textAnn model.AnnotationRenderer = model.NewTextAnnotation( - *types.NewRectangle(0, 0, 100, 100), - "Test Content", - "ID1", - "Title1", - 0, - &color.Gray, - nil, - "", - "", - false, - "Comment") + *types.NewRectangle(0, 0, 100, 100), // rect + 0, // apObjNr + "Text Annotation", // contents + "ID1", // id + "", // modDate + 0, // f + &color.Gray, // col + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + 0, // borderRadX + 0, // borderRadY + 2, // borderWidth + false, // displayOpen + "Comment") // name + +var textAnnCJK model.AnnotationRenderer = model.NewTextAnnotation( + *types.NewRectangle(0, 100, 100, 200), // rect + 0, // apObjNr + "文字注释", // contents + "ID1CJK", // id + "", // modDate + 0, // f + &color.Gray, // col + "标题1", // title + nil, // popupIndRef + nil, // ca + "RC", // rc + "", // subject + 0, // borderRadX + 0, // borderRadY + 2, // borderWidth + true, // displayOpen + "Comment") // name + +var freeTextAnn model.AnnotationRenderer = model.NewFreeTextAnnotation( + *types.NewRectangle(200, 300, 400, 500), // rect + 0, // apObjNr + `Mac Preview shows "Contents" +line 2 +line 3`, // contents + "ID1", // id + "", // modDate + model.AnnLocked, // f + &color.Gray, // col + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + `A.Reader renders rich text ("RC"). +line 2 +line 3`, + // ` + // + // + //

This is some rich text.

+ // + // `, // rich text (ignored by Mac Preview and rendered mediocre by Adobe Reader) + types.AlignCenter, // horizontal alignment + "Helvetica", // font name (TODO) + 12, // font size in points (TODO) + &color.Green, // font color + "", // DS (default style string) + nil, // Intent + nil, // callOutLine + nil, // callOutLineEndingStyle + 0, 0, 0, 0, // margin + 0, // borderWidth + model.BSSolid, // borderStyle + false, // cloudyBorder + 0) // cloudyBorderIntensity var linkAnn model.AnnotationRenderer = model.NewLinkAnnotation( - *types.NewRectangle(200, 0, 300, 100), - nil, - nil, - "https://pdfcpu.io", - "ID2", - 0, - 1, - model.BSSolid, - &color.Red, - true) + *types.NewRectangle(200, 0, 300, 100), // rect + 0, // apObjNr + "", // contents + "ID2", // id + "", // modDate + 0, // f + &color.Red, // borderCol + nil, // dest + "https://pdfcpu.io", // uri + nil, // quad + true, // border + 1, // borderWidth + model.BSSolid, // borderStyle +) var squareAnn model.AnnotationRenderer = model.NewSquareAnnotation( - *types.NewRectangle(300, 0, 350, 50), - "Square Annotation", - "ID3", - 0, - 1, - model.BSSolid, - &color.Blue, - false, - 0, - nil, - 0, 0, 0, 0) + *types.NewRectangle(300, 0, 350, 50), // rect + 0, // apObjNr + "Square Annotation", // contents + "ID3", // id + "", // modDate + 0, // f + &color.Gray, // col + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + &color.Blue, // fillCol + 0, // MLeft + 0, // MTop + 0, // MRight + 0, // MBot + 1, // borderWidth + model.BSSolid, // borderStyle + false, // cloudyBorder + 0, // cloudyBorderIntensity +) + +var squareAnnCJK model.AnnotationRenderer = model.NewSquareAnnotation( + *types.NewRectangle(300, 50, 350, 100), // rect + 0, // apObjNr + "方形注释", // contents + "ID3CJK", // id + "", // modDate + 0, // f + &color.Gray, // col + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + &color.Green, // fillCol + 0, // MLeft + 0, // MTop + 0, // MRight + 0, // MBot + 1, // borderWidth + model.BSDashed, // borderStyle + false, // cloudyBorder + 0, // cloudyBorderIntensity +) var circleAnn model.AnnotationRenderer = model.NewCircleAnnotation( - *types.NewRectangle(400, 0, 450, 50), - "Circle Annotation", - "ID4", - model.AnnLocked, - 3, - model.BSBeveled, - &color.Green, - true, - 1, - &color.Blue, - 10, 10, 10, 10) + *types.NewRectangle(400, 0, 450, 50), // rect + 0, // apObjNr + "Circle Annotation", // contents + "ID4", // id + "", // modDate + 0, // f + &color.Gray, // col + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + &color.Blue, // fillCol + 0, // MLeft + 0, // MTop + 0, // MRight + 0, // MBot + 1, // borderWidth + model.BSSolid, // borderStyle + false, // cloudyBorder + 0, // cloudyBorderIntensity +) + +var circleAnnCJK model.AnnotationRenderer = model.NewCircleAnnotation( + *types.NewRectangle(400, 50, 450, 100), // rect + 0, // apObjNr + "圆圈注释", // contents + "ID4CJK", // id + "", // modDate + 0, // f + &color.Green, // col + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + &color.Blue, // fillCol + 10, // MLeft + 10, // MTop + 10, // MRight + 10, // MBot + 1, // borderWidth + model.BSBeveled, // borderStyle + false, // cloudyBorder + 0, // cloudyBorderIntensity +) func annotationCount(t *testing.T, inFile string) int { t.Helper() @@ -434,24 +571,30 @@ func TestAddAnnotationsLowLevel(t *testing.T) { func TestAddLinkAnnotationWithDest(t *testing.T) { msg := "TestAddLinkAnnotationWithDest" + // Best 
viewed with Adobe Reader. + inFile := filepath.Join(inDir, "Walden.pdf") outFile := filepath.Join(samplesDir, "annotations", "LinkAnnotWithDestTopLeft.pdf") // Create internal link: // Add a 100x100 link rectangle on the bottom left corner of page 2. // Set destination to top left corner of page 1. + dest := &model.Destination{Typ: model.DestXYZ, PageNr: 1, Left: -1, Top: -1} internalLink := model.NewLinkAnnotation( - *types.NewRectangle(0, 0, 100, 100), - nil, - &model.Destination{Typ: model.DestXYZ, PageNr: 1, Left: -1, Top: -1}, - "", - "id", - 0, - 1, - model.BSSolid, - &color.Red, - true, + *types.NewRectangle(0, 0, 100, 100), // rect + 0, // apObjNr + "", // contents + "ID2", // id + "", // modDate + 0, // f + &color.Red, // borderCol + dest, // dest + "", // uri + nil, // quad + true, // border + 1, // borderWidth + model.BSSolid, // borderStyle ) err := api.AddAnnotationsFile(inFile, outFile, []string{"2"}, internalLink, nil, false) @@ -463,14 +606,21 @@ func TestAddLinkAnnotationWithDest(t *testing.T) { func TestAddAnnotationsFile(t *testing.T) { msg := "TestAddAnnotationsFile" + // Best viewed with Adobe Reader. + inFile := filepath.Join(inDir, "test.pdf") - outFile := filepath.Join(samplesDir, "annotations", "TestAnnotationsFile.pdf") + outFile := filepath.Join(samplesDir, "annotations", "Annotations.pdf") // Add text annotation. if err := api.AddAnnotationsFile(inFile, outFile, nil, textAnn, nil, false); err != nil { t.Fatalf("%s add: %v\n", msg, err) } + // Add CJK text annotation. + if err := api.AddAnnotationsFile(outFile, outFile, nil, textAnnCJK, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } + // Add link annotation. if err := api.AddAnnotationsFile(outFile, outFile, nil, linkAnn, nil, false); err != nil { t.Fatalf("%s add: %v\n", msg, err) @@ -481,17 +631,28 @@ func TestAddAnnotationsFile(t *testing.T) { t.Fatalf("%s add: %v\n", msg, err) } + // Add CJK square annotation. + if err := api.AddAnnotationsFile(outFile, outFile, nil, squareAnnCJK, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } + // Add circle annotation. if err := api.AddAnnotationsFile(outFile, outFile, nil, circleAnn, nil, false); err != nil { t.Fatalf("%s add: %v\n", msg, err) } + + // Add CJK circle annotation. + if err := api.AddAnnotationsFile(outFile, outFile, nil, circleAnnCJK, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } + } func TestAddAnnotations(t *testing.T) { msg := "TestAddAnnotations" inFile := filepath.Join(inDir, "test.pdf") - outFile := filepath.Join(samplesDir, "annotations", "TestAnnotations.pdf") + outFile := filepath.Join(outDir, "Annotations.pdf") // Create a context from inFile. ctx, err := api.ReadContextFile(inFile) @@ -501,14 +662,19 @@ func TestAddAnnotations(t *testing.T) { // Prepare annotations for page 1. m := map[int][]model.AnnotationRenderer{} - anns := make([]model.AnnotationRenderer, 4) + anns := make([]model.AnnotationRenderer, 7) + anns[0] = textAnn - anns[1] = linkAnn + anns[1] = textAnnCJK anns[2] = squareAnn - anns[3] = circleAnn + anns[3] = squareAnnCJK + anns[4] = circleAnn + anns[5] = circleAnnCJK + anns[6] = linkAnn + m[1] = anns - // Add 4 annotations to page 1. + // Add 7 annotations to page 1. 
if ok, err := pdfcpu.AddAnnotationsMap(ctx, m, false); err != nil || !ok { t.Fatalf("%s add: %v\n", msg, err) } @@ -519,3 +685,412 @@ func TestAddAnnotations(t *testing.T) { } } + +func TestPopupAnnotation(t *testing.T) { + msg := "TestPopupAnnotation" + + // Add a Markup annotation and a linked Popup annotation. + // Best viewed with Adobe Reader. + + inFile := filepath.Join(inDir, "test.pdf") + outFile := filepath.Join(samplesDir, "annotations", "PopupAnnotation.pdf") + + incr := false + pageNr := 1 + + // Create a context. + ctx, err := api.ReadContextFile(inFile) + if err != nil { + t.Fatalf("%s readContext: %v\n", msg, err) + } + + // Add Markup annotation. + parentIndRef, textAnnotDict, err := pdfcpu.AddAnnotationToPage(ctx, pageNr, textAnn, incr) + if err != nil { + t.Fatalf("%s Add Text AnnotationToPage: %v\n", msg, err) + } + + // Add Markup annotation as parent of Popup annotation. + popupAnn := model.NewPopupAnnotation( + *types.NewRectangle(0, 0, 100, 100), // rect + 0, // apObjNr + "Popup content", // contents + "IDPopup", // id + "", // modDate + 0, // f + &color.Green, // col + 0, // borderRadX + 0, // borderRadY + 2, // borderWidth + parentIndRef, // parentIndRef, + false, // displayOpen + ) + + // Add Popup annotation. + popupIndRef, _, err := pdfcpu.AddAnnotationToPage(ctx, pageNr, popupAnn, incr) + if err != nil { + t.Fatalf("%s Add Popup AnnotationToPage: %v\n", msg, err) + } + + // Add Popup annotation to Markup annotation. + textAnnotDict["Popup"] = *popupIndRef + + // Write context to file. + if err := api.WriteContextFile(ctx, outFile); err != nil { + t.Fatalf("%s write: %v\n", msg, err) + } +} + +func TestInkAnnotation(t *testing.T) { + msg := "TestInkAnnotation" + + // Best viewed with Adobe Reader. + + inFile := filepath.Join(inDir, "test.pdf") + outFile := filepath.Join(samplesDir, "annotations", "InkAnnotation.pdf") + + p1 := model.InkPath{100., 542., 150., 492., 200., 542.} + p2 := model.InkPath{100, 592, 150, 592} + + inkAnn := model.NewInkAnnotation( + *types.NewRectangle(0, 0, 100, 100), // rect + 0, // apObjNr + "Ink content", // contents + "IDInk", // id + "", // modDate + 0, // f + &color.Red, // col + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + []model.InkPath{p1, p2}, // InkList + 0, // borderWidth + model.BSSolid, // borderStyle + ) + + // Add Ink annotation. + if err := api.AddAnnotationsFile(inFile, outFile, nil, inkAnn, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } +} + +func TestHighlightAnnotation(t *testing.T) { + msg := "TestHighlightAnnotation" + + // Best viewed with Adobe Reader. + + inFile := filepath.Join(inDir, "testWithText.pdf") + outFile := filepath.Join(samplesDir, "annotations", "HighlightAnnotation.pdf") + + r := types.NewRectangle(205, 624.16, 400, 645.88) + + ql := types.NewQuadLiteralForRect(r) + + inkAnn := model.NewHighlightAnnotation( + *r, // rect + 0, // apObjNr + "Highlight content", // contents + "IDHighlight", // id + "", // modDate + model.AnnLocked, // f + &color.Yellow, // col + 0, // borderRadX + 0, // borderRadY + 2, // borderWidth + "Comment by Horst", // title + nil, // popupIndRef + nil, // ca + "", // rc + "Subject", // subject + types.QuadPoints{*ql}, // quad points + ) + + // Add Highlight annotation. 
+ if err := api.AddAnnotationsFile(inFile, outFile, nil, inkAnn, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } +} + +func TestUnderlineAnnotation(t *testing.T) { + msg := "TestUnderlineAnnotation" + + // Best viewed with Adobe Reader. + + inFile := filepath.Join(inDir, "testWithText.pdf") + outFile := filepath.Join(samplesDir, "annotations", "UnderlineAnnotation.pdf") + + r := types.NewRectangle(205, 624.16, 400, 645.88) + + ql := types.NewQuadLiteralForRect(r) + + underlineAnn := model.NewUnderlineAnnotation( + *r, // rect + 0, // apObjNr + "Underline content", // contents + "IDUnderline", // id + "", // modDate + model.AnnLocked, // f + &color.Yellow, // col + 0, // borderRadX + 0, // borderRadY + 2, // borderWidth + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + types.QuadPoints{*ql}, // quad points + ) + + // Add Underline annotation. + if err := api.AddAnnotationsFile(inFile, outFile, nil, underlineAnn, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } +} + +func TestSquigglyAnnotation(t *testing.T) { + msg := "TestSquigglyAnnotation" + + // Best viewed with Adobe Reader. + + inFile := filepath.Join(inDir, "testWithText.pdf") + outFile := filepath.Join(samplesDir, "annotations", "SquigglyAnnotation.pdf") + + r := types.NewRectangle(205, 624.16, 400, 645.88) + + ql := types.NewQuadLiteralForRect(r) + + squigglyAnn := model.NewSquigglyAnnotation( + *r, // rect + 0, // apObjNr + "Squiggly content", // contents + "IDSquiggly", // id + "", // modDate + model.AnnLocked, // f + &color.Yellow, // col + 0, // borderRadX + 0, // borderRadY + 2, // borderWidth + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + types.QuadPoints{*ql}, // quad points + ) + + // Add Squiggly annotation. + if err := api.AddAnnotationsFile(inFile, outFile, nil, squigglyAnn, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } +} + +func TestStrikeOutAnnotation(t *testing.T) { + msg := "TestStrikeOutAnnotation" + + // Best viewed with Adobe Reader. + + inFile := filepath.Join(inDir, "testWithText.pdf") + outFile := filepath.Join(samplesDir, "annotations", "StrikeOutAnnotation.pdf") + + r := types.NewRectangle(205, 624.16, 400, 645.88) + + ql := types.NewQuadLiteralForRect(r) + + strikeOutAnn := model.NewStrikeOutAnnotation( + *r, // rect + 0, // apObjNr + "StrikeOut content", // contents + "IDStrikeOut", // id + "", // modDate + model.AnnLocked, // f + &color.Yellow, // col + 0, // borderRadX + 0, // borderRadY + 2, // borderWidth + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + types.QuadPoints{*ql}, // quad points + ) + + // Add StrikeOut annotation. + if err := api.AddAnnotationsFile(inFile, outFile, nil, strikeOutAnn, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } +} + +func TestFreeTextAnnotation(t *testing.T) { + msg := "TestFreeTextAnnotation" + + // Best viewed with Adobe Reader. + + inFile := filepath.Join(inDir, "test.pdf") + outFile := filepath.Join(samplesDir, "annotations", "FreeTextAnnotation.pdf") + + // Add Free text annotation. + if err := api.AddAnnotationsFile(inFile, outFile, nil, freeTextAnn, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } +} + +func TestPolyLineAnnotation(t *testing.T) { + msg := "TestPolyLineAnnotation" + + // Best viewed with Adobe Reader. 
+ + inFile := filepath.Join(inDir, "test.pdf") + outFile := filepath.Join(samplesDir, "annotations", "PolyLineAnnotation.pdf") + + leButt := model.LEButt + leOpenArrow := model.LEOpenArrow + + polyLineAnn := model.NewPolyLineAnnotation( + *types.NewRectangle(30, 30, 110, 110), // rect + 0, // apObjNr + "PolyLine Annotation", // contents + "IDPolyLine", // id + "", // modDate + 0, // f + &color.Gray, // col + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + types.NewNumberArray(30, 30, 110, 110, 110, 30), // vertices + nil, // path + nil, // intent + nil, // measure + &color.Green, // fillCol + 1, // borderWidth + model.BSDashed, // borderStyle + &leButt, // start lineEndingStyle + &leOpenArrow, // end lineEndingStyle + ) + + // Add PolyLine annotation. + if err := api.AddAnnotationsFile(inFile, outFile, nil, polyLineAnn, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } +} + +func TestPolygonAnnotation(t *testing.T) { + msg := "TestPolygonAnnotation" + + // Best viewed with Adobe Reader. + + inFile := filepath.Join(inDir, "test.pdf") + outFile := filepath.Join(samplesDir, "annotations", "PolygonAnnotation.pdf") + + polygonAnn := model.NewPolygonAnnotation( + *types.NewRectangle(30, 30, 110, 110), // rect + 0, // apObjNr + "Polygon Annotation", // contents + "IDPolygon", // id + "", // modDate + 0, // f + &color.Gray, // col + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + types.NewNumberArray(30, 30, 110, 110, 110, 30), // vertices + nil, // path + nil, // intent + nil, // measure + &color.Green, // fillCol + 5, // borderWidth + model.BSDashed, // borderStyle + true, // cloudyBorder + 2) // cloudyBorderIntensity + + // Add Polygon annotation. + if err := api.AddAnnotationsFile(inFile, outFile, nil, polygonAnn, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } +} + +func TestLineAnnotation(t *testing.T) { + msg := "TestLineAnnotation" + + // Best viewed with Adobe Reader. + + inFile := filepath.Join(inDir, "test.pdf") + outFile := filepath.Join(samplesDir, "annotations", "LineAnnotation.pdf") + + leOpenArrow := model.LEOpenArrow + + lineAnn := model.NewLineAnnotation( + *types.NewRectangle(30, 30, 110, 110), // rect + 0, // apObjNr + "Diagonal", // contents + "IDLine", // id + "", // modDate + 0, // f + &color.DarkGray, // col + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + types.NewPoint(148.75, 140.33), // P1 + types.NewPoint(297.5, 280.66), // P2 + &leOpenArrow, // start lineEndingStyle + &leOpenArrow, // end lineEndingStyle + 50, // leader line length + 0, // leader line offset + 10, // leader line extension length + nil, // intent + nil, // measure + true, // caption + false, // caption position top + 0, // caption offset X + 0, // caption offset Y + nil, // fillCol + 1, // borderWidth + model.BSSolid) // borderStyle + + // Add line annotation. + if err := api.AddAnnotationsFile(inFile, outFile, nil, lineAnn, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } +} + +func TestCaretAnnotation(t *testing.T) { + msg := "TestCaretAnnotation" + + // Best viewed with Adobe Reader. 
+ + inFile := filepath.Join(inDir, "test.pdf") + outFile := filepath.Join(samplesDir, "annotations", "CaretAnnotation.pdf") + + caretAnn := model.NewCaretAnnotation( + *types.NewRectangle(30, 30, 110, 110), // rect + 0, // apObjNr + "Caret Annotation", // contents + "IDCaret", // id + "", // modDate + 0, // f + nil, // col + 0, // borderRadX + 0, // borderRadY + 0, // borderWidth + "Title1", // title + nil, // popupIndRef + nil, // ca + "", // rc + "", // subject + types.NewRectangle(20, 20, 20, 20), // RD + true) // paragraph symbol + + // Add Caret annotation. + if err := api.AddAnnotationsFile(inFile, outFile, nil, caretAnn, nil, false); err != nil { + t.Fatalf("%s add: %v\n", msg, err) + } +} diff --git a/pkg/api/test/api_test.go b/pkg/api/test/api_test.go index c559cbfd..14de21c7 100644 --- a/pkg/api/test/api_test.go +++ b/pkg/api/test/api_test.go @@ -61,6 +61,10 @@ func TestMain(m *testing.M) { samplesDir = filepath.Join("..", "..", "samples") conf = api.LoadConfiguration() + if os.Getenv("GITHUB_ACTIONS") == "true" { + conf.Offline = true + } + fmt.Printf("conf.Offline: %t\n", conf.Offline) // Install test user fonts from pkg/testdata/fonts. fonts, err := userFonts(filepath.Join(inDir, "fonts")) @@ -186,6 +190,8 @@ func TestValidate(t *testing.T) { msg := "TestValidate" inFile := filepath.Join(inDir, "Acroforms2.pdf") + //log.SetDefaultStatsLogger() + // Validate inFile. if err := api.ValidateFile(inFile, nil); err != nil { t.Fatalf("%s: %v\n", msg, err) } @@ -230,7 +236,7 @@ func TestInfo(t *testing.T) { } defer f.Close() - info, err := api.PDFInfo(f, inFile, nil, conf) + info, err := api.PDFInfo(f, inFile, nil, true, conf) if err != nil { t.Fatalf("%s: %v\n", msg, err) } diff --git a/pkg/api/test/attachment_test.go b/pkg/api/test/attachment_test.go index ed8ec614..449be26d 100644 --- a/pkg/api/test/attachment_test.go +++ b/pkg/api/test/attachment_test.go @@ -17,6 +17,7 @@ limitations under the License.
package test import ( + "fmt" "io" "os" "path/filepath" @@ -260,3 +261,35 @@ func TestAttachmentsLowLevel(t *testing.T) { removeAttachment(t, msg, outFile, a, ctx) } + +func TestSanitizePath(t *testing.T) { + + msg := "TestSanitizePath" + + testPaths := []string{ + "", + ".", + "..", + "../..", + "foo/.", + "bar/..", + "foo/bar/.", + "foo/bar/", + "foo/./bar/..", + "foo/./bar/./..", + "foo/./bar/../.", + "foo/./bar/../..", + "foo/./bar/", + "foo/../bar/..", + "docs/report.pdf", + "../../etc/passwd", + "/etc/passwd", + "subdir/../bar//../file.txt", + } + + for _, path := range testPaths { + result := api.SanitizePath(path) + fmt.Printf("%s: %q -> %q \n", msg, path, result) + } + +} diff --git a/pkg/api/test/booklet_test.go b/pkg/api/test/booklet_test.go index 7977fd43..32861f4d 100644 --- a/pkg/api/test/booklet_test.go +++ b/pkg/api/test/booklet_test.go @@ -156,7 +156,7 @@ func TestBooklet(t *testing.T) { []string{filepath.Join(inDir, "bookletTest.pdf")}, filepath.Join(outDir, "BookletFromPDFLetter_2Up_perfectbound.pdf"), []string{"1-24"}, - "p:LetterP, g:on, btype:perfectbound", + "p:LetterP, g:on, btype:perfectbound, ma:10, bgcol:#f7e6c7", "points", 2, false, @@ -165,27 +165,18 @@ func TestBooklet(t *testing.T) { []string{filepath.Join(inDir, "bookletTest.pdf")}, filepath.Join(outDir, "BookletFromPDFLedger_6Up.pdf"), []string{"1-24"}, - "p:LedgerP, g:on", + "p:LedgerP, g:on, ma:10, bgcol:#f7e6c7", "points", 6, false, }, - {"TestBookletFromPDF_8up", - []string{filepath.Join(inDir, "bookletTest.pdf")}, - filepath.Join(outDir, "BookletFromPDFLedger_8Up.pdf"), - []string{"1-32"}, - "p:LedgerP, g:on", - "points", - 8, - false, - }, // misc orientations and booklet types on 4-up {"TestBookletFromPDF_4up_portrait_short", []string{filepath.Join(inDir, "bookletTest.pdf")}, filepath.Join(outDir, "BookletFromPDFLedger_4Up_portrait_short.pdf"), []string{"1-24"}, - "p:LedgerP, g:on, binding:short", + "p:LedgerP, g:on, binding:short, ma:10, bgcol:#f7e6c7", "points", 4, false, @@ -194,7 +185,7 @@ func TestBooklet(t *testing.T) { []string{filepath.Join(inDir, "bookletTestLandscape.pdf")}, filepath.Join(outDir, "BookletFromPDFLedger_4Up_landscape_long.pdf"), []string{"1-24"}, - "p:LedgerL, g:on", + "p:LedgerL, g:on, ma:10, bgcol:#f7e6c7", "points", 4, false, @@ -203,7 +194,7 @@ func TestBooklet(t *testing.T) { []string{filepath.Join(inDir, "bookletTestLandscape.pdf")}, filepath.Join(outDir, "BookletFromPDFLedger_4Up_landscape_short.pdf"), []string{"1-24"}, - "p:LedgerL, g:on, binding:short", + "p:LedgerL, g:on, binding:short, ma:10, bgcol:#f7e6c7", "points", 4, false, @@ -212,7 +203,7 @@ func TestBooklet(t *testing.T) { []string{filepath.Join(inDir, "bookletTest.pdf")}, filepath.Join(outDir, "BookletFromPDFLedger_4Up_portrait_long_advanced.pdf"), []string{"1-24"}, - "p:LedgerP, g:on, btype:bookletadvanced", + "p:LedgerP, g:on, btype:bookletadvanced, ma:10, bgcol:#f7e6c7", "points", 4, false, @@ -221,7 +212,7 @@ func TestBooklet(t *testing.T) { []string{filepath.Join(inDir, "bookletTestLandscape.pdf")}, filepath.Join(outDir, "BookletFromPDFLedger_4Up_landscape_short_advanced.pdf"), []string{"1-24"}, - "p:LedgerL, g:on, binding:short, btype:bookletadvanced", + "p:LedgerL, g:on, binding:short, btype:bookletadvanced, ma:10, bgcol:#f7e6c7", "points", 4, false, @@ -230,16 +221,53 @@ func TestBooklet(t *testing.T) { []string{filepath.Join(inDir, "bookletTest.pdf")}, filepath.Join(outDir, "BookletFromPDFLedger_4Up_perfectbound.pdf"), []string{"1-24"}, - "p:LedgerP, g:on, btype:perfectbound", + "p:LedgerP, g:on, 
btype:perfectbound, ma:10, bgcol:#f7e6c7", "points", 4, false, }, + // 8up + {"TestBookletFromPDF8Up", + []string{filepath.Join(inDir, "bookletTestA6.pdf")}, + filepath.Join(outDir, "BookletFromPDF8Up.pdf"), + nil, + "p:A3, g:on, ma:10, bgcol:#f7e6c7", + "points", + 8, + false, + }, + {"TestBookletFromPDF8UpPortraitShort", + []string{filepath.Join(inDir, "bookletTestA6.pdf")}, + filepath.Join(outDir, "BookletFromPDF8UpPortraitShort.pdf"), + nil, + "p:A3, binding:short, g:on, ma:10, bgcol:#f7e6c7", + "points", + 8, + false, + }, + {"TestBookletFromPDF8UpLandscapeLong", + []string{filepath.Join(inDir, "bookletTestA6L.pdf")}, + filepath.Join(outDir, "BookletFromPDF8UpLandscapeLong.pdf"), + nil, + "p:A3, binding:long, g:on, ma:10, bgcol:#f7e6c7", + "points", + 8, + false, + }, + {"TestBookletFromPDF8UpLandscapeShort", + []string{filepath.Join(inDir, "bookletTestA6L.pdf")}, + filepath.Join(outDir, "BookletFromPDF8UpLandscapeShort.pdf"), + nil, + "p:A3, binding:short, g:on, ma:10, bgcol:#f7e6c7", + "points", + 8, + false, + }, + // 2-up multi folio booklet from PDF on A4 using 8 sheets per folio // using the default foliosize:8 // Here we print 2 complete folios (2 x 8 sheets) + 1 partial folio - // multi folio only makes sense for n = 2 // See also https://www.instructables.com/How-to-bind-your-own-Hardback-Book/ {"TestHardbackBookFromPDF", []string{filepath.Join(inDir, "WaldenFull.pdf")}, @@ -251,8 +279,10 @@ func TestBooklet(t *testing.T) { false, }, } { - conf := model.NewDefaultConfiguration() - conf.SetUnit(tt.unit) - testBooklet(t, tt.msg, tt.inFiles, tt.outFile, tt.selectedPages, tt.desc, tt.n, tt.isImg, conf) + t.Run(tt.msg, func(subTest *testing.T) { + conf := model.NewDefaultConfiguration() + conf.SetUnit(tt.unit) + testBooklet(subTest, tt.msg, tt.inFiles, tt.outFile, tt.selectedPages, tt.desc, tt.n, tt.isImg, conf) + }) } } diff --git a/pkg/api/test/bookmark_test.go b/pkg/api/test/bookmark_test.go index 51e44272..98c243a2 100644 --- a/pkg/api/test/bookmark_test.go +++ b/pkg/api/test/bookmark_test.go @@ -66,15 +66,24 @@ func TestListBookmarks(t *testing.T) { } } -func InactiveTestAddDuplicateBookmarks(t *testing.T) { +func TestAddDuplicateBookmarks(t *testing.T) { msg := "TestAddDuplicateBookmarks" inFile := filepath.Join(inDir, "CenterOfWhy.pdf") outFile := filepath.Join("..", "..", "samples", "bookmarks", "bookmarkDuplicates.pdf") bms := []pdfcpu.Bookmark{ - {PageFrom: 2, Title: "Duplicate Name"}, - {PageFrom: 3, Title: "Duplicate Name"}, - {PageFrom: 5, Title: "Duplicate Name"}, + {PageFrom: 1, Title: "Parent1", + Kids: []pdfcpu.Bookmark{ + {PageFrom: 2, Title: "kid1"}, + {PageFrom: 3, Title: "kid2"}, + }, + }, + {PageFrom: 4, Title: "Parent2", + Kids: []pdfcpu.Bookmark{ + {PageFrom: 5, Title: "kid1"}, + {PageFrom: 6, Title: "kid2"}, + }, + }, } replace := true // Replace existing bookmarks. diff --git a/pkg/api/test/certificate_test.go b/pkg/api/test/certificate_test.go new file mode 100644 index 00000000..51e6f7ac --- /dev/null +++ b/pkg/api/test/certificate_test.go @@ -0,0 +1,34 @@ +/* + Copyright 2025 The pdfcpu Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +package test + +import ( + "testing" + + "github.com/angel-one/pdfcpu/pkg/api" +) + +func TestListCertificates(t *testing.T) { + msg := "TestListCertificates" + + n, err := api.LoadCertificates() + if err != nil { + t.Fatalf("%s: %v\n", msg, err) + } + + t.Logf("Loaded %d certs", n) +} diff --git a/pkg/api/test/encryption_test.go b/pkg/api/test/encryption_test.go index a6e4e2ad..9478fd8f 100644 --- a/pkg/api/test/encryption_test.go +++ b/pkg/api/test/encryption_test.go @@ -189,6 +189,22 @@ func TestEncryption(t *testing.T) { } } +func TestPDF20Encryption(t *testing.T) { + // PDF 2.0 encryption assumes aes/256. + for _, fileName := range []string{ + "i277.pdf", + "imageWithBPC.pdf", + "pageLevelOutputIntent.pdf", + "SimplePDF2.0.pdf", + "utf8stringAndAnnotation.pdf", + "utf8test.pdf", + "viaIncrementalSave.pdf", + "withOffsetStart.pdf", + } { + testEncryption(t, filepath.Join("pdf20", fileName), "aes", 256) + } +} + func TestSetPermissions(t *testing.T) { msg := "TestSetPermissions" inFile := filepath.Join(inDir, "5116.DCT_Filter.pdf") diff --git a/pkg/api/test/extract_test.go b/pkg/api/test/extract_test.go index b8830ab6..6ab511fb 100644 --- a/pkg/api/test/extract_test.go +++ b/pkg/api/test/extract_test.go @@ -201,7 +201,7 @@ func TestExtractFontsLowLevel(t *testing.T) { // Extract fonts for page 1. i := 1 - ff, err := pdfcpu.ExtractPageFonts(ctx, i) + ff, err := pdfcpu.ExtractPageFonts(ctx, 1, types.IntSet{}, types.IntSet{}) if err != nil { t.Fatalf("%s extractPageFonts(%d): %v\n", msg, i, err) } @@ -227,7 +227,7 @@ func TestExtractPages(t *testing.T) { func TestExtractPagesLowLevel(t *testing.T) { msg := "TestExtractPagesLowLevel" inFile := filepath.Join(inDir, "TheGoProgrammingLanguageCh1.pdf") - outFile := filepath.Join(outDir, "MyExtractedAndProcessedSinglePage.pdf") + outFile := "MyExtractedAndProcessedSinglePage.pdf" // Create a context. ctx, err := api.ReadContextFile(inFile) @@ -237,17 +237,16 @@ func TestExtractPagesLowLevel(t *testing.T) { // Extract page 1. i := 1 - ctxNew, err := pdfcpu.ExtractPage(ctx, i) + + r, err := api.ExtractPage(ctx, i) if err != nil { t.Fatalf("%s extractPage(%d): %v\n", msg, i, err) } - // Here you can process this single page PDF context. - - // Write context to file. - if err := api.WriteContextFile(ctxNew, outFile); err != nil { - t.Fatalf("%s write: %v\n", msg, err) + if err := api.WritePage(r, outDir, outFile, i); err != nil { + t.Fatalf("%s writePage(%d): %v\n", msg, i, err) } + } func TestExtractContent(t *testing.T) { @@ -266,7 +265,7 @@ func TestExtractContentLowLevel(t *testing.T) { // Create a context. ctx, err := api.ReadContextFile(inFile) if err != nil { - t.Fatalf("%s readContext: %v\n", msg, err) + t.Fatalf("%s read context: %v\n", msg, err) } // Extract page content for page 2. diff --git a/pkg/api/test/images_test.go b/pkg/api/test/images_test.go new file mode 100644 index 00000000..6ddfa9d8 --- /dev/null +++ b/pkg/api/test/images_test.go @@ -0,0 +1,131 @@ +/* +Copyright 2024 The pdf Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package test + +import ( + "path/filepath" + "testing" + + "github.com/angel-one/pdfcpu/pkg/api" +) + +func testUpdateImages(t *testing.T, msg string, inFile, imgFile, outFile string, objNr, pageNr int, id string) { + t.Helper() + + if err := api.UpdateImagesFile(inFile, imgFile, outFile, objNr, pageNr, id, conf); err != nil { + t.Fatalf("%s %s: %v\n", msg, outFile, err) + } + if err := api.ValidateFile(outFile, conf); err != nil { + t.Fatalf("%s: %v\n", msg, err) + } +} + +func TestUpdateImages(t *testing.T) { + + outDir := filepath.Join(samplesDir, "images") + inDir := outDir + + for _, tt := range []struct { + msg string + inFile string + imgFile string + outFile string + objNr int // by objNr + pageNr int // or by (pageNr, id) + id string + }{ + {"TestUpdateByObjNr", + "test.pdf", + "test_1_Im1.png", + "ImageUpdatedByObjNr.pdf", + 8, + 0, + ""}, + + {"TestUpdateByPageNrAndId", + "test.pdf", + "test_1_Im1.png", + "imageUpdatedByPageNrAndIdPage1.pdf", + 0, + 1, + "Im1"}, + + {"TestUpdateByPageNrAndId", + "test.pdf", + "test_1_Im1.png", + "imageUpdatedByPageNrAndIdPage2.pdf", + 0, + 2, + "Im1"}, + + {"TestUpdateByImageFileName", + "test.pdf", + "test_1_Im1.png", + "imageUpdatedByFileName.pdf", + 0, + 0, + ""}, + + {"TestUpdateByPageNrAndId", + "test.pdf", + "any.png", + "imageUpdatedByPageNrAndIdAny.pdf", + 0, + 1, + "Im1"}, + + {"TestUpdateByObjNrPNG", + "test.pdf", + "any.png", + "imageUpdatedByObjNrPNG.pdf", + 8, + 0, + ""}, + + {"TestUpdateByObjNrJPG", + "test.pdf", + "any.jpg", + "imageUpdatedByObjNrJPG.pdf", + 8, + 0, + ""}, + + {"TestUpdateByObjNrTIFF", + "test.pdf", + "any.tiff", + "imageUpdatedByObjNrTIFF.pdf", + 8, + 0, + ""}, + + {"TestUpdateByObjNrWEBP", + "test.pdf", + "any.webp", + "imageUpdatedByObjNrWEBP.pdf", + 8, + 0, + ""}, + } { + testUpdateImages(t, tt.msg, + filepath.Join(inDir, tt.inFile), + filepath.Join(outDir, tt.imgFile), + filepath.Join(outDir, tt.outFile), + tt.objNr, + tt.pageNr, + tt.id) + } +} diff --git a/pkg/api/test/keyword_test.go b/pkg/api/test/keyword_test.go index ebd8a840..58664b14 100644 --- a/pkg/api/test/keyword_test.go +++ b/pkg/api/test/keyword_test.go @@ -23,6 +23,7 @@ import ( "github.com/angel-one/pdfcpu/pkg/api" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" ) func listKeywordsFile(t *testing.T, fileName string, conf *model.Configuration) ([]string, error) { @@ -51,8 +52,9 @@ func listKeywords(t *testing.T, msg, fileName string, want []string) []string { if len(got) != len(want) { t.Fatalf("%s: list keywords %s: want %d got %d\n", msg, fileName, len(want), len(got)) } - for i, v := range got { - if v != want[i] { + + for _, v := range got { + if !types.MemberOf(v, want) { t.Fatalf("%s: list keywords %s: want %v got %v\n", msg, fileName, want, got) } } @@ -70,19 +72,22 @@ func TestKeywords(t *testing.T) { // # of keywords must be 0 listKeywords(t, msg, fileName, nil) - keywords := []string{"Ö", "keyword2"} - + keywords := []string{"Ö", "你好"} if err := api.AddKeywordsFile(fileName, "", keywords, nil); err != nil { t.Fatalf("%s add keywords: %v\n", msg, err) } - listKeywords(t, msg, fileName, keywords) - if err := api.RemoveKeywordsFile(fileName, "", []string{"keyword2"}, nil); err != nil { - t.Fatalf("%s remove 1 keyword: %v\n", msg, err) + keywords = []string{"world"} + if err := api.AddKeywordsFile(fileName, "", keywords, nil); err != nil { + t.Fatalf("%s add keywords: %v\n", msg, err) 
} + listKeywords(t, msg, fileName, []string{"Ö", "你好", "world"}) - listKeywords(t, msg, fileName, []string{"Ö"}) + if err := api.RemoveKeywordsFile(fileName, "", []string{"你好"}, nil); err != nil { + t.Fatalf("%s remove 1 keyword: %v\n", msg, err) + } + listKeywords(t, msg, fileName, []string{"Ö", "world"}) if err := api.RemoveKeywordsFile(fileName, "", nil, nil); err != nil { t.Fatalf("%s remove all keywords: %v\n", msg, err) diff --git a/pkg/api/test/merge_test.go b/pkg/api/test/merge_test.go index 07425b2b..4c7edfc4 100644 --- a/pkg/api/test/merge_test.go +++ b/pkg/api/test/merge_test.go @@ -64,8 +64,8 @@ func TestMergeCreateZipped(t *testing.T) { // The actual usecase for this is the recombination of 2 PDF files representing even and odd pages of some PDF source. // See #716 - inFile2 := filepath.Join(inDir, "adobe_errata.pdf") inFile1 := filepath.Join(inDir, "Acroforms2.pdf") + inFile2 := filepath.Join(inDir, "adobe_errata.pdf") outFile := filepath.Join(outDir, "out.pdf") if err := api.MergeCreateZipFile(inFile1, inFile2, outFile, nil); err != nil { diff --git a/pkg/api/test/page_test.go b/pkg/api/test/page_test.go index a6b1338e..f8896e3a 100644 --- a/pkg/api/test/page_test.go +++ b/pkg/api/test/page_test.go @@ -21,6 +21,8 @@ import ( "testing" "github.com/angel-one/pdfcpu/pkg/api" + "github.com/angel-one/pdfcpu/pkg/pdfcpu" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" ) func TestInsertRemovePages(t *testing.T) { @@ -34,7 +36,7 @@ func TestInsertRemovePages(t *testing.T) { } // Insert an empty page before pages 1 and 2. - if err := api.InsertPagesFile(inFile, outFile, []string{"-2"}, true, nil); err != nil { + if err := api.InsertPagesFile(inFile, outFile, []string{"-2"}, true, nil, nil); err != nil { t.Fatalf("%s %s: %v\n", msg, outFile, err) } if err := api.ValidateFile(outFile, nil); err != nil { @@ -63,3 +65,36 @@ func TestInsertRemovePages(t *testing.T) { t.Fatalf("%s %s: pageCount want:%d got:%d\n", msg, inFile, n1, n2) } } + +func TestInsertCustomBlankPage(t *testing.T) { + msg := "TestInsertCustomBlankPage" + inFile := filepath.Join(inDir, "Acroforms2.pdf") + outFile := filepath.Join(outDir, "test.pdf") + + selectedPages := []string{"2"} + + before := false + + pageConf, err := pdfcpu.ParsePageConfiguration("f:A5L", conf.Unit) + if err != nil { + t.Fatalf("%s %s: %v\n", msg, outFile, err) + } + + // Insert an empty A5 page in landscape mode after page 2. + if err := api.InsertPagesFile(inFile, outFile, selectedPages, before, pageConf, conf); err != nil { + t.Fatalf("%s %s: %v\n", msg, outFile, err) + } + + selectedPages = []string{"odd"} + + pageConf, err = pdfcpu.ParsePageConfiguration("dim:5 10", types.CENTIMETRES) + if err != nil { + t.Fatalf("%s %s: %v\n", msg, outFile, err) + } + + // Insert an empty page with dimensions 5 x 10 cm after every odd page.
+ if err := api.InsertPagesFile(inFile, outFile, selectedPages, before, pageConf, conf); err != nil { + t.Fatalf("%s %s: %v\n", msg, outFile, err) + } + +} diff --git a/pkg/api/test/property_test.go b/pkg/api/test/property_test.go index 3ef3b5ed..4c0851b6 100644 --- a/pkg/api/test/property_test.go +++ b/pkg/api/test/property_test.go @@ -83,18 +83,18 @@ func TestProperties(t *testing.T) { // # of properties must be 0 listProperties(t, msg, fileName, nil) - properties := map[string]string{"name1": "value1", "nameÖ": "valueö"} + properties := map[string]string{"name1": "value1", "nameÖ": "valueö", "cjkv": "你好"} if err := api.AddPropertiesFile(fileName, "", properties, nil); err != nil { t.Fatalf("%s add properties: %v\n", msg, err) } - listProperties(t, msg, fileName, []string{"name1 = value1", "nameÖ = valueö"}) + listProperties(t, msg, fileName, []string{"cjkv = 你好", "name1 = value1", "nameÖ = valueö"}) if err := api.RemovePropertiesFile(fileName, "", []string{"nameÖ"}, nil); err != nil { t.Fatalf("%s remove 1 property: %v\n", msg, err) } - listProperties(t, msg, fileName, []string{"name1 = value1"}) + listProperties(t, msg, fileName, []string{"cjkv = 你好", "name1 = value1"}) if err := api.RemovePropertiesFile(fileName, "", nil, nil); err != nil { t.Fatalf("%s remove all properties: %v\n", msg, err) diff --git a/pkg/api/test/selectPages_test.go b/pkg/api/test/selectPages_test.go index 92c4d5e2..1b367cfe 100644 --- a/pkg/api/test/selectPages_test.go +++ b/pkg/api/test/selectPages_test.go @@ -144,6 +144,7 @@ func TestSelectedPages(t *testing.T) { testSelectedPages("l,even", pageCount, "01011", t) testSelectedPages("1-l,!2-l-1", pageCount, "10001", t) + testSelectedPages("1-l,!2-l-1", pageCount, "10001", t) } func collectedPagesString(cp []int) string { @@ -188,7 +189,6 @@ func TestCollectedPages(t *testing.T) { testCollectedPages("3", pageCount, "[3]", t) testCollectedPages("4", pageCount, "[4]", t) testCollectedPages("5", pageCount, "[5]", t) - testCollectedPages("6", pageCount, "[]", t) testCollectedPages("-3", pageCount, "[1 2 3]", t) testCollectedPages("3-", pageCount, "[3 4 5]", t) @@ -201,18 +201,12 @@ func TestCollectedPages(t *testing.T) { testCollectedPages("5-7", pageCount, "[5]", t) testCollectedPages("4-", pageCount, "[4 5]", t) testCollectedPages("5-", pageCount, "[5]", t) - testCollectedPages("!4", pageCount, "[]", t) testCollectedPages("-l", pageCount, "[1 2 3 4 5]", t) testCollectedPages("-l-1", pageCount, "[1 2 3 4]", t) testCollectedPages("2-l", pageCount, "[2 3 4 5]", t) testCollectedPages("2-l-2", pageCount, "[2 3]", t) testCollectedPages("2-l-3", pageCount, "[2]", t) - testCollectedPages("2-l-4", pageCount, "[]", t) - testCollectedPages("!l", pageCount, "[]", t) - testCollectedPages("nl", pageCount, "[]", t) - testCollectedPages("!l-2", pageCount, "[]", t) - testCollectedPages("nl-2", pageCount, "[]", t) testCollectedPages("l", pageCount, "[5]", t) testCollectedPages("l-1", pageCount, "[4]", t) testCollectedPages("l-1-", pageCount, "[4 5]", t) @@ -226,6 +220,4 @@ func TestCollectedPages(t *testing.T) { testCollectedPages("1-,!l", pageCount, "[1 2 3 4]", t) testCollectedPages("1-,nl", pageCount, "[1 2 3 4]", t) - testSelectedPages("1-l,!2-l-1", pageCount, "10001", t) - } diff --git a/pkg/api/test/sign_test.go b/pkg/api/test/sign_test.go new file mode 100644 index 00000000..72316ba2 --- /dev/null +++ b/pkg/api/test/sign_test.go @@ -0,0 +1,107 @@ +/* +Copyright 2025 The pdf Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package test + +import ( + "fmt" + "path/filepath" + "testing" + + "github.com/angel-one/pdfcpu/pkg/api" +) + +func logResults(ss []string) { + for _, s := range ss { + fmt.Println(s) + } +} + +func TestValidateSignature_X509_RSA_SHA1(t *testing.T) { + msg := "ValidateSignature_X509_RSA_SHA1" + + // You may provide your signed PDFs in this dir. + dir := filepath.Join(samplesDir, "signatures", "adbe.x509.rsa_sha1") + + for _, fn := range AllPDFs(t, dir) { + inFile := filepath.Join(dir, fn) + fmt.Println("\nvalidate signatures of " + inFile) + all := true + full := false + ss, err := api.ValidateSignaturesFile(inFile, all, full, conf) + if err != nil { + t.Fatalf("%s: %v\n", msg, err) + } + logResults(ss) + } +} + +func TestValidateSignature_PKCS7_SHA1(t *testing.T) { + msg := "ValidateSignature_PKCS7_SHA1" + + // You may provide your signed PDFs in this dir. + dir := filepath.Join(samplesDir, "signatures", "adbe.pkcs7.sha1") + + for _, fn := range AllPDFs(t, dir) { + inFile := filepath.Join(dir, fn) + fmt.Println("validate signatures of " + inFile) + all := true + full := false + ss, err := api.ValidateSignaturesFile(inFile, all, full, conf) + if err != nil { + t.Fatalf("%s: %v\n", msg, err) + } + logResults(ss) + } +} + +func TestValidateSignature_PKCS7_Detached(t *testing.T) { + msg := "ValidateSignature_PKCS7_Detached" + + // You may provide your signed PDFs in this dir. + dir := filepath.Join(samplesDir, "signatures", "adbe.pkcs7.detached") + + for _, fn := range AllPDFs(t, dir) { + inFile := filepath.Join(dir, fn) + fmt.Println("\nvalidate signatures of " + inFile) + all := true + full := true + ss, err := api.ValidateSignaturesFile(inFile, all, full, conf) + if err != nil { + t.Fatalf("%s: %v\n", msg, err) + } + logResults(ss) + } +} + +func TestValidateSignature_ETSI_CAdES_Detached(t *testing.T) { + msg := "ValidateSignature_ETSI_CAdES_Detached" + + // You may provide your signed PDFs in this dir. + dir := filepath.Join(samplesDir, "signatures", "ETSI.CAdES.detached") + + for _, fn := range AllPDFs(t, dir) { + inFile := filepath.Join(dir, fn) + fmt.Println("\nvalidate signatures of " + inFile) + all := true + full := true + ss, err := api.ValidateSignaturesFile(inFile, all, full, conf) + if err != nil { + t.Fatalf("%s: %v\n", msg, err) + } + logResults(ss) + } +} diff --git a/pkg/api/test/stampVersatile_test.go b/pkg/api/test/stampVersatile_test.go index 3cafe061..57f415a1 100644 --- a/pkg/api/test/stampVersatile_test.go +++ b/pkg/api/test/stampVersatile_test.go @@ -77,7 +77,7 @@ func TestAlternatingPageNumbersViaWatermarkMap(t *testing.T) { t.Fatalf("%s %s: %v\n", msg, outFile, err) } - // Add a "Draft" stamp with opacity 0.6 along the 1st diagonale in light blue using Courier. + // Add a "Draft" stamp with opacity 0.6 along the 1st diagonal in light blue using Courier. 
if err := api.AddTextWatermarksFile(outFile, outFile, nil, true, "Draft", "fo:Courier, scale:.9, fillcol:#00aacc, op:.6", nil); err != nil { t.Fatalf("%s %s: %v\n", msg, outFile, err) } @@ -134,7 +134,7 @@ func TestAlternatingPageNumbersViaWatermarkMapLowLevel(t *testing.T) { t.Fatalf("%s %s: %v\n", msg, outFile, err) } - // Add a "Draft" stamp with opacity 0.6 along the 1st diagonale in light blue using Courier. + // Add a "Draft" stamp with opacity 0.6 along the 1st diagonal in light blue using Courier. wm, err = api.TextWatermark("Draft", "fo:Courier, scale:.9, fillcol:#00aacc, op:.6", true, false, unit) if err != nil { t.Fatalf("%s %s: %v\n", msg, outFile, err) @@ -204,7 +204,7 @@ func TestAlternatingPageNumbersViaWatermarkSliceMap(t *testing.T) { wms = append(wms, wm) // 3rd watermark on page - // Add a "Draft" stamp with opacity 0.6 along the 1st diagonale in light blue using Courier. + // Add a "Draft" stamp with opacity 0.6 along the 1st diagonal in light blue using Courier. text = "Draft" desc = fmt.Sprintf("fo:Courier, scale:.9, fillcol:#00aacc, op:%f", opacity) wm, err = api.TextWatermark(text, desc, onTop, update, unit) diff --git a/pkg/api/test/stamp_test.go b/pkg/api/test/stamp_test.go index 99be4423..763420f0 100644 --- a/pkg/api/test/stamp_test.go +++ b/pkg/api/test/stamp_test.go @@ -540,8 +540,8 @@ func hasWatermarks(inFile string, t *testing.T) bool { return ok } -func TestStampingLifecyle(t *testing.T) { - msg := "TestStampingLifecyle" +func TestStampingLifecycle(t *testing.T) { + msg := "TestStampingLifecycle" inFile := filepath.Join(inDir, "Acroforms2.pdf") outFile := filepath.Join(outDir, "stampLC.pdf") onTop := true // we are testing stamps diff --git a/pkg/api/validate.go b/pkg/api/validate.go index 4df1c59b..8d2bd4db 100644 --- a/pkg/api/validate.go +++ b/pkg/api/validate.go @@ -23,6 +23,7 @@ import ( "time" "github.com/angel-one/pdfcpu/pkg/log" + "github.com/angel-one/pdfcpu/pkg/pdfcpu" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/pkg/errors" ) @@ -56,6 +57,15 @@ func Validate(rs io.ReadSeeker, conf *model.Configuration) error { err = errors.Wrap(err, fmt.Sprintf("validation error (obj#:%d)%s", ctx.CurObj, s)) } + if err == nil { + if conf.Optimize { + if log.CLIEnabled() { + log.CLI.Println("optimizing...") + } + err = pdfcpu.OptimizeXRefTable(ctx) + } + } + dur2 := time.Since(from2).Seconds() dur := time.Since(from1).Seconds() diff --git a/pkg/api/viewerPreferences.go b/pkg/api/viewerPreferences.go index b43f6c4e..c3a0370d 100644 --- a/pkg/api/viewerPreferences.go +++ b/pkg/api/viewerPreferences.go @@ -48,7 +48,7 @@ func ViewerPreferences(rs io.ReadSeeker, conf *model.Configuration) (*model.View return nil, nil, err } - v := ctx.Version() + v := ctx.XRefTable.Version() return ctx.ViewerPref, &v, nil } @@ -98,7 +98,7 @@ func ListViewerPreferences(rs io.ReadSeeker, all bool, conf *model.Configuration return []string{"No viewer preferences available."}, nil } - vp1, err := model.ViewerPreferencesWithDefaults(ctx.ViewerPref, ctx.Version()) + vp1, err := model.ViewerPreferencesWithDefaults(ctx.ViewerPref, ctx.XRefTable.Version()) if err != nil { return nil, err } @@ -183,7 +183,7 @@ func SetViewerPreferences(rs io.ReadSeeker, w io.Writer, vp model.ViewerPreferen return err } - version := ctx.Version() + version := ctx.XRefTable.Version() if err := vp.Validate(version); err != nil { return err diff --git a/pkg/cli/cli.go b/pkg/cli/cli.go index c81bc003..b0b8c4b2 100644 --- a/pkg/cli/cli.go +++ b/pkg/cli/cli.go @@ -115,7 +115,7 @@ func InsertPages(cmd 
*Command) ([]string, error) { if cmd.Mode == model.INSERTPAGESAFTER { before = false } - return nil, api.InsertPagesFile(*cmd.InFile, *cmd.OutFile, cmd.PageSelection, before, cmd.Conf) + return nil, api.InsertPagesFile(*cmd.InFile, *cmd.OutFile, cmd.PageSelection, before, cmd.PageConf, cmd.Conf) } // RemovePages removes selected pages. @@ -185,7 +185,7 @@ func ExtractAttachments(cmd *Command) ([]string, error) { // ListInfo gathers information about inFile and returns the result as []string. func ListInfo(cmd *Command) ([]string, error) { - return ListInfoFiles(cmd.InFiles, cmd.PageSelection, cmd.BoolVal1, cmd.Conf) + return ListInfoFiles(cmd.InFiles, cmd.PageSelection, cmd.BoolVal1, cmd.BoolVal2, cmd.Conf) } // CreateCheatSheetsFonts creates single page PDF cheat sheets for user fonts in current dir. @@ -275,6 +275,24 @@ func ListImages(cmd *Command) ([]string, error) { return ListImagesFile(cmd.InFiles, cmd.PageSelection, cmd.Conf) } +// UpdateImages replaces image objects. +func UpdateImages(cmd *Command) ([]string, error) { + var ( + objNr int + pageNr int + id string + ) + if cmd.IntVal > 0 { + if cmd.StringVal != "" { + pageNr = cmd.IntVal + id = cmd.StringVal + } else { + objNr = cmd.IntVal + } + } + return nil, api.UpdateImagesFile(cmd.InFiles[0], cmd.InFiles[1], *cmd.OutFile, objNr, pageNr, id, cmd.Conf) +} + // Dump known object to stdout. func Dump(cmd *Command) ([]string, error) { mode := cmd.IntVals[0] @@ -422,3 +440,23 @@ func ResetViewerPreferences(cmd *Command) ([]string, error) { func Zoom(cmd *Command) ([]string, error) { return nil, api.ZoomFile(*cmd.InFile, *cmd.OutFile, cmd.PageSelection, cmd.Zoom, cmd.Conf) } + +// ListCertificates returns installed certificates. +func ListCertificates(cmd *Command) ([]string, error) { + return ListCertificatesAll(cmd.BoolVal1, cmd.Conf) +} + +// ImportCertificates imports certificates. +func ImportCertificates(cmd *Command) ([]string, error) { + return api.ImportCertificates(cmd.InFiles) +} + +// InspectCertificates prints the certificate details. +func InspectCertificates(cmd *Command) ([]string, error) { + return api.InspectCertificates(cmd.InFiles) +} + +// ValidateSignatures validates contained digital signatures. +func ValidateSignatures(cmd *Command) ([]string, error) { + return api.ValidateSignaturesFile(*cmd.InFile, cmd.BoolVal1, cmd.BoolVal2, cmd.Conf) +} diff --git a/pkg/cli/cmd.go b/pkg/cli/cmd.go index e67561c4..bbb51e16 100644 --- a/pkg/cli/cmd.go +++ b/pkg/cli/cmd.go @@ -55,6 +55,7 @@ type Command struct { Zoom *model.Zoom Watermark *model.Watermark ViewerPreferences *model.ViewerPreferences + PageConf *pdfcpu.PageConfiguration Conf *model.Configuration } @@ -110,6 +111,7 @@ var cmdMap = map[model.CommandMode]func(cmd *Command) ([]string, error){ model.LISTANNOTATIONS: processPageAnnotations, model.REMOVEANNOTATIONS: processPageAnnotations, model.LISTIMAGES: processImages, + model.UPDATEIMAGES: processImages, model.DUMP: Dump, model.CREATE: Create, model.LISTFORMFIELDS: processForm, @@ -138,6 +140,10 @@ var cmdMap = map[model.CommandMode]func(cmd *Command) ([]string, error){ model.SETVIEWERPREFERENCES: processViewerPreferences, model.RESETVIEWERPREFERENCES: processViewerPreferences, model.ZOOM: Zoom, + model.LISTCERTIFICATES: processCertificates, + model.INSPECTCERTIFICATES: processCertificates, + model.IMPORTCERTIFICATES: processCertificates, + model.VALIDATESIGNATURES: processSignatures, } // ValidateCommand creates a new command to validate a file.
@@ -515,7 +521,7 @@ func ImportImagesCommand(imageFiles []string, outFile string, imp *pdfcpu.Import } // InsertPagesCommand creates a new command to insert a blank page before or after selected pages. -func InsertPagesCommand(inFile, outFile string, pageSelection []string, conf *model.Configuration, mode string) *Command { +func InsertPagesCommand(inFile, outFile string, pageSelection []string, conf *model.Configuration, mode string, pageConf *pdfcpu.PageConfiguration) *Command { if conf == nil { conf = model.NewDefaultConfiguration() } @@ -529,6 +535,7 @@ func InsertPagesCommand(inFile, outFile string, pageSelection []string, conf *mo InFile: &inFile, OutFile: &outFile, PageSelection: pageSelection, + PageConf: pageConf, Conf: conf} } @@ -592,7 +599,7 @@ func BookletCommand(inFiles []string, outFile string, pageSelection []string, nu } // InfoCommand creates a new command to output information about inFile. -func InfoCommand(inFiles []string, pageSelection []string, json bool, conf *model.Configuration) *Command { +func InfoCommand(inFiles []string, pageSelection []string, fonts, json bool, conf *model.Configuration) *Command { if conf == nil { conf = model.NewDefaultConfiguration() } @@ -601,7 +608,8 @@ func InfoCommand(inFiles []string, pageSelection []string, json bool, conf *mode Mode: model.LISTINFO, InFiles: inFiles, PageSelection: pageSelection, - BoolVal1: json, + BoolVal1: fonts, + BoolVal2: json, Conf: conf} } @@ -835,6 +843,22 @@ func ListImagesCommand(inFiles []string, pageSelection []string, conf *model.Con Conf: conf} } +// UpdateImagesCommand creates a new command to update images. +func UpdateImagesCommand(inFile, imageFile, outFile string, objNrOrPageNr int, id string, conf *model.Configuration) *Command { + if conf == nil { + conf = model.NewDefaultConfiguration() + } + conf.Cmd = model.UPDATEIMAGES + + return &Command{ + Mode: model.UPDATEIMAGES, + InFiles: []string{inFile, imageFile}, + OutFile: &outFile, + IntVal: objNrOrPageNr, + StringVal: id, + Conf: conf} +} + // DumpCommand creates a new command to dump objects on stdout. func DumpCommand(inFilePDF string, vals []int, conf *model.Configuration) *Command { if conf == nil { @@ -1226,3 +1250,53 @@ func ZoomCommand(inFile, outFile string, pageSelection []string, zoom *model.Zoo Zoom: zoom, Conf: conf} } + +// ListCertificatesCommand creates a new command to list installed certificates. +func ListCertificatesCommand(json bool, conf *model.Configuration) *Command { + if conf == nil { + conf = model.NewDefaultConfiguration() + } + conf.Cmd = model.LISTCERTIFICATES + return &Command{ + Mode: model.LISTCERTIFICATES, + BoolVal1: json, + Conf: conf} +} + +// InspectCertificatesCommand creates a new command to inspect certificates. +func InspectCertificatesCommand(inFiles []string, conf *model.Configuration) *Command { + if conf == nil { + conf = model.NewDefaultConfiguration() + } + conf.Cmd = model.INSPECTCERTIFICATES + return &Command{ + Mode: model.INSPECTCERTIFICATES, + InFiles: inFiles, + Conf: conf} +} + +// ImportCertificatesCommand creates a new command to import certificates. +func ImportCertificatesCommand(inFiles []string, conf *model.Configuration) *Command { + if conf == nil { + conf = model.NewDefaultConfiguration() + } + conf.Cmd = model.IMPORTCERTIFICATES + return &Command{ + Mode: model.IMPORTCERTIFICATES, + InFiles: inFiles, + Conf: conf} +} + +// ValidateSignaturesCommand creates a new command to validate encountered digital signatures. 
+func ValidateSignaturesCommand(inFile string, all, full bool, conf *model.Configuration) *Command { + if conf == nil { + conf = model.NewDefaultConfiguration() + } + conf.Cmd = model.VALIDATESIGNATURES + return &Command{ + Mode: model.VALIDATESIGNATURES, + InFile: &inFile, + BoolVal1: all, + BoolVal2: full, + Conf: conf} +} diff --git a/pkg/cli/list.go b/pkg/cli/list.go index 07d08f7d..502b30d8 100644 --- a/pkg/cli/list.go +++ b/pkg/cli/list.go @@ -18,13 +18,17 @@ limitations under the License. package cli import ( + "crypto/x509" "encoding/json" + "encoding/pem" "fmt" "io" "math" "os" + "path/filepath" "sort" "strconv" + "strings" "time" "github.com/angel-one/pdfcpu/pkg/api" @@ -33,6 +37,7 @@ import ( "github.com/angel-one/pdfcpu/pkg/pdfcpu/form" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" + "github.com/hhrutter/pkcs7" "github.com/pkg/errors" ) @@ -211,7 +216,7 @@ func listImages(rs io.ReadSeeker, selectedPages []string, conf *model.Configurat } conf.Cmd = model.LISTIMAGES - ctx, err := api.ReadAndValidate(rs, conf) + ctx, err := api.ReadValidateAndOptimize(rs, conf) if err != nil { return nil, err } @@ -260,14 +265,14 @@ func ListImagesFile(inFiles []string, selectedPages []string, conf *model.Config } // ListInfoFile returns formatted information about inFile. -func ListInfoFile(inFile string, selectedPages []string, conf *model.Configuration) ([]string, error) { +func ListInfoFile(inFile string, selectedPages []string, fonts bool, conf *model.Configuration) ([]string, error) { f, err := os.Open(inFile) if err != nil { return nil, err } defer f.Close() - info, err := api.PDFInfo(f, inFile, selectedPages, conf) + info, err := api.PDFInfo(f, inFile, selectedPages, fonts, conf) if err != nil { return nil, err } @@ -277,7 +282,7 @@ func ListInfoFile(inFile string, selectedPages []string, conf *model.Configurati return nil, err } - ss, err := pdfcpu.ListInfo(info, pages) + ss, err := pdfcpu.ListInfo(info, pages, fonts) if err != nil { return nil, err } @@ -352,7 +357,7 @@ func jsonInfo(info *pdfcpu.PDFInfo, pages types.IntSet) (map[string]model.PageBo return nil, dims } -func listInfoFilesJSON(inFiles []string, selectedPages []string, conf *model.Configuration) ([]string, error) { +func listInfoFilesJSON(inFiles []string, selectedPages []string, fonts bool, conf *model.Configuration) ([]string, error) { var infos []*pdfcpu.PDFInfo for _, fn := range inFiles { @@ -363,7 +368,7 @@ func listInfoFilesJSON(inFiles []string, selectedPages []string, conf *model.Con } defer f.Close() - info, err := api.PDFInfo(f, fn, selectedPages, conf) + info, err := api.PDFInfo(f, fn, selectedPages, fonts, conf) if err != nil { return nil, err } @@ -395,10 +400,10 @@ func listInfoFilesJSON(inFiles []string, selectedPages []string, conf *model.Con } // ListInfoFiles returns formatted information about inFiles. 
-func ListInfoFiles(inFiles []string, selectedPages []string, json bool, conf *model.Configuration) ([]string, error) { +func ListInfoFiles(inFiles []string, selectedPages []string, fonts, json bool, conf *model.Configuration) ([]string, error) { if json { - return listInfoFilesJSON(inFiles, selectedPages, conf) + return listInfoFilesJSON(inFiles, selectedPages, fonts, conf) } var ss []string @@ -407,7 +412,7 @@ func ListInfoFiles(inFiles []string, selectedPages []string, json bool, conf *mo if i > 0 { ss = append(ss, "") } - ssx, err := ListInfoFile(fn, selectedPages, conf) + ssx, err := ListInfoFile(fn, selectedPages, fonts, conf) if err != nil { if len(inFiles) == 1 { return nil, err @@ -446,10 +451,6 @@ func listPermissions(rs io.ReadSeeker, conf *model.Configuration) ([]string, err return nil, err } - if ctx.Version() == model.V20 { - return nil, pdfcpu.ErrUnsupportedVersion - } - return pdfcpu.Permissions(ctx), nil } @@ -545,3 +546,128 @@ func ListBookmarksFile(inFile string, conf *model.Configuration) ([]string, erro return listBookmarks(f, conf) } + +func listPEM(fName string) (int, error) { + bb, err := os.ReadFile(fName) + if err != nil { + fmt.Printf("%v\n", err) + return 0, err + } + + if len(bb) == 0 { + //return 0, errors.Errorf("%s is empty\n", filepath.Base(fName)) + return 0, errors.New("is empty\n") + } + + ss := []string{} + for len(bb) > 0 { + var block *pem.Block + block, bb = pem.Decode(bb) + if block == nil { + break + } + if block.Type != "CERTIFICATE" || len(block.Headers) != 0 { + continue + } + + certBytes := block.Bytes + cert, err := x509.ParseCertificate(certBytes) + if err != nil { + fmt.Printf("%v\n", err) + continue + } + ss = append(ss, model.CertString(cert)) + } + + sort.Strings(ss) + for i, s := range ss { + fmt.Printf("%03d:\n%s\n", i+1, s) + } + + return len(ss), nil +} + +func listP7C(fName string) (int, error) { + bb, err := os.ReadFile(fName) + if err != nil { + fmt.Printf("%v\n", err) + return 0, err + } + + if len(bb) == 0 { + //return 0, errors.Errorf("%s is empty\n", filepath.Base(fName)) + return 0, errors.New("is empty\n") + } + + // // Check if the data starts with PEM markers (for Base64 encoding) + // if isPEM(data) { + // // If the file is Base64 encoded (PEM format), decode it + // decodedData, err := base64.StdEncoding.DecodeString(string(data)) + // if err != nil { + // log.Fatalf("Error decoding Base64: %v", err) + // } + // data = decodedData + // } + + p7, err := pkcs7.Parse(bb) + if err != nil { + return 0, err + } + + ss := []string{} + for _, cert := range p7.Certificates { + ss = append(ss, model.CertString(cert)) + } + + sort.Strings(ss) + for i, s := range ss { + fmt.Printf("%03d:\n%s\n", i+1, s) + } + + return len(ss), nil +} + +// ListCertificatesAll returns formatted information about installed certificates. 
+func ListCertificatesAll(json bool, conf *model.Configuration) ([]string, error) { + // Process *.pem and *.p7c + fmt.Printf("certDir: %s\n", model.CertDir) + + if err := os.MkdirAll(model.CertDir, os.ModePerm); err != nil { + return nil, err + } + + count := 0 + + err := filepath.WalkDir(model.CertDir, func(path string, d os.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + if !model.IsPEM(path) && !model.IsP7C(path) { + return nil + } + + fmt.Printf("\n%s:\n", strings.TrimPrefix(path, model.CertDir)) + + if model.IsPEM(path) { + c, err := listPEM(path) + if err != nil { + fmt.Printf("%v\n", err) + } + count += c + return nil + } + c, err := listP7C(path) + if err != nil { + fmt.Printf("%v\n", err) + } + count += c + return nil + }) + + fmt.Printf("total installed certs: %d\n", count) + + return nil, err +} diff --git a/pkg/cli/process.go b/pkg/cli/process.go index ed17c263..f9d65d9e 100644 --- a/pkg/cli/process.go +++ b/pkg/cli/process.go @@ -38,19 +38,6 @@ func Process(cmd *Command) (out []string, err error) { return nil, errors.Errorf("pdfcpu: process: Unknown command mode %d\n", cmd.Mode) } -func processPageAnnotations(cmd *Command) (out []string, err error) { - switch cmd.Mode { - - case model.LISTANNOTATIONS: - out, err = ListAnnotations(cmd) - - case model.REMOVEANNOTATIONS: - out, err = RemoveAnnotations(cmd) - } - - return out, err -} - func processAttachments(cmd *Command) (out []string, err error) { switch cmd.Mode { @@ -70,38 +57,23 @@ func processAttachments(cmd *Command) (out []string, err error) { return out, err } -func processKeywords(cmd *Command) (out []string, err error) { - switch cmd.Mode { - - case model.LISTKEYWORDS: - out, err = ListKeywords(cmd) - - case model.ADDKEYWORDS: - out, err = AddKeywords(cmd) - - case model.REMOVEKEYWORDS: - out, err = RemoveKeywords(cmd) - - } - - return out, err -} - -func processProperties(cmd *Command) (out []string, err error) { +func processBookmarks(cmd *Command) (out []string, err error) { switch cmd.Mode { - case model.LISTPROPERTIES: - out, err = ListProperties(cmd) + case model.LISTBOOKMARKS: + return ListBookmarks(cmd) - case model.ADDPROPERTIES: - out, err = AddProperties(cmd) + case model.EXPORTBOOKMARKS: + return ExportBookmarks(cmd) - case model.REMOVEPROPERTIES: - out, err = RemoveProperties(cmd) + case model.IMPORTBOOKMARKS: + return ImportBookmarks(cmd) + case model.REMOVEBOOKMARKS: + return RemoveBookmarks(cmd) } - return out, err + return nil, nil } func processEncryption(cmd *Command) (out []string, err error) { @@ -123,46 +95,32 @@ func processEncryption(cmd *Command) (out []string, err error) { return nil, nil } -func processPermissions(cmd *Command) (out []string, err error) { - switch cmd.Mode { - - case model.LISTPERMISSIONS: - return ListPermissions(cmd) - - case model.SETPERMISSIONS: - return SetPermissions(cmd) - } - - return nil, nil -} - -func processPages(cmd *Command) (out []string, err error) { +func processForm(cmd *Command) (out []string, err error) { switch cmd.Mode { - case model.INSERTPAGESBEFORE, model.INSERTPAGESAFTER: - return InsertPages(cmd) + case model.LISTFORMFIELDS: + return ListFormFields(cmd) - case model.REMOVEPAGES: - return RemovePages(cmd) - } + case model.REMOVEFORMFIELDS: + return RemoveFormFields(cmd) - return nil, nil -} + case model.LOCKFORMFIELDS: + return LockFormFields(cmd) -func processPageBoundaries(cmd *Command) (out []string, err error) { - switch cmd.Mode { + case model.UNLOCKFORMFIELDS: + return UnlockFormFields(cmd) - 
case model.LISTBOXES: - return ListBoxes(cmd) + case model.RESETFORMFIELDS: + return ResetFormFields(cmd) - case model.ADDBOXES: - return AddBoxes(cmd) + case model.EXPORTFORMFIELDS: + return ExportFormFields(cmd) - case model.REMOVEBOXES: - return RemoveBoxes(cmd) + case model.FILLFORMFIELDS: + return FillFormFields(cmd) - case model.CROP: - return Crop(cmd) + case model.MULTIFILLFORMFIELDS: + return MultiFillFormFields(cmd) } return nil, nil @@ -173,56 +131,58 @@ func processImages(cmd *Command) (out []string, err error) { case model.LISTIMAGES: return ListImages(cmd) + + case model.UPDATEIMAGES: + return UpdateImages(cmd) } return nil, nil } -func processForm(cmd *Command) (out []string, err error) { +func processKeywords(cmd *Command) (out []string, err error) { switch cmd.Mode { - case model.LISTFORMFIELDS: - return ListFormFields(cmd) + case model.LISTKEYWORDS: + out, err = ListKeywords(cmd) - case model.REMOVEFORMFIELDS: - return RemoveFormFields(cmd) + case model.ADDKEYWORDS: + out, err = AddKeywords(cmd) - case model.LOCKFORMFIELDS: - return LockFormFields(cmd) + case model.REMOVEKEYWORDS: + out, err = RemoveKeywords(cmd) - case model.UNLOCKFORMFIELDS: - return UnlockFormFields(cmd) + } - case model.RESETFORMFIELDS: - return ResetFormFields(cmd) + return out, err +} - case model.EXPORTFORMFIELDS: - return ExportFormFields(cmd) +func processPageAnnotations(cmd *Command) (out []string, err error) { + switch cmd.Mode { - case model.FILLFORMFIELDS: - return FillFormFields(cmd) + case model.LISTANNOTATIONS: + out, err = ListAnnotations(cmd) - case model.MULTIFILLFORMFIELDS: - return MultiFillFormFields(cmd) + case model.REMOVEANNOTATIONS: + out, err = RemoveAnnotations(cmd) } - return nil, nil + return out, err } -func processBookmarks(cmd *Command) (out []string, err error) { +func processPageBoundaries(cmd *Command) (out []string, err error) { switch cmd.Mode { - case model.LISTBOOKMARKS: - return ListBookmarks(cmd) + case model.LISTBOXES: + return ListBoxes(cmd) - case model.EXPORTBOOKMARKS: - return ExportBookmarks(cmd) + case model.ADDBOXES: + return AddBoxes(cmd) - case model.IMPORTBOOKMARKS: - return ImportBookmarks(cmd) + case model.REMOVEBOXES: + return RemoveBoxes(cmd) - case model.REMOVEBOOKMARKS: - return RemoveBookmarks(cmd) + case model.CROP: + return Crop(cmd) } return nil, nil @@ -260,6 +220,49 @@ func processPageMode(cmd *Command) (out []string, err error) { return nil, nil } +func processPages(cmd *Command) (out []string, err error) { + switch cmd.Mode { + + case model.INSERTPAGESBEFORE, model.INSERTPAGESAFTER: + return InsertPages(cmd) + + case model.REMOVEPAGES: + return RemovePages(cmd) + } + + return nil, nil +} + +func processPermissions(cmd *Command) (out []string, err error) { + switch cmd.Mode { + + case model.LISTPERMISSIONS: + return ListPermissions(cmd) + + case model.SETPERMISSIONS: + return SetPermissions(cmd) + } + + return nil, nil +} + +func processProperties(cmd *Command) (out []string, err error) { + switch cmd.Mode { + + case model.LISTPROPERTIES: + out, err = ListProperties(cmd) + + case model.ADDPROPERTIES: + out, err = AddProperties(cmd) + + case model.REMOVEPROPERTIES: + out, err = RemoveProperties(cmd) + + } + + return out, err +} + func processViewerPreferences(cmd *Command) (out []string, err error) { switch cmd.Mode { @@ -275,3 +278,30 @@ func processViewerPreferences(cmd *Command) (out []string, err error) { return nil, nil } + +func processCertificates(cmd *Command) (out []string, err error) { + switch cmd.Mode { + + case 
model.LISTCERTIFICATES: + return ListCertificates(cmd) + + case model.INSPECTCERTIFICATES: + return InspectCertificates(cmd) + + case model.IMPORTCERTIFICATES: + return ImportCertificates(cmd) + + } + + return nil, nil +} + +func processSignatures(cmd *Command) (out []string, err error) { + switch cmd.Mode { + + case model.VALIDATESIGNATURES: + return ValidateSignatures(cmd) + } + + return nil, nil +} diff --git a/pkg/cli/test/certificate_test.go b/pkg/cli/test/certificate_test.go new file mode 100644 index 00000000..25ac462f --- /dev/null +++ b/pkg/cli/test/certificate_test.go @@ -0,0 +1,32 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package test + +import ( + "testing" + + "github.com/angel-one/pdfcpu/pkg/cli" +) + +func TestListCertificates(t *testing.T) { + msg := "TestListCertificates" + + cmd := cli.ListCertificatesCommand(false, conf) + if _, err := cli.Process(cmd); err != nil { + t.Fatalf("%s: %v\n", msg, err) + } +} diff --git a/pkg/cli/test/cli_test.go b/pkg/cli/test/cli_test.go index 543d88d0..84ced87b 100644 --- a/pkg/cli/test/cli_test.go +++ b/pkg/cli/test/cli_test.go @@ -153,7 +153,7 @@ func TestInfoCommand(t *testing.T) { msg := "TestInfoCommand" inFile := filepath.Join(inDir, "5116.DCT_Filter.pdf") - cmd := cli.InfoCommand([]string{inFile}, nil, true, conf) + cmd := cli.InfoCommand([]string{inFile}, nil, true, true, conf) if _, err := cli.Process(cmd); err != nil { t.Fatalf("%s: %v\n", msg, err) } diff --git a/pkg/cli/test/images_test.go b/pkg/cli/test/images_test.go new file mode 100644 index 00000000..4150994e --- /dev/null +++ b/pkg/cli/test/images_test.go @@ -0,0 +1,120 @@ +/* +Copyright 2024 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package test + +import ( + "path/filepath" + "testing" + + "github.com/angel-one/pdfcpu/pkg/cli" +) + +func testUpdateImages(t *testing.T, msg string, inFile, imgFile, outFile string, objNrOrPageNr int, id string) { + t.Helper() + + cmd := cli.UpdateImagesCommand(inFile, imgFile, outFile, objNrOrPageNr, id, conf) + if _, err := cli.Process(cmd); err != nil { + t.Fatalf("%s %s: %v\n", msg, inFile, err) + } + + if err := validateFile(t, outFile, conf); err != nil { + t.Fatalf("%s: %v\n", msg, err) + } +} + +func TestUpdateImages(t *testing.T) { + inDir := filepath.Join(samplesDir, "images") + + for _, tt := range []struct { + msg string + inFile string + imgFile string + outFile string + objNrOrPageNr int + id string + }{ + {"TestUpdateByObjNr", + "test.pdf", + "test_1_Im1.png", + "ImageUpdatedByObjNr.pdf", + 8, + ""}, + + {"TestUpdateByPageNrAndId", + "test.pdf", + "test_1_Im1.png", + "imageUpdatedByPageNrAndIdPage1.pdf", + 1, + "Im1"}, + + {"TestUpdateByPageNrAndId", + "test.pdf", + "test_1_Im1.png", + "imageUpdatedByPageNrAndIdPage2.pdf", + 2, + "Im1"}, + + {"TestUpdateByImageFileName", + "test.pdf", + "test_1_Im1.png", + "imageUpdatedByFileName.pdf", + 0, + ""}, + + {"TestUpdateByPageNrAndId", + "test.pdf", + "any.png", + "imageUpdatedByPageNrAndIdAny.pdf", + 1, + "Im1"}, + + {"TestUpdateByObjNrPNG", + "test.pdf", + "any.png", + "imageUpdatedByObjNrPNG.pdf", + 8, + ""}, + + {"TestUpdateByObjNrJPG", + "test.pdf", + "any.jpg", + "imageUpdatedByObjNrJPG.pdf", + 8, + ""}, + + {"TestUpdateByObjNrTIFF", + "test.pdf", + "any.tiff", + "imageUpdatedByObjNrTIFF.pdf", + 8, + ""}, + + {"TestUpdateByObjNrWEBP", + "test.pdf", + "any.webp", + "imageUpdatedByObjNrWEBP.pdf", + 8, + ""}, + } { + testUpdateImages(t, tt.msg, + filepath.Join(inDir, tt.inFile), + filepath.Join(inDir, tt.imgFile), + filepath.Join(outDir, tt.outFile), + tt.objNrOrPageNr, + tt.id) + } +} diff --git a/pkg/cli/test/keyword_test.go b/pkg/cli/test/keyword_test.go index ce8fc799..0a0c83c9 100644 --- a/pkg/cli/test/keyword_test.go +++ b/pkg/cli/test/keyword_test.go @@ -21,6 +21,7 @@ import ( "testing" "github.com/angel-one/pdfcpu/pkg/cli" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" ) func listKeywords(t *testing.T, msg, fileName string, want []string) []string { @@ -33,8 +34,9 @@ func listKeywords(t *testing.T, msg, fileName string, want []string) []string { if len(got) != len(want) { t.Fatalf("%s: list keywords %s: want %d got %d\n", msg, fileName, len(want), len(got)) } - for i, v := range got { - if v != want[i] { + + for _, v := range got { + if !types.MemberOf(v, want) { t.Fatalf("%s: list keywords %s: want %v got %v\n", msg, fileName, want, got) } } diff --git a/pkg/cli/test/nup_test.go b/pkg/cli/test/nup_test.go index a769b65f..c2ce1749 100644 --- a/pkg/cli/test/nup_test.go +++ b/pkg/cli/test/nup_test.go @@ -90,7 +90,7 @@ func TestNUpCommand(t *testing.T) { }, filepath.Join(outDir, "out1.pdf"), nil, - "form:Tabloid, bo:off, ma:0", + "form:Tabloid, bo:off, ma:0, enforce:off", "points", 6, true}, diff --git a/pkg/cli/test/page_test.go b/pkg/cli/test/page_test.go index 796eac05..5d777de1 100644 --- a/pkg/cli/test/page_test.go +++ b/pkg/cli/test/page_test.go @@ -35,7 +35,7 @@ func TestPagesCommand(t *testing.T) { } // Insert an empty page before pages 1 and 2. 
- cmd := cli.InsertPagesCommand(inFile, outFile, []string{"-2"}, conf, "before") + cmd := cli.InsertPagesCommand(inFile, outFile, []string{"-2"}, conf, "before", nil) if _, err := cli.Process(cmd); err != nil { t.Fatalf("%s %s: %v\n", msg, outFile, err) } diff --git a/pkg/cli/test/stamp_test.go b/pkg/cli/test/stamp_test.go index 713b4fe1..65f67c67 100644 --- a/pkg/cli/test/stamp_test.go +++ b/pkg/cli/test/stamp_test.go @@ -144,8 +144,8 @@ func TestAddWatermarks(t *testing.T) { } } -func TestStampingLifecyle(t *testing.T) { - msg := "TestStampingLifecyle" +func TestStampingLifecycle(t *testing.T) { + msg := "TestStampingLifecycle" inFile := filepath.Join(inDir, "Acroforms2.pdf") outFile := filepath.Join(outDir, "stampLC.pdf") onTop := true // we are testing stamps diff --git a/pkg/filter/ascii85Decode.go b/pkg/filter/ascii85Decode.go index af1d883f..3f195e07 100644 --- a/pkg/filter/ascii85Decode.go +++ b/pkg/filter/ascii85Decode.go @@ -48,6 +48,10 @@ func (f ascii85Decode) Encode(r io.Reader) (io.Reader, error) { // Decode implements decoding for an ASCII85Decode filter. func (f ascii85Decode) Decode(r io.Reader) (io.Reader, error) { + return f.DecodeLength(r, -1) +} + +func (f ascii85Decode) DecodeLength(r io.Reader, maxLen int64) (io.Reader, error) { bb, err := getReaderBytes(r) if err != nil { @@ -71,8 +75,14 @@ func (f ascii85Decode) Decode(r io.Reader) (io.Reader, error) { decoder := ascii85.NewDecoder(bytes.NewReader(bb)) var b2 bytes.Buffer - if _, err := io.Copy(&b2, decoder); err != nil { - return nil, err + if maxLen < 0 { + if _, err := io.Copy(&b2, decoder); err != nil { + return nil, err + } + } else { + if _, err := io.CopyN(&b2, decoder, maxLen); err != nil { + return nil, err + } } return &b2, nil diff --git a/pkg/filter/asciiHexDecode.go b/pkg/filter/asciiHexDecode.go index 04a8bea1..5d0945a2 100644 --- a/pkg/filter/asciiHexDecode.go +++ b/pkg/filter/asciiHexDecode.go @@ -47,7 +47,10 @@ func (f asciiHexDecode) Encode(r io.Reader) (io.Reader, error) { // Decode implements decoding for an ASCIIHexDecode filter. func (f asciiHexDecode) Decode(r io.Reader) (io.Reader, error) { + return f.DecodeLength(r, -1) +} +func (f asciiHexDecode) DecodeLength(r io.Reader, maxLen int64) (io.Reader, error) { bb, err := getReaderBytes(r) if err != nil { return nil, err @@ -70,9 +73,12 @@ func (f asciiHexDecode) Decode(r io.Reader) (io.Reader, error) { p = append(p, '0') } - dst := make([]byte, hex.DecodedLen(len(p))) + if maxLen < 0 { + maxLen = int64(hex.DecodedLen(len(p))) + } + dst := make([]byte, maxLen) - if _, err := hex.Decode(dst, p); err != nil { + if _, err := hex.Decode(dst, p[:maxLen*2]); err != nil { return nil, err } diff --git a/pkg/filter/ccittDecode.go b/pkg/filter/ccittDecode.go index 1b5793e0..964c8c7c 100644 --- a/pkg/filter/ccittDecode.go +++ b/pkg/filter/ccittDecode.go @@ -37,6 +37,10 @@ func (f ccittDecode) Encode(r io.Reader) (io.Reader, error) { // Decode implements decoding for a CCITTDecode filter. func (f ccittDecode) Decode(r io.Reader) (io.Reader, error) { + return f.DecodeLength(r, -1) +} + +func (f ccittDecode) DecodeLength(r io.Reader, maxLen int64) (io.Reader, error) { if log.TraceEnabled() { log.Trace.Println("DecodeCCITT begin") } diff --git a/pkg/filter/dctDecode.go b/pkg/filter/dctDecode.go index db517337..593c3ccc 100644 --- a/pkg/filter/dctDecode.go +++ b/pkg/filter/dctDecode.go @@ -35,7 +35,10 @@ func (f dctDecode) Encode(r io.Reader) (io.Reader, error) { // Decode implements decoding for a DCTDecode filter. 
func (f dctDecode) Decode(r io.Reader) (io.Reader, error) { + return f.DecodeLength(r, -1) +} +func (f dctDecode) DecodeLength(r io.Reader, maxLen int64) (io.Reader, error) { im, err := jpeg.Decode(r) if err != nil { return nil, err diff --git a/pkg/filter/filter.go b/pkg/filter/filter.go index 9f5cd860..7b1afdb5 100644 --- a/pkg/filter/filter.go +++ b/pkg/filter/filter.go @@ -45,6 +45,10 @@ var ErrUnsupportedFilter = errors.New("pdfcpu: filter not supported") type Filter interface { Encode(r io.Reader) (io.Reader, error) Decode(r io.Reader) (io.Reader, error) + // DecodeLength will decode at least maxLen bytes. For filters where decoding + // parts doesn't make sense (e.g. DCT), the whole stream is decoded. + // If maxLen < 0 is passed, the whole stream is decoded. + DecodeLength(r io.Reader, maxLen int64) (io.Reader, error) } // NewFilter returns a filter for given filterName and an optional parameter dictionary. @@ -100,6 +104,10 @@ type baseFilter struct { parms map[string]int } +func SupportsDecodeParms(f string) bool { + return f == CCITTFax || f == LZW || f == Flate +} + func getReaderBytes(r io.Reader) ([]byte, error) { var bb []byte if buf, ok := r.(*bytes.Buffer); ok { diff --git a/pkg/filter/flateDecode.go b/pkg/filter/flateDecode.go index 2a75ffdd..c11c9b2b 100644 --- a/pkg/filter/flateDecode.go +++ b/pkg/filter/flateDecode.go @@ -20,6 +20,7 @@ import ( "bytes" "compress/zlib" "io" + "strings" "github.com/angel-one/pdfcpu/pkg/log" "github.com/pkg/errors" @@ -81,6 +82,10 @@ func (f flate) Encode(r io.Reader) (io.Reader, error) { // Decode implements decoding for a Flate filter. func (f flate) Decode(r io.Reader) (io.Reader, error) { + return f.DecodeLength(r, -1) +} + +func (f flate) DecodeLength(r io.Reader, maxLen int64) (io.Reader, error) { if log.TraceEnabled() { log.Trace.Println("DecodeFlate begin") } @@ -92,12 +97,23 @@ func (f flate) Decode(r io.Reader) (io.Reader, error) { defer rc.Close() // Optional decode parameters need postprocessing. - return f.decodePostProcess(rc) + return f.decodePostProcess(rc, maxLen) } -func passThru(rin io.Reader) (*bytes.Buffer, error) { +func passThru(rin io.Reader, maxLen int64) (*bytes.Buffer, error) { var b bytes.Buffer - _, err := io.Copy(&b, rin) + var err error + if maxLen < 0 { + _, err = io.Copy(&b, rin) + } else { + _, err = io.CopyN(&b, rin, maxLen) + } + if err != nil && strings.Contains(err.Error(), "invalid checksum") { + if log.CLIEnabled() { + log.CLI.Println("skipped: truncated zlib stream") + } + err = nil + } if err == io.ErrUnexpectedEOF { // Workaround for missing support for partial flush in compress/flate. 
// See also https://github.com/golang/go/issues/31514 @@ -258,11 +274,26 @@ func (f flate) parameters() (colors, bpc, columns int, err error) { return colors, bpc, columns, nil } +func checkBufLen(b bytes.Buffer, maxLen int64) bool { + return maxLen < 0 || int64(b.Len()) < maxLen +} + +func process(w io.Writer, pr, cr []byte, predictor, colors, bytesPerPixel int) error { + d, err := processRow(pr, cr, predictor, colors, bytesPerPixel) + if err != nil { + return err + } + + _, err = w.Write(d) + + return err +} + // decodePostProcess -func (f flate) decodePostProcess(r io.Reader) (io.Reader, error) { +func (f flate) decodePostProcess(r io.Reader, maxLen int64) (io.Reader, error) { predictor, found := f.parms["Predictor"] if !found || predictor == PredictorNo { - return passThru(r) + return passThru(r, maxLen) } if !intMemberOf( @@ -299,7 +330,7 @@ func (f flate) decodePostProcess(r io.Reader) (io.Reader, error) { // Output buffer var b bytes.Buffer - for { + for checkBufLen(b, maxLen) { // Read decompressed bytes for one pixel row. n, err := io.ReadFull(r, cr) @@ -317,14 +348,8 @@ func (f flate) decodePostProcess(r io.Reader) (io.Reader, error) { return nil, errors.Errorf("pdfcpu: filter FlateDecode: read error, expected %d bytes, got: %d", m, n) } - d, err1 := processRow(pr, cr, predictor, colors, bytesPerPixel) - if err1 != nil { - return nil, err1 - } - - _, err1 = b.Write(d) - if err1 != nil { - return nil, err1 + if err := process(&b, pr, cr, predictor, colors, bytesPerPixel); err != nil { + return nil, err } if err == io.EOF { @@ -334,7 +359,7 @@ func (f flate) decodePostProcess(r io.Reader) (io.Reader, error) { pr, cr = cr, pr } - if b.Len()%rowSize > 0 { + if maxLen < 0 && b.Len()%rowSize > 0 { log.Info.Printf("failed postprocessing: %d %d\n", b.Len(), rowSize) return nil, errors.New("pdfcpu: filter FlateDecode: postprocessing failed") } diff --git a/pkg/filter/lzwDecode.go b/pkg/filter/lzwDecode.go index 407a0344..76886a11 100644 --- a/pkg/filter/lzwDecode.go +++ b/pkg/filter/lzwDecode.go @@ -59,6 +59,10 @@ func (f lzwDecode) Encode(r io.Reader) (io.Reader, error) { // Decode implements decoding for an LZWDecode filter. 
func (f lzwDecode) Decode(r io.Reader) (io.Reader, error) { + return f.DecodeLength(r, -1) +} + +func (f lzwDecode) DecodeLength(r io.Reader, maxLen int64) (io.Reader, error) { if log.TraceEnabled() { log.Trace.Println("DecodeLZW begin") } @@ -77,7 +81,13 @@ func (f lzwDecode) Decode(r io.Reader) (io.Reader, error) { defer rc.Close() var b bytes.Buffer - written, err := io.Copy(&b, rc) + var written int64 + var err error + if maxLen < 0 { + written, err = io.Copy(&b, rc) + } else { + written, err = io.CopyN(&b, rc, maxLen) + } if err != nil { return nil, err } diff --git a/pkg/filter/runLengthDecode.go b/pkg/filter/runLengthDecode.go index 006653ae..2cbb0bcf 100644 --- a/pkg/filter/runLengthDecode.go +++ b/pkg/filter/runLengthDecode.go @@ -25,7 +25,8 @@ type runLengthDecode struct { baseFilter } -func (f runLengthDecode) decode(w io.ByteWriter, src []byte) { +func (f runLengthDecode) decode(w io.ByteWriter, src []byte, maxLen int64) { + var written int64 for i := 0; i < len(src); { b := src[i] @@ -37,14 +38,24 @@ func (f runLengthDecode) decode(w io.ByteWriter, src []byte) { if b < 0x80 { c := int(b) + 1 for j := 0; j < c; j++ { + if maxLen >= 0 && maxLen == written { + break + } + w.WriteByte(src[i]) + written++ i++ } continue } c := 257 - int(b) for j := 0; j < c; j++ { + if maxLen >= 0 && maxLen == written { + break + } + w.WriteByte(src[i]) + written++ } i++ } @@ -125,6 +136,10 @@ func (f runLengthDecode) Encode(r io.Reader) (io.Reader, error) { // Decode implements decoding for an RunLengthDecode filter. func (f runLengthDecode) Decode(r io.Reader) (io.Reader, error) { + return f.DecodeLength(r, -1) +} + +func (f runLengthDecode) DecodeLength(r io.Reader, maxLen int64) (io.Reader, error) { b1, err := getReaderBytes(r) if err != nil { @@ -132,7 +147,7 @@ func (f runLengthDecode) Decode(r io.Reader) (io.Reader, error) { } var b2 bytes.Buffer - f.decode(&b2, b1) + f.decode(&b2, b1, maxLen) return &b2, nil } diff --git a/pkg/filter/runLengthDecode_test.go b/pkg/filter/runLengthDecode_test.go index 9c674347..1008cecc 100644 --- a/pkg/filter/runLengthDecode_test.go +++ b/pkg/filter/runLengthDecode_test.go @@ -71,7 +71,7 @@ func TestRunLengthEncoding(t *testing.T) { compare(t, enc.Bytes(), []byte(tt.enc)) var raw bytes.Buffer - f.decode(&raw, enc.Bytes()) + f.decode(&raw, enc.Bytes(), -1) compare(t, raw.Bytes(), []byte(tt.raw)) } diff --git a/pkg/font/metrics.go b/pkg/font/metrics.go index b5d5952e..109e7add 100644 --- a/pkg/font/metrics.go +++ b/pkg/font/metrics.go @@ -23,6 +23,7 @@ import ( "os" "path" "path/filepath" + "runtime/debug" "strconv" "strings" "sync" @@ -116,7 +117,7 @@ func (fd TTFLight) unicodeRangeBits(id string) []int { // Returns a slice of relevant unicodeRangeBits. // // This mapping is incomplete as we only cover unicode blocks of the most popular scripts. - // Please go to https://github.com/pdfcpu/pdfcpu/issues/new/choose for an extension request. + // Please go to https://github.com/angel-one/pdfcpu/issues/new/choose for an extension request. 
// // 0 Basic Latin 0000-007F // 1 Latin-1 Supplement 0080-00FF @@ -277,6 +278,7 @@ func CharWidth(fontName string, r rune) int { ttf, ok := UserFontMetrics[fontName] if !ok { fmt.Fprintf(os.Stderr, "pdfcpu: user font not loaded: %s\n", fontName) + debug.PrintStack() os.Exit(1) } diff --git a/pkg/pdfcpu/annotation.go b/pkg/pdfcpu/annotation.go index 8606e0ef..ecffc8b6 100644 --- a/pkg/pdfcpu/annotation.go +++ b/pkg/pdfcpu/annotation.go @@ -23,6 +23,7 @@ import ( "strings" "github.com/angel-one/pdfcpu/pkg/log" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/draw" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -122,18 +123,47 @@ func findAnnotByObjNr(objNr int, annots types.Array) (int, error) { return -1, nil } -func createAnnot(ctx *model.Context, ar model.AnnotationRenderer, pageIndRef *types.IndirectRef) (*types.IndirectRef, error) { - d, err := ar.RenderDict(ctx.XRefTable, *pageIndRef) +func createAnnot(ctx *model.Context, ar model.AnnotationRenderer, pageIndRef *types.IndirectRef) (*types.IndirectRef, types.Dict, error) { + d, err := ar.RenderDict(ctx.XRefTable, pageIndRef) if err != nil { - return nil, err + return nil, nil, err + } + indRef, err := ctx.IndRefForNewObject(d) + if err != nil { + return nil, nil, err } - return ctx.IndRefForNewObject(d) + return indRef, d, nil +} + +func linkAnnotation(xRefTable *model.XRefTable, d types.Dict, r *types.Rectangle, apObjNr int, contents, nm string, f model.AnnotationFlags) (model.AnnotationRenderer, error) { + var uri string + o, found := d.Find("A") + if found && o != nil { + d, err := xRefTable.DereferenceDict(o) + if err != nil { + if xRefTable.ValidationMode == model.ValidationStrict { + return nil, err + } + model.ShowSkipped("invalid link annotation entry \"A\"") + + } + if d != nil { + bb, err := xRefTable.DereferenceStringEntryBytes(d, "URI") + if err != nil { + return nil, err + } + if len(bb) > 0 { + uri = string(bb) + } + } + } + dest := (*model.Destination)(nil) // will not collect link dest during validation. + return model.NewLinkAnnotation(*r, apObjNr, contents, nm, "", f, nil, dest, uri, nil, false, 0, model.BSSolid), nil } // Annotation returns an annotation renderer. // Validation sets up a cache of annotation renderers. func Annotation(xRefTable *model.XRefTable, d types.Dict) (model.AnnotationRenderer, error) { - subtype := d.NameEntry("Subtype") o, _ := d.Find("Rect") @@ -142,16 +172,31 @@ func Annotation(xRefTable *model.XRefTable, d types.Dict) (model.AnnotationRende return nil, err } - r, err := xRefTable.RectForArray(arr) - if err != nil { - return nil, err + var r *types.Rectangle + + if len(arr) == 4 { + r, err = xRefTable.RectForArray(arr) + if err != nil { + return nil, err + } + } else if xRefTable.ValidationMode == model.ValidationRelaxed { + r = types.NewRectangle(0, 0, 0, 0) } - bb, err := d.StringEntryBytes("Contents") - if err != nil { - return nil, err + var apObjNr int + indRef := d.IndirectRefEntry("AP") + if indRef != nil { + apObjNr = indRef.ObjectNumber.Value() + } + + contents := "" + if c, ok := d["Contents"]; ok { + contents, err = xRefTable.DereferenceStringOrHexLiteral(c, model.V10, nil) + if err != nil { + return nil, err + } + contents = types.RemoveControlChars(contents) } - contents := string(bb) var nm string s := d.StringEntry("NM") // This is what pdfcpu refers to as the annotation id. 
@@ -170,36 +215,24 @@ func Annotation(xRefTable *model.XRefTable, d types.Dict) (model.AnnotationRende switch *subtype { case "Text": - ann = model.NewTextAnnotation(*r, contents, nm, "", f, nil, nil, "", "", true, "") + popupIndRef := d.IndirectRefEntry("Popup") + ann = model.NewTextAnnotation(*r, apObjNr, contents, nm, "", f, nil, "", popupIndRef, nil, "", "", 0, 0, 0, true, "") case "Link": - var uri string - o, found := d.Find("A") - if found && o != nil { - d, err := xRefTable.DereferenceDict(o) - if err != nil { - return nil, err - } - - bb, err := xRefTable.DereferenceStringEntryBytes(d, "URI") - if err != nil { - return nil, err - } - if len(bb) > 0 { - uri = string(bb) - } + ann, err = linkAnnotation(xRefTable, d, r, apObjNr, contents, nm, f) + if err != nil { + return nil, err } - dest := (*model.Destination)(nil) // will not collect link dest during validation. - ann = model.NewLinkAnnotation(*r, nil, dest, uri, nm, f, 0, model.BSSolid, nil, false) case "Popup": parentIndRef := d.IndirectRefEntry("Parent") - ann = model.NewPopupAnnotation(*r, nil, contents, nm, f, nil, parentIndRef) + ann = model.NewPopupAnnotation(*r, apObjNr, contents, nm, "", f, nil, 0, 0, 0, parentIndRef, false) // TODO handle remaining annotation types. default: - ann = model.NewAnnotationForRawType(*subtype, *r, contents, nil, nm, f, nil) + ann = model.NewAnnotationForRawType(*subtype, *r, apObjNr, contents, nm, "", f, nil, 0, 0, 0) + } return ann, nil @@ -234,6 +267,56 @@ func AnnotationsForSelectedPages(ctx *model.Context, selectedPages types.IntSet) return m } +func prepareHeader(horSep *[]int, maxLen *AnnotListMaxLengths, customAnnot bool) string { + s := " Obj# " + if maxLen.ObjNr > 4 { + s += strings.Repeat(" ", maxLen.ObjNr-4) + *horSep = append(*horSep, 10+maxLen.ObjNr-4) + } else { + *horSep = append(*horSep, 10) + } + + s += draw.VBar + " Id " + if maxLen.ID > 2 { + s += strings.Repeat(" ", maxLen.ID-2) + *horSep = append(*horSep, 4+maxLen.ID-2) + } else { + *horSep = append(*horSep, 4) + } + + s += draw.VBar + " Rect " + if maxLen.Rect > 4 { + s += strings.Repeat(" ", maxLen.Rect-4) + *horSep = append(*horSep, 6+maxLen.Rect-4) + } else { + *horSep = append(*horSep, 6) + } + + s += draw.VBar + " Content" + if maxLen.Content > 7 { + s += strings.Repeat(" ", maxLen.Content-7) + *horSep = append(*horSep, 8+maxLen.Content-7) + } else { + *horSep = append(*horSep, 8) + } + + if customAnnot { + s += draw.VBar + " Type" + if maxLen.Type > 4 { + s += strings.Repeat(" ", maxLen.Type-4) + *horSep = append(*horSep, 5+maxLen.Type-4) + } else { + *horSep = append(*horSep, 5) + } + } + + return s +} + +type AnnotListMaxLengths struct { + ObjNr, ID, Rect, Content, Type int +} + // ListAnnotations returns a formatted list of annotations. 
func ListAnnotations(annots map[int]model.PgAnnots) (int, []string, error) { var ( @@ -262,37 +345,73 @@ func ListAnnotations(annots map[int]model.PgAnnots) (int, []string, error) { for _, annType := range annTypes { annots := pageAnnots[model.AnnotTypes[annType]] - var ( - maxLenRect int - maxLenContent int - ) - maxLenID := 2 + + var maxLen AnnotListMaxLengths + maxLen.ID = 2 + maxLen.Content = len("Content") + maxLen.Type = len("Type") + var objNrs []int for objNr, ann := range annots.Map { objNrs = append(objNrs, objNr) - if len(ann.RectString()) > maxLenRect { - maxLenRect = len(ann.RectString()) + s := strconv.Itoa(objNr) + if len(s) > maxLen.ObjNr { + maxLen.ObjNr = len(s) } - if len(ann.ID()) > maxLenID { - maxLenID = len(ann.ID()) + if len(ann.RectString()) > maxLen.Rect { + maxLen.Rect = len(ann.RectString()) } - if len(ann.ContentString()) > maxLenContent { - maxLenContent = len(ann.ContentString()) + if len(ann.ID()) > maxLen.ID { + maxLen.ID = len(ann.ID()) + } + if len(ann.ContentString()) > maxLen.Content { + maxLen.Content = len(ann.ContentString()) + } + if len(ann.CustomTypeString()) > maxLen.Type { + maxLen.Type = len(ann.CustomTypeString()) } } sort.Ints(objNrs) ss = append(ss, "") ss = append(ss, fmt.Sprintf(" %s:", annType)) - s1 := (" obj# ") - s2 := fmt.Sprintf("%%%ds", maxLenRect) - s3 := fmt.Sprintf("%%%ds", maxLenID) - s4 := fmt.Sprintf("%%%ds", maxLenContent) - s := fmt.Sprintf(s1+s2+" "+s3+" "+s4, "rect", "id", "content") - ss = append(ss, s) - ss = append(ss, " "+strings.Repeat("=", len(s)-4)) + + horSep := []int{} + + // Render header. + ss = append(ss, prepareHeader(&horSep, &maxLen, annType == "Custom")) + + // Render separator. + ss = append(ss, draw.HorSepLine(horSep)) + + // Render content. for _, objNr := range objNrs { ann := annots.Map[objNr] - ss = append(ss, fmt.Sprintf(" %5d "+s2+" "+s3+" "+s4, objNr, ann.RectString(), ann.ID(), ann.ContentString())) + + s := strconv.Itoa(objNr) + fill1 := strings.Repeat(" ", maxLen.ObjNr-len(s)) + if maxLen.ObjNr < 4 { + fill1 += strings.Repeat(" ", 4-maxLen.ObjNr) + } + + s = ann.ID() + fill2 := strings.Repeat(" ", maxLen.ID-len(s)) + if maxLen.ID < 2 { + fill2 += strings.Repeat(" ", 2-maxLen.ID) + } + + s = ann.RectString() + fill3 := strings.Repeat(" ", maxLen.Rect-len(s)) + + if ann.Type() != model.AnnCustom { + ss = append(ss, fmt.Sprintf(" %s%d %s %s%s %s %s%s %s %s", + fill1, objNr, draw.VBar, fill2, ann.ID(), draw.VBar, fill3, ann.RectString(), draw.VBar, ann.ContentString())) + } else { + s = ann.ContentString() + fill4 := strings.Repeat(" ", maxLen.Content-len(s)) + ss = append(ss, fmt.Sprintf(" %s%d %s %s%s %s %s%s %s %s%s%s %s", + fill1, objNr, draw.VBar, fill2, ann.ID(), draw.VBar, fill3, ann.RectString(), draw.VBar, fill4, ann.ContentString(), draw.VBar, ann.CustomTypeString())) + } + j++ } } @@ -308,14 +427,14 @@ func addAnnotationToDirectObj( pageDict types.Dict, pageNr int, ar model.AnnotationRenderer, - incr bool) (bool, error) { + incr bool) error { i, err := findAnnotByID(ctx, ar.ID(), annots) if err != nil { - return false, err + return err } if i >= 0 { - return false, errors.Errorf("page %d: duplicate annotation with id:%s\n", pageNr, ar.ID()) + return errors.Errorf("page %d: duplicate annotation with id:%s\n", pageNr, ar.ID()) } pageDict.Update("Annots", append(annots, *annotIndRef)) if incr { @@ -323,7 +442,7 @@ func addAnnotationToDirectObj( ctx.Write.IncrementWithObjNr(pageDictIndRef.ObjectNumber.Value()) } ctx.EnsureVersionForWriting() - return true, nil + return nil } // AddAnnotation 
adds ar to pageDict. @@ -333,18 +452,18 @@ func AddAnnotation( pageDict types.Dict, pageNr int, ar model.AnnotationRenderer, - incr bool) (bool, error) { + incr bool) (*types.IndirectRef, types.Dict, error) { // Create xreftable entry for annotation. - annotIndRef, err := createAnnot(ctx, ar, pageDictIndRef) + annotIndRef, d, err := createAnnot(ctx, ar, pageDictIndRef) if err != nil { - return false, err + return nil, nil, err } // Add annotation to xreftable page annotation cache. err = addAnnotationToCache(ctx, ar, pageNr, annotIndRef.ObjectNumber.Value()) if err != nil { - return false, err + return nil, nil, err } if incr { @@ -360,33 +479,33 @@ func AddAnnotation( ctx.Write.IncrementWithObjNr(pageDictIndRef.ObjectNumber.Value()) } ctx.EnsureVersionForWriting() - return true, nil + return annotIndRef, d, nil } ir, ok := obj.(types.IndirectRef) if !ok { - return addAnnotationToDirectObj(ctx, obj.(types.Array), annotIndRef, pageDictIndRef, pageDict, pageNr, ar, incr) + return annotIndRef, d, addAnnotationToDirectObj(ctx, obj.(types.Array), annotIndRef, pageDictIndRef, pageDict, pageNr, ar, incr) } // Annots array is an IndirectReference. o, err := ctx.Dereference(ir) if err != nil || o == nil { - return false, err + return nil, nil, err } annots, _ := o.(types.Array) i, err := findAnnotByID(ctx, ar.ID(), annots) if err != nil { - return false, err + return nil, nil, err } if i >= 0 { - return false, errors.Errorf("page %d: duplicate annotation with id:%s\n", pageNr, ar.ID()) + return nil, nil, errors.Errorf("page %d: duplicate annotation with id:%s\n", pageNr, ar.ID()) } entry, ok := ctx.FindTableEntryForIndRef(&ir) if !ok { - return false, errors.Errorf("page %d: can't dereference Annots indirect reference(obj#:%d)\n", pageNr, ir.ObjectNumber) + return nil, nil, errors.Errorf("page %d: can't dereference Annots indirect reference(obj#:%d)\n", pageNr, ir.ObjectNumber) } entry.Object = append(annots, *annotIndRef) if incr { @@ -395,7 +514,21 @@ func AddAnnotation( } ctx.EnsureVersionForWriting() - return true, nil + return annotIndRef, d, nil +} + +func AddAnnotationToPage(ctx *model.Context, pageNr int, ar model.AnnotationRenderer, incr bool) (*types.IndirectRef, types.Dict, error) { + pageDictIndRef, err := ctx.PageDictIndRef(pageNr) + if err != nil { + return nil, nil, err + } + + d, err := ctx.DereferenceDict(*pageDictIndRef) + if err != nil { + return nil, nil, err + } + + return AddAnnotation(ctx, pageDictIndRef, d, pageNr, ar, incr) } // AddAnnotations adds ar to selected pages. 
@@ -424,11 +557,11 @@ func AddAnnotations(ctx *model.Context, selectedPages types.IntSet, ar model.Ann return false, err } - added, err := AddAnnotation(ctx, pageDictIndRef, d, k, ar, incr) + indRef, _, err := AddAnnotation(ctx, pageDictIndRef, d, k, ar, incr) if err != nil { return false, err } - if added { + if indRef != nil { ok = true } } @@ -460,11 +593,11 @@ func AddAnnotationsMap(ctx *model.Context, m map[int][]model.AnnotationRenderer, } for _, annot := range annots { - added, err := AddAnnotation(ctx, pageDictIndRef, d, i, annot, incr) + indRef, _, err := AddAnnotation(ctx, pageDictIndRef, d, i, annot, incr) if err != nil { return false, err } - if added { + if indRef != nil { ok = true } } diff --git a/pkg/pdfcpu/booklet.go b/pkg/pdfcpu/booklet.go index 3208d26d..99562c1c 100644 --- a/pkg/pdfcpu/booklet.go +++ b/pkg/pdfcpu/booklet.go @@ -19,6 +19,7 @@ package pdfcpu import ( "bytes" "fmt" + "math" "os" "strconv" "strings" @@ -43,6 +44,7 @@ func DefaultBookletConfig() *model.NUp { nup.FolioSize = 8 nup.BookletType = model.Booklet nup.BookletBinding = model.LongEdge + nup.Enforce = true return nup } @@ -68,11 +70,10 @@ func PDFBookletConfig(val int, desc string, conf *model.Configuration) (*model.N if err := ParseNUpValue(val, nup); err != nil { return nil, err } - // 6up and 8up special cases - if nup.IsBooklet() && val > 4 && nup.IsTopFoldBinding() { + // 6up special cases + if nup.IsBooklet() && val == 6 && nup.IsTopFoldBinding() { // You can't top fold a 6up with 3 rows. - // TODO: support this for 8up - return nup, fmt.Errorf("pdfcpu booklet: n>4 must have binding on side (portrait long-edge or landscape short-edge)") + return nup, fmt.Errorf("pdfcpu booklet: n=6 must have binding on side (portrait long-edge or landscape short-edge)") } // bookletadvanced if nup.BookletType == model.BookletAdvanced && val == 4 && nup.IsTopFoldBinding() { @@ -99,7 +100,10 @@ func getPageNumber(pageNumbers []int, n int) int { return pageNumbers[n] } -func nup2OutputPageNr(inputPageNr, inputPageCount int, pageNumbers []int) (int, bool) { +type pageNumberFunction func(inputPageNr int, pageCount int, pageNumbers []int, nup *model.NUp) (int, bool) + +func nup2OutputPageNr(inputPageNr, inputPageCount int, pageNumbers []int, _ *model.NUp) (int, bool) { + // (output page, input page) = [(1,n), (2,1), (3, n-1), (4, 2), (5, n-2), (6, 3), ...] 
var p int if inputPageNr%2 == 0 { p = inputPageCount - 1 - inputPageNr/2 @@ -383,12 +387,7 @@ func nupPerfectBound(positionNumber int, inputPageCount int, pageNumbers []int, return getPageNumber(pageNumbers, p-1), rotate // p is one-indexed and we want zero-indexed } -type bookletPage struct { - number int - rotate bool -} - -func sortSelectedPagesForBooklet(pages types.IntSet, nup *model.NUp) []bookletPage { +func GetBookletOrdering(pages types.IntSet, nup *model.NUp) []model.BookletPage { pageNumbers := sortSelectedPages(pages) pageCount := len(pageNumbers) @@ -401,46 +400,55 @@ func sortSelectedPagesForBooklet(pages types.IntSet, nup *model.NUp) []bookletPa pageCount += sheetPageCount - pageCount%sheetPageCount } - bookletPages := make([]bookletPage, pageCount) + if nup.MultiFolio { + bookletPages := make([]model.BookletPage, 0) + // folioSize is the number of sheets - each "folio" has two sides and two pages per side + nPagesPerSignature := nup.FolioSize * 4 + nSignaturesInBooklet := int(math.Ceil(float64(pageCount) / float64(nPagesPerSignature))) + for j := 0; j < nSignaturesInBooklet; j++ { + start := j * nPagesPerSignature + stop := (j + 1) * nPagesPerSignature + if stop > len(pageNumbers) { + // last signature may be short + stop = len(pageNumbers) + nPagesPerSignature = pageCount - start + } + bookletPages = append(bookletPages, getBookletPageOrdering(nup, pageNumbers[start:stop], nPagesPerSignature)...) + } + return bookletPages + } + return getBookletPageOrdering(nup, pageNumbers, pageCount) +} + +func getBookletPageOrdering(nup *model.NUp, pageNumbers []int, pageCount int) []model.BookletPage { + bookletPages := make([]model.BookletPage, pageCount) + var pageNumberFn pageNumberFunction switch nup.BookletType { case model.Booklet, model.BookletAdvanced: switch nup.N() { case 2: - // (output page, input page) = [(1,n), (2,1), (3, n-1), (4, 2), (5, n-2), (6, 3), ...] 
- for i := 0; i < pageCount; i++ { - pageNr, rotate := nup2OutputPageNr(i, pageCount, pageNumbers) - bookletPages[i].number = pageNr - bookletPages[i].rotate = rotate - } - + pageNumberFn = nup2OutputPageNr case 4: - for i := 0; i < pageCount; i++ { - pageNr, rotate := nup4OutputPageNr(i, pageCount, pageNumbers, nup) - bookletPages[i].number = pageNr - bookletPages[i].rotate = rotate - } + pageNumberFn = nup4OutputPageNr case 6: - for i := 0; i < pageCount; i++ { - pageNr, rotate := nupLRTBOutputPageNr(i, pageCount, pageNumbers, nup) - bookletPages[i].number = pageNr - bookletPages[i].rotate = rotate - } + pageNumberFn = nupLRTBOutputPageNr case 8: - for i := 0; i < pageCount; i++ { - pageNr, rotate := nup8OutputPageNr(i, pageCount, pageNumbers, nup) - bookletPages[i].number = pageNr - bookletPages[i].rotate = rotate + if nup.BookletBinding == model.ShortEdge { + pageNumberFn = nupLRTBOutputPageNr + } else { // long edge + pageNumberFn = nup8OutputPageNr } } case model.BookletPerfectBound: - for i := 0; i < pageCount; i++ { - pageNr, rotate := nupPerfectBound(i, pageCount, pageNumbers, nup) - bookletPages[i].number = pageNr - bookletPages[i].rotate = rotate - } + pageNumberFn = nupPerfectBound } + for i := 0; i < pageCount; i++ { + pageNr, rotate := pageNumberFn(i, pageCount, pageNumbers, nup) + bookletPages[i].Number = pageNr + bookletPages[i].Rotate = rotate + } return bookletPages } @@ -455,7 +463,7 @@ func bookletPages( formsResDict := types.NewDict() rr := nup.RectsForGrid() - for i, bp := range sortSelectedPagesForBooklet(selectedPages, nup) { + for i, bp := range GetBookletOrdering(selectedPages, nup) { if i > 0 && i%len(rr) == 0 { // Wrap complete page. @@ -468,7 +476,7 @@ func bookletPages( rDest := rr[i%len(rr)] - if bp.number == 0 { + if bp.Number == 0 { // This is an empty page at the end. if nup.BgColor != nil { draw.FillRectNoBorder(&buf, rDest, *nup.BgColor) @@ -476,7 +484,7 @@ func bookletPages( continue } - if err := ctx.NUpTilePDFBytesForPDF(bp.number, formsResDict, &buf, rDest, nup, bp.rotate); err != nil { + if err := ctx.NUpTilePDFBytesForPDF(bp.Number, formsResDict, &buf, rDest, nup, bp.Rotate); err != nil { return err } } @@ -503,7 +511,7 @@ func BookletFromImages(ctx *model.Context, fileNames []string, nup *model.NUp, p var buf bytes.Buffer rr := nup.RectsForGrid() - for i, bp := range sortSelectedPagesForBooklet(selectedPages, nup) { + for i, bp := range GetBookletOrdering(selectedPages, nup) { if i > 0 && i%len(rr) == 0 { @@ -518,7 +526,7 @@ func BookletFromImages(ctx *model.Context, fileNames []string, nup *model.NUp, p rDest := rr[i%len(rr)] - if bp.number == 0 { + if bp.Number == 0 { // This is an empty page at the end of a booklet. if nup.BgColor != nil { draw.FillRectNoBorder(&buf, rDest, *nup.BgColor) @@ -526,12 +534,12 @@ func BookletFromImages(ctx *model.Context, fileNames []string, nup *model.NUp, p continue } - f, err := os.Open(fileNames[bp.number-1]) + f, err := os.Open(fileNames[bp.Number-1]) if err != nil { return err } - imgIndRef, w, h, err := model.CreateImageResource(xRefTable, f, false, false) + imgIndRef, w, h, err := model.CreateImageResource(xRefTable, f) if err != nil { return err } @@ -549,8 +557,7 @@ func BookletFromImages(ctx *model.Context, fileNames []string, nup *model.NUp, p formsResDict.Insert(formResID, *formIndRef) // Append to content stream of booklet page i. 
- enforceOrientation := false - model.NUpTilePDFBytes(&buf, types.RectForDim(float64(w), float64(h)), rr[i%len(rr)], formResID, nup, bp.rotate, enforceOrientation) + model.NUpTilePDFBytes(&buf, types.RectForDim(float64(w), float64(h)), rr[i%len(rr)], formResID, nup, bp.Rotate) } // Wrap incomplete booklet page. @@ -587,27 +594,8 @@ func BookletFromPDF(ctx *model.Context, selectedPages types.IntSet, nup *model.N nup.PageDim = &types.Dim{Width: mb.Width(), Height: mb.Height()} - if nup.MultiFolio { - pages := types.IntSet{} - for _, i := range sortSelectedPages(selectedPages) { - pages[i] = true - if len(pages) == 4*nup.FolioSize { - if err = bookletPages(ctx, pages, nup, pagesDict, pagesIndRef); err != nil { - return err - } - pages = types.IntSet{} - } - } - if len(pages) > 0 { - if err = bookletPages(ctx, pages, nup, pagesDict, pagesIndRef); err != nil { - return err - } - } - - } else { - if err = bookletPages(ctx, selectedPages, nup, pagesDict, pagesIndRef); err != nil { - return err - } + if err = bookletPages(ctx, selectedPages, nup, pagesDict, pagesIndRef); err != nil { + return err } // Replace original pagesDict. diff --git a/pkg/pdfcpu/booklet_test.go b/pkg/pdfcpu/booklet_test.go index 942d0fab..fa495ba6 100644 --- a/pkg/pdfcpu/booklet_test.go +++ b/pkg/pdfcpu/booklet_test.go @@ -22,16 +22,52 @@ import ( ) type pageOrderResults struct { - id string - nup int - pageCount int - expectedPageOrder []int - papersize string - bookletType string - binding string + id string + nup int + pageCount int + expectedPageOrder []int + papersize string + bookletType string + binding string + useSignatures bool + nPagesPerSignature int } var bookletTestCases = []pageOrderResults{ + { + id: "2up", + nup: 2, + pageCount: 16, + expectedPageOrder: []int{ + 16, 1, + 15, 2, + 14, 3, + 13, 4, + 12, 5, + 11, 6, + 10, 7, + 9, 8, + }, + papersize: "A6", + bookletType: "booklet", + binding: "long", + }, + { + id: "2up with trailing blank pages", + nup: 2, + pageCount: 10, + expectedPageOrder: []int{ + 0, 1, + 0, 2, + 10, 3, + 9, 4, + 8, 5, + 7, 6, + }, + papersize: "A6", + bookletType: "booklet", + binding: "long", + }, // basic booklet sidefold test cases { id: "booklet portrait long edge", @@ -140,7 +176,7 @@ var bookletTestCases = []pageOrderResults{ }, // 8up test { - id: "8up", + id: "8up portrait long edge", nup: 8, pageCount: 32, expectedPageOrder: []int{ @@ -149,7 +185,43 @@ var bookletTestCases = []pageOrderResults{ 9, 22, 24, 11, 13, 18, 20, 15, 21, 10, 12, 23, 17, 14, 16, 19, }, - papersize: "A6", // portrait, long-edge binding + papersize: "A6", + bookletType: "booklet", + binding: "long", + }, + { + id: "8up portrait short edge", + nup: 8, + pageCount: 16, + expectedPageOrder: []int{ + 16, 1, 14, 3, 12, 5, 10, 7, + 2, 15, 4, 13, 6, 11, 8, 9, + }, + papersize: "A6", + bookletType: "booklet", + binding: "short", + }, + { + id: "8up landscape short edge", + nup: 8, + pageCount: 16, + expectedPageOrder: []int{ + 16, 1, 14, 3, 12, 5, 10, 7, + 2, 15, 4, 13, 6, 11, 8, 9, + }, + papersize: "A6L", + bookletType: "booklet", + binding: "short", + }, + { + id: "8up landscape long edge", + nup: 8, + pageCount: 16, + expectedPageOrder: []int{ + 1, 14, 16, 3, 5, 10, 12, 7, + 13, 2, 4, 15, 9, 6, 8, 11, + }, + papersize: "A6L", bookletType: "booklet", binding: "long", }, @@ -208,26 +280,95 @@ var bookletTestCases = []pageOrderResults{ bookletType: "perfectbound", binding: "long", }, + // signatures + { + id: "signatures 2up", + nup: 2, + pageCount: 16, + expectedPageOrder: []int{ + 12, 1, // signature 1 + 11, 2, 
+ 10, 3, + 9, 4, + 8, 5, + 7, 6, + 16, 13, // signature 2, incomplete + 15, 14, + }, + papersize: "A6", + bookletType: "booklet", + binding: "long", + useSignatures: true, + nPagesPerSignature: 12, + }, + { + id: "signatures 4up", + nup: 4, + pageCount: 24, + expectedPageOrder: []int{ + 16, 1, 3, 14, // signature 1 + 2, 15, 13, 4, + 12, 5, 7, 10, + 6, 11, 9, 8, + 24, 17, 19, 22, // signature 2, incomplete + 18, 23, 21, 20, + }, + papersize: "A5", + bookletType: "booklet", + binding: "long", + useSignatures: true, + nPagesPerSignature: 16, + }, + { + id: "signatures 2up with trailing blank pages", + nup: 2, + pageCount: 18, + expectedPageOrder: []int{ + 12, 1, // signature 1 + 11, 2, + 10, 3, + 9, 4, + 8, 5, + 7, 6, + 0, 13, // signature 2, incomplete, with blanks + 0, 14, + 18, 15, + 17, 16, + }, + papersize: "A6", + bookletType: "booklet", + binding: "long", + useSignatures: true, + nPagesPerSignature: 12, + }, } func TestBookletPageOrder(t *testing.T) { for _, test := range bookletTestCases { - t.Run(test.id, func(t *testing.T) { - nup, err := PDFBookletConfig(test.nup, fmt.Sprintf("papersize:%s, btype:%s, binding: %s", test.papersize, test.bookletType, test.binding), nil) + t.Run(test.id, func(tt *testing.T) { + desc := fmt.Sprintf("papersize:%s, btype:%s, binding: %s", test.papersize, test.bookletType, test.binding) + if test.useSignatures { + desc += fmt.Sprintf(", multifolio:on, foliosize:%d", test.nPagesPerSignature/4) + } + nup, err := PDFBookletConfig(test.nup, desc, nil) if err != nil { - t.Fatal(err) + tt.Fatal(err) } pageNumbers := make(map[int]bool) for i := 0; i < test.pageCount; i++ { pageNumbers[i+1] = true } - pageOrder := make([]int, test.pageCount) - for i, p := range sortSelectedPagesForBooklet(pageNumbers, nup) { - pageOrder[i] = p.number + pageOrder := make([]int, len(test.expectedPageOrder)) + out := GetBookletOrdering(pageNumbers, nup) + if len(test.expectedPageOrder) != len(out) { + tt.Fatalf("page order output has the wrong length, expected %d but got %d", len(test.expectedPageOrder), len(out)) + } + for i, p := range out { + pageOrder[i] = p.Number } for i, expected := range test.expectedPageOrder { if pageOrder[i] != expected { - t.Fatal("incorrect page order\nexpected:", arrayToString(test.expectedPageOrder), "\n got:", arrayToString(pageOrder)) + tt.Fatal("incorrect page order\nexpected:", arrayToString(test.expectedPageOrder), "\n got:", arrayToString(pageOrder)) } } }) diff --git a/pkg/pdfcpu/bookmark.go b/pkg/pdfcpu/bookmark.go index 0b5c6afb..77543f9e 100644 --- a/pkg/pdfcpu/bookmark.go +++ b/pkg/pdfcpu/bookmark.go @@ -32,9 +32,9 @@ import ( ) var ( - errNoBookmarks = errors.New("pdfcpu: no bookmarks available") - errCorruptedBookmarks = errors.New("pdfcpu: corrupt bookmark") - errExistingBookmarks = errors.New("pdfcpu: existing bookmarks") + errNoBookmarks = errors.New("pdfcpu: no bookmarks available") + errInvalidBookmark = errors.New("pdfcpu: invalid bookmark") + errExistingBookmarks = errors.New("pdfcpu: existing bookmarks") ) type Header struct { @@ -94,30 +94,12 @@ func (bm Bookmark) Style() int { return i } -func positionToFirstBookmark(ctx *model.Context) (types.Dict, *types.IndirectRef, error) { - - // Position to first bookmark on top most level with more than 1 bookmarks. - // Default to top most single bookmark level. 
- +func positionToFirstBookmark(ctx *model.Context) (*types.IndirectRef, error) { d := ctx.Outlines if d == nil { - return nil, nil, errNoBookmarks + return nil, errNoBookmarks } - - first := d.IndirectRefEntry("First") - last := d.IndirectRefEntry("Last") - - var err error - - for first != nil && last != nil && *first == *last { - if d, err = ctx.DereferenceDict(*first); err != nil { - return nil, nil, err - } - first = d.IndirectRefEntry("First") - last = d.IndirectRefEntry("Last") - } - - return d, first, nil + return d.IndirectRefEntry("First"), nil } func outlineItemTitle(s string) string { @@ -131,43 +113,83 @@ func outlineItemTitle(s string) string { return sb.String() } -// PageObjFromDestinationArray returns an IndirectRef of the destinations page. -func PageObjFromDestination(ctx *model.Context, dest types.Object) (*types.IndirectRef, error) { - var ( - err error - ir types.IndirectRef - arr types.Array - ) +func destArray(ctx *model.Context, dest types.Object) (types.Array, error) { switch dest := dest.(type) { case types.Name: - arr, err = ctx.DereferenceDestArray(dest.Value()) - if err == nil { - ir = arr[0].(types.IndirectRef) - } + return ctx.DereferenceDestArray(dest.Value()) case types.StringLiteral: s, err := types.StringLiteralToString(dest) if err != nil { return nil, err } - arr, err = ctx.DereferenceDestArray(s) - if err == nil { - ir = arr[0].(types.IndirectRef) - } + return ctx.DereferenceDestArray(s) case types.HexLiteral: s, err := types.HexLiteralToString(dest) if err != nil { return nil, err } - arr, err = ctx.DereferenceDestArray(s) - if err == nil { - ir = arr[0].(types.IndirectRef) - } + return ctx.DereferenceDestArray(s) case types.Array: - if dest[0] != nil { - ir = dest[0].(types.IndirectRef) + return dest, nil + } + return nil, errors.Errorf("unable to resolve destination array %v\n", dest) +} + +// PageNrFromDestination returns the page number of a destination. +func PageNrFromDestination(ctx *model.Context, dest types.Object) (int, error) { + arr, err := destArray(ctx, dest) + if err != nil && ctx.XRefTable.ValidationMode == model.ValidationRelaxed { + return 0, nil + } + + if i, ok := arr[0].(types.Integer); ok { + return i.Value(), nil + } + + if ir, ok := arr[0].(types.IndirectRef); ok { + return ctx.PageNumber(ir.ObjectNumber.Value()) + } + + return 0, errors.Errorf("unable to extract dest pageNr of %v\n", dest) +} + +func title(ctx *model.Context, d types.Dict) (string, error) { + obj, err := ctx.Dereference(d["Title"]) + if err != nil { + return "", err + } + + s, err := model.Text(obj) + if err != nil { + if ctx.XRefTable.ValidationMode == model.ValidationStrict { + return "", err } + return "", nil } - return &ir, err + + return outlineItemTitle(s), nil +} + +func bookmark(d types.Dict, title string, pageFrom int, parent *Bookmark) Bookmark { + bm := Bookmark{ + Title: title, + PageFrom: pageFrom, + Parent: parent, + Bold: false, + Italic: false, + } + + if arr := d.ArrayEntry("C"); len(arr) == 3 { + col := color.NewSimpleColorForArray(arr) + bm.Color = &col + } + + if f := d.IntEntry("F"); f != nil { + bm.Bold = *f&0x02 > 0 + bm.Italic = *f&0x01 > 0 + } + + return bm } // BookmarksForOutlineItem returns the bookmarks tree for an outline item. 
@@ -186,18 +208,15 @@ func BookmarksForOutlineItem(ctx *model.Context, item *types.IndirectRef, parent return nil, err } - obj, err := ctx.Dereference(d["Title"]) + title, err := title(ctx, d) if err != nil { return nil, err } - s, err := model.Text(obj) - if err != nil { - return nil, err + if title == "" { + continue } - title := outlineItemTitle(s) - // Retrieve page number out of a destination via "Dest" or "Goto Action". dest, destFound := d["Dest"] if !destFound { @@ -213,20 +232,12 @@ func BookmarksForOutlineItem(ctx *model.Context, item *types.IndirectRef, parent dest = act.(types.Dict)["D"] } - obj, err = ctx.Dereference(dest) + obj, err := ctx.Dereference(dest) if err != nil { return nil, err } - ir, err := PageObjFromDestination(ctx, obj) - if err != nil { - return nil, err - } - if ir == nil { - continue - } - - pageFrom, err := ctx.PageNumber(ir.ObjectNumber.Value()) + pageFrom, err := PageNrFromDestination(ctx, obj) if err != nil { return nil, err } @@ -239,32 +250,16 @@ func BookmarksForOutlineItem(ctx *model.Context, item *types.IndirectRef, parent } } - newBookmark := Bookmark{ - Title: title, - PageFrom: pageFrom, - Parent: parent, - Bold: false, - Italic: false, - } - - if arr := d.ArrayEntry("C"); len(arr) == 3 { - col := color.NewSimpleColorForArray(arr) - newBookmark.Color = &col - } - - if f := d.IntEntry("F"); f != nil { - newBookmark.Bold = *f&0x02 > 0 - newBookmark.Italic = *f&0x01 > 0 - } + bm := bookmark(d, title, pageFrom, parent) first := d["First"] if first != nil { indRef := first.(types.IndirectRef) - kids, _ := BookmarksForOutlineItem(ctx, &indRef, &newBookmark) - newBookmark.Kids = kids + kids, _ := BookmarksForOutlineItem(ctx, &indRef, &bm) + bm.Kids = kids } - bms = append(bms, newBookmark) + bms = append(bms, bm) } return bms, nil @@ -277,7 +272,7 @@ func Bookmarks(ctx *model.Context) ([]Bookmark, error) { return nil, err } - _, first, err := positionToFirstBookmark(ctx) + first, err := positionToFirstBookmark(ctx) if err != nil { if err != errNoBookmarks { return nil, err @@ -311,7 +306,7 @@ func BookmarkList(ctx *model.Context) ([]string, error) { return nil, err } - if bms == nil { + if len(bms) == 0 { return []string{"no bookmarks available"}, nil } @@ -323,7 +318,7 @@ func ExportBookmarks(ctx *model.Context, source string) (*BookmarkTree, error) { if err != nil { return nil, err } - if bms == nil { + if len(bms) == 0 { return nil, nil } @@ -365,7 +360,7 @@ func bmDict(ctx *model.Context, bm Bookmark, parent types.IndirectRef) (types.Di var o types.Object = *ir - s, err := types.EscapeUTF16String(bm.Title) + s, err := types.EscapedUTF16String(bm.Title) if err != nil { return nil, err } @@ -404,11 +399,11 @@ func createOutlineItemDict(ctx *model.Context, bms []Bookmark, parent *types.Ind for i, bm := range bms { if i == 0 && parentPageNr != nil && bm.PageFrom < *parentPageNr { - return nil, nil, 0, 0, errCorruptedBookmarks + return nil, nil, 0, 0, errInvalidBookmark } if i > 0 && bm.PageFrom < bms[i-1].PageFrom { - return nil, nil, 0, 0, errCorruptedBookmarks + return nil, nil, 0, 0, errInvalidBookmark } total++ @@ -463,11 +458,49 @@ func createOutlineItemDict(ctx *model.Context, bms []Bookmark, parent *types.Ind return first, irPrev, total, visible, nil } +func cleanupDestinations(ctx *model.Context, dNamesEmpty bool) error { + if dNamesEmpty { + delete(ctx.Names, "Dests") + if err := ctx.RemoveNameTree("Dests"); err != nil { + return err + } + } + + if ctx.Dests != nil && len(ctx.Dests) == 0 { + delete(ctx.RootDict, "Dests") + } + + return nil +} 
+ +func removeDest(ctx *model.Context, name string) (bool, bool, error) { + var ( + dNamesEmpty, ok bool + err error + ) + if dNames := ctx.Names["Dests"]; dNames != nil { + // Remove destName from dest nametree. + dNamesEmpty, ok, err = dNames.Remove(ctx.XRefTable, name) + if err != nil { + return false, false, err + } + } + + if !ok { + if ctx.Dests != nil { + // Remove destName from named destinations. + ok = ctx.Dests.Delete(name) != nil + } + } + + return dNamesEmpty, ok, err +} + func removeNamedDests(ctx *model.Context, item *types.IndirectRef) error { var ( - d types.Dict - err error - empty, ok bool + d types.Dict + err error + dNamesEmpty, ok bool ) for ir := item; ir != nil; ir = d.IndirectRefEntry("Next") { @@ -498,9 +531,7 @@ func removeNamedDests(ctx *model.Context, item *types.IndirectRef) error { continue } - // Remove destName from dest nametree. - // TODO also try to remove from any existing root.Dests - empty, ok, err = ctx.Names["Dests"].Remove(ctx.XRefTable, s) + dNamesEmpty, ok, err = removeDest(ctx, s) if err != nil { return err } @@ -519,19 +550,12 @@ func removeNamedDests(ctx *model.Context, item *types.IndirectRef) error { } } - if empty { - delete(ctx.Names, "Dests") - if err := ctx.RemoveNameTree("Dests"); err != nil { - return err - } - } - - return nil + return cleanupDestinations(ctx, dNamesEmpty) } // RemoveBookmarks erases all outlines from ctx. func RemoveBookmarks(ctx *model.Context) (bool, error) { - _, first, err := positionToFirstBookmark(ctx) + first, err := positionToFirstBookmark(ctx) if err != nil { if err != errNoBookmarks { return false, err diff --git a/pkg/pdfcpu/certificate.go b/pkg/pdfcpu/certificate.go new file mode 100644 index 00000000..d5a35ab1 --- /dev/null +++ b/pkg/pdfcpu/certificate.go @@ -0,0 +1,257 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package pdfcpu + +import ( + "bytes" + "crypto/x509" + "encoding/base64" + "encoding/pem" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/hhrutter/pkcs7" + "github.com/pkg/errors" +) + +var ErrUnknownFileType = errors.New("pdfcpu: unsupported file type") + +func loadSingleCertFile(filename string) (*x509.Certificate, error) { + bb, err := os.ReadFile(filename) + if err != nil { + return nil, err + } + + block, _ := pem.Decode(bb) + if block != nil && block.Type == "CERTIFICATE" { + return x509.ParseCertificate(block.Bytes) + } + + // DER + return x509.ParseCertificate(bb) +} + +func loadCertsFromPEM(filename string) ([]*x509.Certificate, error) { + bb, err := os.ReadFile(filename) + if err != nil { + return nil, err + } + + var certs []*x509.Certificate + + for len(bb) > 0 { + var block *pem.Block + block, bb = pem.Decode(bb) + if block == nil { + break + } + if block.Type != "CERTIFICATE" || len(block.Headers) != 0 { + continue + } + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + return nil, err + } + certs = append(certs, cert) + } + + return certs, nil +} + +const PKCS7_PREFIX = "-----BEGIN PKCS7-----" +const PKCS7_SUFFIX = "-----END PKCS7-----" + +func isPEMEncoded(s string) bool { + s = strings.TrimRight(s, " \t\r\n") + return strings.HasPrefix(s, PKCS7_PREFIX) && strings.HasSuffix(s, PKCS7_SUFFIX) +} + +func decodePKCS7Block(s string) ([]byte, error) { + start := strings.Index(s, PKCS7_PREFIX) + end := strings.Index(s, PKCS7_SUFFIX) + + if start == -1 || end == -1 || end <= start { + return nil, fmt.Errorf("decodePKCS7Block: PEM block not found") + } + + s = s[start+len(PKCS7_PREFIX) : end] + s = strings.TrimSpace(s) + + return base64.StdEncoding.DecodeString(s) +} + +func loadCertsFromP7C(filename string) ([]*x509.Certificate, error) { + bb, err := os.ReadFile(filename) + if err != nil { + return nil, err + } + + s := string(bb) + if isPEMEncoded(s) { + bb, err = decodePKCS7Block(s) + if err != nil { + return nil, err + } + } // else DER (binary) + + p7, err := pkcs7.Parse(bb) + if err != nil { + return nil, err + } + + return p7.Certificates, nil +} + +func LoadCertificates(filename string) ([]*x509.Certificate, error) { + ext := strings.ToLower(filepath.Ext(filename)) + switch ext { + case ".crt", ".cer": + cert, err := loadSingleCertFile(filename) + if err != nil { + return nil, err + } + return []*x509.Certificate{cert}, nil + case ".p7c": + return loadCertsFromP7C(filename) + case ".pem": + return loadCertsFromPEM(filename) + default: + return nil, ErrUnknownFileType + } +} + +func loadCertificatesToCertPool(path string, certPool *x509.CertPool, n *int) error { + certs, err := LoadCertificates(path) + if err != nil { + if err == ErrUnknownFileType { + return nil + } + return err + } + for _, cert := range certs { + certPool.AddCert(cert) + } + *n += len(certs) + return nil +} + +func LoadCertificatesToCertPool(dir string, certPool *x509.CertPool) (int, error) { + n := 0 + err := filepath.WalkDir(dir, func(path string, d os.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + return loadCertificatesToCertPool(path, certPool, &n) + }) + return n, err +} + +func saveCertsAsPEM(certs []*x509.Certificate, filename string, overwrite bool) (bool, error) { + if len(certs) == 0 { + return false, errors.New("no certificates to save") + } + + if !overwrite { + if _, err := os.Stat(filename); err == nil { + return false, nil + } + } + + file, err := 
os.Create(filename) + if err != nil { + return false, fmt.Errorf("failed to create file: %w", err) + } + defer file.Close() + + for _, cert := range certs { + block := &pem.Block{ + Type: "CERTIFICATE", + Bytes: cert.Raw, + } + if err := pem.Encode(file, block); err != nil { + return false, err + } + } + + return true, nil +} + +func saveCertsAsP7C(certs []*x509.Certificate, filename string, overwrite bool) (bool, error) { + // TODO encodeBase64 bool (PEM) + + if len(certs) == 0 { + return false, errors.New("no certificates to save") + } + + p7, err := pkcs7.NewSignedData(nil) + if err != nil { + return false, err + } + + for _, cert := range certs { + p7.AddCertificate(cert) + } + + bb, err := p7.Finish() + if err != nil { + return false, err + } + + return Write(bytes.NewReader(bb), filename, overwrite) +} + +func ImportCertificate(inFile string, overwrite bool) (int, bool, error) { + certs, err := LoadCertificates(inFile) + if err != nil { + return 0, false, err + } + + // We have validated the incoming cert info. + + enforceP7C := true // takes less disk space + + base := filepath.Base(inFile) + outFileNoExt := base[:len(base)-len(filepath.Ext(base))] + outFile := outFileNoExt + ".p7c" + outFile = filepath.Join(model.CertDir, outFile) + + if enforceP7C { + // Write certs as .p7c to certDir. + ok, err := saveCertsAsP7C(certs, outFile, overwrite) + if err != nil { + return 0, false, err + } + return len(certs), ok, nil + } + + // Copy inFile to certDir (may be .pem or p7c) + ok, err := CopyFile(inFile, outFile, overwrite) + if err != nil { + return 0, false, err + } + return len(certs), ok, nil +} + +func InspectCertificate(cert *x509.Certificate) (string, error) { + return model.CertString(cert), nil +} diff --git a/pkg/pdfcpu/color/color.go b/pkg/pdfcpu/color/color.go index b66ac4d3..0a77ee1b 100644 --- a/pkg/pdfcpu/color/color.go +++ b/pkg/pdfcpu/color/color.go @@ -36,6 +36,7 @@ var ( Red = SimpleColor{1, 0, 0} Green = SimpleColor{0, 1, 0} Blue = SimpleColor{0, 0, 1} + Yellow = SimpleColor{.5, .5, 0} ) var ErrInvalidColor = errors.New("pdfcpu: invalid color constant") diff --git a/pkg/pdfcpu/create/create.go b/pkg/pdfcpu/create/create.go index 2bbcf312..0d258abe 100644 --- a/pkg/pdfcpu/create/create.go +++ b/pkg/pdfcpu/create/create.go @@ -332,7 +332,7 @@ func CreatePage( } for _, la := range p.LinkAnnots { - d, err := la.RenderDict(xRefTable, *pageDictIndRef) + d, err := la.RenderDict(xRefTable, pageDictIndRef) if err != nil { return nil, nil, &json.UnsupportedTypeError{} } @@ -382,7 +382,7 @@ func UpdatePage(xRefTable *model.XRefTable, dIndRef types.IndirectRef, d, res ty } for _, la := range p.LinkAnnots { - d, err := la.RenderDict(xRefTable, dIndRef) + d, err := la.RenderDict(xRefTable, &dIndRef) if err != nil { return err } diff --git a/pkg/pdfcpu/createAnnotations.go b/pkg/pdfcpu/createAnnotations.go index 1e9124fb..59fa4797 100644 --- a/pkg/pdfcpu/createAnnotations.go +++ b/pkg/pdfcpu/createAnnotations.go @@ -537,7 +537,7 @@ func createFileAttachmentAnnotation(xRefTable *model.XRefTable, pageIndRef types fn := filepath.Base(fileName) - s, err := types.EscapeUTF16String(fn) + s, err := types.EscapedUTF16String(fn) if err != nil { return nil, err } @@ -583,7 +583,7 @@ func createFileSpecDict(xRefTable *model.XRefTable, fileName string) (types.Dict } fn := filepath.Base(fileName) - s, err := types.EscapeUTF16String(fn) + s, err := types.EscapedUTF16String(fn) if err != nil { return nil, err } @@ -682,9 +682,9 @@ func createMovieAnnotation(xRefTable *model.XRefTable, pageIndRef 
types.Indirect return xRefTable.IndRefForNewObject(d) } -func createMediaRenditionAction(xRefTable *model.XRefTable, mediaClipDataDict *types.IndirectRef) types.Dict { +func createMediaRenditionAction(mediaClipDataDict *types.IndirectRef) types.Dict { - r := createMediaRendition(xRefTable, mediaClipDataDict) + r := createMediaRendition(mediaClipDataDict) return types.Dict( map[string]types.Object{ @@ -717,7 +717,7 @@ func createScreenAnnotation(xRefTable *model.XRefTable, pageIndRef types.Indirec return nil, err } - mediaRenditionAction := createMediaRenditionAction(xRefTable, ir) + mediaRenditionAction := createMediaRenditionAction(ir) selectorRenditionAction := createSelectorRenditionAction(ir) diff --git a/pkg/pdfcpu/createRenditions.go b/pkg/pdfcpu/createRenditions.go index 76cc3b0e..dcf0c065 100644 --- a/pkg/pdfcpu/createRenditions.go +++ b/pkg/pdfcpu/createRenditions.go @@ -267,7 +267,7 @@ func createScreenParamsDict() *types.Dict { return &d1 } -func createMediaRendition(xRefTable *model.XRefTable, mediaClipDataDict *types.IndirectRef) *types.Dict { +func createMediaRendition(mediaClipDataDict *types.IndirectRef) *types.Dict { mhbe := createMHBEDict() diff --git a/pkg/pdfcpu/createTestPDF.go b/pkg/pdfcpu/createTestPDF.go index 700d01a2..617e4140 100644 --- a/pkg/pdfcpu/createTestPDF.go +++ b/pkg/pdfcpu/createTestPDF.go @@ -34,13 +34,25 @@ var ( ) func CreateXRefTableWithRootDict() (*model.XRefTable, error) { + // TODO + //xRefTable := model.NewXRefTable(nil) xRefTable := &model.XRefTable{ - Table: map[int]*model.XRefTableEntry{}, - Names: map[string]*model.Node{}, - PageAnnots: map[int]model.PgAnnots{}, - Stats: model.NewPDFStats(), - URIs: map[int]map[string]string{}, - UsedGIDs: map[string]map[uint16]bool{}, + Table: map[int]*model.XRefTableEntry{}, + Names: map[string]*model.Node{}, + NameRefs: map[string]model.NameMap{}, + KeywordList: types.StringSet{}, + Properties: map[string]string{}, + LinearizationObjs: types.IntSet{}, + PageAnnots: map[int]model.PgAnnots{}, + PageThumbs: map[int]types.IndirectRef{}, + Signatures: map[int]map[int]model.Signature{}, + Stats: model.NewPDFStats(), + ValidationMode: model.ValidationRelaxed, + ValidateLinks: false, + URIs: map[int]map[string]string{}, + UsedGIDs: map[string]map[uint16]bool{}, + FillFonts: map[string]types.IndirectRef{}, + Conf: nil, } xRefTable.Table[0] = model.NewFreeHeadXRefTableEntry() @@ -222,7 +234,7 @@ func CreateResourceDictInheritanceDemoXRef() (*model.XRefTable, error) { return xRefTable, nil } -func createFunctionalShadingDict(xRefTable *model.XRefTable) types.Dict { +func createFunctionalShadingDict() types.Dict { f := types.Dict( map[string]types.Object{ "FunctionType": types.Integer(2), @@ -241,7 +253,7 @@ func createFunctionalShadingDict(xRefTable *model.XRefTable) types.Dict { return d } -func createRadialShadingDict(xRefTable *model.XRefTable) types.Dict { +func createRadialShadingDict() types.Dict { f := types.Dict( map[string]types.Object{ "FunctionType": types.Integer(2), @@ -347,9 +359,9 @@ func addResources(xRefTable *model.XRefTable, pageDict types.Dict, fontName stri return err } - functionalBasedShDict := createFunctionalShadingDict(xRefTable) + functionalBasedShDict := createFunctionalShadingDict() - radialShDict := createRadialShadingDict(xRefTable) + radialShDict := createRadialShadingDict() f := types.Dict( map[string]types.Object{ @@ -1151,7 +1163,7 @@ func addThreads(xRefTable *model.XRefTable, rootDict types.Dict, pageIndRef type return nil } -func addOpenAction(xRefTable *model.XRefTable, 
rootDict types.Dict) error { +func addOpenAction(rootDict types.Dict) error { nextActionDict := types.Dict( map[string]types.Object{ "Type": types.Name("Action"), @@ -1176,7 +1188,7 @@ func addOpenAction(xRefTable *model.XRefTable, rootDict types.Dict) error { return nil } -func addURI(xRefTable *model.XRefTable, rootDict types.Dict) { +func addURI(rootDict types.Dict) { d := types.NewDict() d.InsertString("Base", "http://www.adobe.com") @@ -1214,7 +1226,7 @@ func addSpiderInfo(xRefTable *model.XRefTable, rootDict types.Dict) error { return nil } -func addOCProperties(xRefTable *model.XRefTable, rootDict types.Dict) error { +func addOCProperties(rootDict types.Dict) error { usageAppDict := types.Dict( map[string]types.Object{ "Event": types.Name("View"), @@ -1251,7 +1263,7 @@ func addOCProperties(xRefTable *model.XRefTable, rootDict types.Dict) error { return nil } -func addRequirements(xRefTable *model.XRefTable, rootDict types.Dict) { +func addRequirements(rootDict types.Dict) { d := types.NewDict() d.InsertName("Type", "Requirement") d.InsertName("S", "EnableJavaScripts") @@ -1283,24 +1295,24 @@ func CreateAnnotationDemoXRef() (*model.XRefTable, error) { return nil, err } - err = addOpenAction(xRefTable, rootDict) + err = addOpenAction(rootDict) if err != nil { return nil, err } - addURI(xRefTable, rootDict) + addURI(rootDict) err = addSpiderInfo(xRefTable, rootDict) if err != nil { return nil, err } - err = addOCProperties(xRefTable, rootDict) + err = addOCProperties(rootDict) if err != nil { return nil, err } - addRequirements(xRefTable, rootDict) + addRequirements(rootDict) return xRefTable, nil } @@ -1949,7 +1961,7 @@ func CreateContextWithXRefTable(conf *model.Configuration, pageDim *types.Dim) ( return CreateContext(xRefTable, conf), nil } -func createDemoContentStreamDict(xRefTable *model.XRefTable, pageDict types.Dict, b []byte) (*types.IndirectRef, error) { +func createDemoContentStreamDict(xRefTable *model.XRefTable, b []byte) (*types.IndirectRef, error) { sd, _ := xRefTable.NewStreamDictForBuf(b) if err := sd.Encode(); err != nil { return nil, err @@ -1980,7 +1992,7 @@ func createDemoPage(xRefTable *model.XRefTable, parentPageIndRef types.IndirectR pageDict.Insert("Resources", resDict) } - ir, err := createDemoContentStreamDict(xRefTable, pageDict, p.Buf.Bytes()) + ir, err := createDemoContentStreamDict(xRefTable, p.Buf.Bytes()) if err != nil { return nil, err } diff --git a/pkg/pdfcpu/crypto.go b/pkg/pdfcpu/crypto.go index 44c1e9ee..7b3890ac 100644 --- a/pkg/pdfcpu/crypto.go +++ b/pkg/pdfcpu/crypto.go @@ -26,10 +26,12 @@ import ( "crypto/rand" "crypto/rc4" "crypto/sha256" + "crypto/sha512" "encoding/binary" "encoding/hex" "fmt" "io" + "math/big" "strconv" "time" @@ -37,6 +39,9 @@ import ( "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" + + "golang.org/x/text/secure/precis" + "golang.org/x/text/unicode/norm" ) var ( @@ -99,6 +104,7 @@ var ( model.IMPORTBOOKMARKS: {0, 1}, model.EXPORTBOOKMARKS: {0, 1}, model.LISTIMAGES: {0, 1}, + model.UPDATEIMAGES: {0, 1}, model.CREATE: {0, 0}, model.DUMP: {0, 1}, model.LISTFORMFIELDS: {0, 0}, @@ -117,14 +123,14 @@ var ( model.LISTVIEWERPREFERENCES: {0, 1}, model.SETVIEWERPREFERENCES: {0, 1}, model.RESETVIEWERPREFERENCES: {0, 1}, + model.ZOOM: {0, 1}, } - ErrUnknownEncryption = errors.New("pdfcpu: PDF 2.0 encryption not supported") + ErrUnknownEncryption = errors.New("pdfcpu: unknown encryption") ) // NewEncryptDict creates a new EncryptDict using the standard security 
handler. -func newEncryptDict(needAES bool, keyLength int, permissions int16) types.Dict { - +func newEncryptDict(v model.Version, needAES bool, keyLength int, permissions int16) types.Dict { d := types.NewDict() d.Insert("Filter", types.Name("Standard")) @@ -135,8 +141,11 @@ func newEncryptDict(needAES bool, keyLength int, permissions int16) types.Dict { if keyLength == 256 { i = 5 } - d.Insert("R", types.Integer(i)) d.Insert("V", types.Integer(i)) + if v == model.V20 { + i++ + } + d.Insert("R", types.Integer(i)) } else { d.Insert("R", types.Integer(2)) d.Insert("V", types.Integer(1)) @@ -183,7 +192,6 @@ func newEncryptDict(needAES bool, keyLength int, permissions int16) types.Dict { } func encKey(userpw string, e *model.Enc) (key []byte) { - // 2a pw := []byte(userpw) if len(pw) >= 32 { @@ -235,11 +243,14 @@ func encKey(userpw string, e *model.Enc) (key []byte) { // validateUserPassword validates the user password aka document open password. func validateUserPassword(ctx *model.Context) (ok bool, err error) { - if ctx.E.R == 5 { return validateUserPasswordAES256(ctx) } + if ctx.E.R == 6 { + return validateUserPasswordAES256Rev6(ctx) + } + // Alg.4/5 p63 // 4a/5a create encryption key using Alg.2 p61 @@ -263,7 +274,6 @@ func validateUserPassword(ctx *model.Context) (ok bool, err error) { } func key(ownerpw, userpw string, r, l int) (key []byte) { - // 3a pw := []byte(ownerpw) if len(pw) == 0 { @@ -301,7 +311,6 @@ func key(ownerpw, userpw string, r, l int) (key []byte) { // O calculates the owner password digest. func o(ctx *model.Context) ([]byte, error) { - ownerpw := ctx.OwnerPW userpw := ctx.UserPW @@ -348,10 +357,8 @@ func o(ctx *model.Context) ([]byte, error) { // U calculates the user password digest. func u(ctx *model.Context) (u []byte, key []byte, err error) { - - // The PW string is generated from OS codepage characters by first converting the string to - // PDFDocEncoding. If input is Unicode, first convert to a codepage encoding , and then to - // PDFDocEncoding for backward compatibility. + // The PW string is generated from OS codepage characters by first converting the string to PDFDocEncoding. + // If input is Unicode, first convert to a codepage encoding , and then to PDFDocEncoding for backward compatibility. userpw := ctx.UserPW //fmt.Printf("U userpw=ctx.UserPW=%s\n", userpw) @@ -414,21 +421,41 @@ func validationSalt(bb []byte) []byte { } func keySalt(bb []byte) []byte { - return bb[40:] + return bb[40:48] } -func validateOwnerPasswordAES256(ctx *model.Context) (ok bool, err error) { +func decryptOE(ctx *model.Context, opw []byte) error { + b := append(opw, keySalt(ctx.E.O)...) + b = append(b, ctx.E.U...) + key := sha256.Sum256(b) + + cb, err := aes.NewCipher(key[:]) + if err != nil { + return err + } + + iv := make([]byte, 16) + ctx.EncKey = make([]byte, 32) + + mode := cipher.NewCBCDecrypter(cb, iv) + mode.CryptBlocks(ctx.EncKey, ctx.E.OE) + return nil +} + +func validateOwnerPasswordAES256(ctx *model.Context) (ok bool, err error) { if len(ctx.OwnerPW) == 0 { return false, nil } - // TODO Process PW with SASLPrep profile (RFC 4013) of stringprep (RFC 3454). - opw := []byte(ctx.OwnerPW) + opw, err := processInput(ctx.OwnerPW) + if err != nil { + return false, err + } + if len(opw) > 127 { opw = opw[:127] } - //fmt.Printf("opw <%s> isValidUTF8String: %t\n", opw, utf8.Valid(opw)) // Algorithm 3.2a 3. b := append(opw, validationSalt(ctx.E.O)...) 
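
For readers following the AES-256 changes in this hunk: below is a minimal, standalone sketch of the owner-key recovery that the patch factors out into decryptOE. It assumes the 48-byte /O and /U layout implied by the new keySalt (bytes 40..48) and validationSalt (bytes 32..40) helpers; the function and parameter names are illustrative, not pdfcpu API.

```go
package main

import (
	"crypto/aes"
	"crypto/cipher"
	"crypto/sha256"
	"fmt"
)

// Salt layout of the 48-byte /O (and /U) value: 32-byte hash, then an
// 8-byte validation salt, then an 8-byte key salt.
func validationSalt(bb []byte) []byte { return bb[32:40] }
func keySalt(bb []byte) []byte        { return bb[40:48] }

// recoverFileKeyFromOE mirrors decryptOE for R=5: SHA-256(ownerPW || keySalt(O) || U)
// is the AES-256 key that decrypts the 32-byte /OE value (CBC, zero IV) into the
// file encryption key.
func recoverFileKeyFromOE(ownerPW, o, u, oe []byte) ([]byte, error) {
	b := append([]byte{}, ownerPW...)
	b = append(b, keySalt(o)...)
	b = append(b, u...)
	key := sha256.Sum256(b)

	cb, err := aes.NewCipher(key[:])
	if err != nil {
		return nil, err
	}

	fileKey := make([]byte, 32)
	cipher.NewCBCDecrypter(cb, make([]byte, 16)).CryptBlocks(fileKey, oe)
	return fileKey, nil
}

func main() {
	// Dummy-sized inputs just to show the shapes involved.
	o, u, oe := make([]byte, 48), make([]byte, 48), make([]byte, 32)
	key, err := recoverFileKeyFromOE([]byte("owner"), o, u, oe)
	fmt.Println(len(key), err) // 32 <nil>
}
```
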
@@ -439,32 +466,39 @@ func validateOwnerPasswordAES256(ctx *model.Context) (ok bool, err error) { return false, nil } - b = append(opw, keySalt(ctx.E.O)...) - b = append(b, ctx.E.U...) - key := sha256.Sum256(b) + if err := decryptOE(ctx, opw); err != nil { + return false, err + } + + return true, nil +} + +func decryptUE(ctx *model.Context, upw []byte) error { + key := sha256.Sum256(append(upw, keySalt(ctx.E.U)...)) cb, err := aes.NewCipher(key[:]) if err != nil { - return false, err + return err } iv := make([]byte, 16) ctx.EncKey = make([]byte, 32) mode := cipher.NewCBCDecrypter(cb, iv) - mode.CryptBlocks(ctx.EncKey, ctx.E.OE) + mode.CryptBlocks(ctx.EncKey, ctx.E.UE) - return true, nil + return nil } func validateUserPasswordAES256(ctx *model.Context) (ok bool, err error) { + upw, err := processInput(ctx.UserPW) + if err != nil { + return false, err + } - // TODO Process PW with SASLPrep profile (RFC 4013) of stringprep (RFC 3454). - upw := []byte(ctx.UserPW) if len(upw) > 127 { upw = upw[:127] } - //fmt.Printf("upw <%s> isValidUTF8String: %t\n", upw, utf8.Valid(upw)) // Algorithm 3.2a 4, s := sha256.Sum256(append(upw, validationSalt(ctx.E.U)...)) @@ -473,7 +507,112 @@ func validateUserPasswordAES256(ctx *model.Context) (ok bool, err error) { return false, nil } - key := sha256.Sum256(append(upw, keySalt(ctx.E.U)...)) + if err := decryptUE(ctx, upw); err != nil { + return false, err + } + + return true, nil +} + +func processInput(input string) ([]byte, error) { + // Create a new Precis profile for SASLprep + p := precis.NewIdentifier( + precis.BidiRule, + precis.Norm(norm.NFKC), + ) + + output, err := p.String(input) + if err != nil { + return nil, err + } + + return []byte(output), nil +} + +func hashRev6(input, pw, U []byte) ([]byte, int, error) { + // 7.6.4.3.4 Algorithm 2.B returns 32 bytes. + + mod3 := new(big.Int).SetUint64(3) + + k0 := sha256.Sum256(input) + k := k0[:] + + var e []byte + j := 0 + + for ; j < 64 || e[len(e)-1] > byte(j-32); j++ { + var k1 []byte + bb := append(pw, k...) + if len(U) > 0 { + bb = append(bb, U...) + } + for i := 0; i < 64; i++ { + k1 = append(k1, bb...) + } + + cb, err := aes.NewCipher(k[:16]) + if err != nil { + return nil, -1, err + } + + iv := k[16:32] + e = make([]byte, len(k1)) + mode := cipher.NewCBCEncrypter(cb, iv) + mode.CryptBlocks(e, k1) + + num := new(big.Int).SetBytes(e[:16]) + r := (new(big.Int).Mod(num, mod3)).Uint64() + + switch r { + case 0: + k0 := sha256.Sum256(e) + k = k0[:] + case 1: + k0 := sha512.Sum384(e) + k = k0[:] + case 2: + k0 := sha512.Sum512(e) + k = k0[:] + } + + } + + return k[:32], j, nil +} + +func validateOwnerPasswordAES256Rev6(ctx *model.Context) (ok bool, err error) { + if len(ctx.OwnerPW) == 0 { + return false, nil + } + + // Process PW with SASLPrep profile (RFC 4013) of stringprep (RFC 3454). + opw, err := processInput(ctx.OwnerPW) + if err != nil { + return false, err + } + + if len(opw) > 127 { + opw = opw[:127] + } + + // Algorithm 12 + bb := append(opw, validationSalt(ctx.E.O)...) + bb = append(bb, ctx.E.U...) + s, _, err := hashRev6(bb, opw, ctx.E.U) + if err != nil { + return false, err + } + + if !bytes.HasPrefix(ctx.E.O, s[:]) { + return false, nil + } + + bb = append(opw, keySalt(ctx.E.O)...) + bb = append(bb, ctx.E.U...) 
+ key, _, err := hashRev6(bb, opw, ctx.E.U) + if err != nil { + return false, err + } cb, err := aes.NewCipher(key[:]) if err != nil { @@ -484,23 +623,67 @@ func validateUserPasswordAES256(ctx *model.Context) (ok bool, err error) { ctx.EncKey = make([]byte, 32) mode := cipher.NewCBCDecrypter(cb, iv) - mode.CryptBlocks(ctx.EncKey, ctx.E.UE) + mode.CryptBlocks(ctx.EncKey, ctx.E.OE) + + return true, nil +} + +func validateUserPasswordAES256Rev6(ctx *model.Context) (bool, error) { + if len(ctx.E.UE) != 32 { + return false, errors.New("UE: invalid length") + } + + upw, err := processInput(ctx.UserPW) + if err != nil { + return false, err + } + if len(upw) > 127 { + upw = upw[:127] + } + + // Validate U prefix + bb := append([]byte{}, upw...) + bb = append(bb, validationSalt(ctx.E.U)...) + s, _, err := hashRev6(bb, upw, nil) + if err != nil { + return false, err + } + if !bytes.HasPrefix(ctx.E.U, s) { + return false, nil + } + + // Derive decryption key + bb = append([]byte{}, upw...) + bb = append(bb, keySalt(ctx.E.U)...) + key, _, err := hashRev6(bb, upw, nil) + if err != nil { + return false, err + } + + block, err := aes.NewCipher(key) + if err != nil { + return false, err + } + + iv := make([]byte, 16) + encKey := make([]byte, 32) + cipher.NewCBCDecrypter(block, iv).CryptBlocks(encKey, ctx.E.UE) + ctx.EncKey = encKey return true, nil } // ValidateOwnerPassword validates the owner password aka change permissions password. func validateOwnerPassword(ctx *model.Context) (ok bool, err error) { - e := ctx.E if e.R == 5 { return validateOwnerPasswordAES256(ctx) } - // The PW string is generated from OS codepage characters by first converting the string to - // PDFDocEncoding. If input is Unicode, first convert to a codepage encoding , and then to - // PDFDocEncoding for backward compatibility. + if e.R == 6 { + return validateOwnerPasswordAES256Rev6(ctx) + } ownerpw := ctx.OwnerPW userpw := ctx.UserPW @@ -554,29 +737,67 @@ func validateOwnerPassword(ctx *model.Context) (ok bool, err error) { return ok, err } -// SupportedCFEntry returns true if all entries found are supported. -func supportedCFEntry(d types.Dict) (bool, error) { +func validateCFLength(len int, cfm *string) bool { + // See table 25 Length + + if cfm != nil { + if (*cfm == "AESV2" && len != 16) || (*cfm == "AESV3" && len != 32) { + return false + } + } + + // Standard security handler expresses in bytes. + minBytes, maxBytes := 5, 32 + if len < minBytes { + return false + } + if len <= maxBytes { + return true + } + + // Public security handler expresses in bits. 
+ minBits, maxBits := 40, 256 + if len < minBits || len > maxBits { + return false + } + + if len%8 > 0 { + return false + } + + return true +} +func supportedCFEntry(d types.Dict) (bool, error) { cfm := d.NameEntry("CFM") if cfm != nil && *cfm != "V2" && *cfm != "AESV2" && *cfm != "AESV3" { return false, errors.New("pdfcpu: supportedCFEntry: invalid entry \"CFM\"") } + aes := cfm != nil && (*cfm == "AESV2" || *cfm == "AESV3") + ae := d.NameEntry("AuthEvent") if ae != nil && *ae != "DocOpen" { - return false, errors.New("pdfcpu: supportedCFEntry: invalid entry \"AuthEvent\"") + return aes, errors.New("pdfcpu: supportedCFEntry: invalid entry \"AuthEvent\"") } - l := d.IntEntry("Length") - if l != nil && (*l < 5 || *l > 16) && *l != 32 && *l != 256 { - return false, errors.New("pdfcpu: supportedCFEntry: invalid entry \"Length\"") + len := d.IntEntry("Length") + if len == nil { + return aes, nil } - return cfm != nil && (*cfm == "AESV2" || *cfm == "AESV3"), nil + if !validateCFLength(*len, cfm) { + s := "" + if cfm != nil { + s = *cfm + } + return false, errors.Errorf("pdfcpu: supportedCFEntry: invalid entry \"Length\" %d %s", *len, s) + } + + return aes, nil } func perms(p int) (list []string) { - list = append(list, fmt.Sprintf("permission bits: %012b (x%03X)", uint32(p)&0x0F3C, uint32(p)&0x0F3C)) list = append(list, fmt.Sprintf("Bit 3: %t (print(rev2), print quality(rev>=3))", p&0x0004 > 0)) list = append(list, fmt.Sprintf("Bit 4: %t (modify other than controlled by bits 6,9,11)", p&0x0008 > 0)) @@ -586,13 +807,11 @@ func perms(p int) (list []string) { list = append(list, fmt.Sprintf("Bit 10: %t (extract(rev>=3))", p&0x0200 > 0)) list = append(list, fmt.Sprintf("Bit 11: %t (modify(rev>=3))", p&0x0400 > 0)) list = append(list, fmt.Sprintf("Bit 12: %t (print high-level(rev>=3))", p&0x0800 > 0)) - return list } // PermissionsList returns a list of set permissions. func PermissionsList(p int) (list []string) { - if p == 0 { return append(list, "Full access") } @@ -602,7 +821,6 @@ func PermissionsList(p int) (list []string) { // Permissions returns a list of set permissions. func Permissions(ctx *model.Context) (list []string) { - p := 0 if ctx.E != nil { p = ctx.E.P @@ -612,10 +830,9 @@ func Permissions(ctx *model.Context) (list []string) { } func validatePermissions(ctx *model.Context) (bool, error) { - // Algorithm 3.2a 5. - if ctx.E.R != 5 { + if ctx.E.R != 5 && ctx.E.R != 6 { return true, nil } @@ -635,10 +852,9 @@ func validatePermissions(ctx *model.Context) (bool, error) { } func writePermissions(ctx *model.Context, d types.Dict) error { - // Algorithm 3.10 - if ctx.E.R != 5 { + if ctx.E.R != 5 && ctx.E.R != 6 { return nil } @@ -682,7 +898,6 @@ func logP(enc *model.Enc) { } func maskExtract(mode model.CommandMode, secHandlerRev int) int { - p, ok := perm[mode] // no permissions defined or don't need extract permission @@ -700,7 +915,6 @@ func maskExtract(mode model.CommandMode, secHandlerRev int) int { } func maskModify(mode model.CommandMode, secHandlerRev int) int { - p, ok := perm[mode] // no permissions defined or don't need modify permission @@ -719,7 +933,6 @@ func maskModify(mode model.CommandMode, secHandlerRev int) int { // HasNeededPermissions returns true if permissions for pdfcpu processing are present. 
func hasNeededPermissions(mode model.CommandMode, enc *model.Enc) bool { - // see 7.6.3.2 logP(enc) @@ -741,18 +954,25 @@ func hasNeededPermissions(mode model.CommandMode, enc *model.Enc) bool { return true } -func getV(d types.Dict) (*int, error) { - +func getV(ctx *model.Context, d types.Dict, l int) (*int, error) { v := d.IntEntry("V") if v == nil || (*v != 1 && *v != 2 && *v != 4 && *v != 5) { return nil, errors.Errorf("getV: \"V\" must be one of 1,2,4,5") } + if *v == 5 { + if l != 256 { + return nil, errors.Errorf("getV: \"V\" 5 invalid length, must be 256, got %d", l) + } + if ctx.XRefTable.Version() != model.V20 && ctx.XRefTable.ValidationMode == model.ValidationStrict { + return nil, errors.New("getV: 5 valid for PDF 2.0 only") + } + } + return v, nil } func checkStmf(ctx *model.Context, stmf *string, cfDict types.Dict) error { - if stmf != nil && *stmf != "Identity" { d := cfDict.DictEntry(*stmf) @@ -770,9 +990,8 @@ func checkStmf(ctx *model.Context, stmf *string, cfDict types.Dict) error { return nil } -func checkV(ctx *model.Context, d types.Dict) (*int, error) { - - v, err := getV(d) +func checkV(ctx *model.Context, d types.Dict, l int) (*int, error) { + v, err := getV(ctx, d, l) if err != nil { return nil, err } @@ -827,7 +1046,6 @@ func checkV(ctx *model.Context, d types.Dict) (*int, error) { } func length(d types.Dict) (int, error) { - l := d.IntEntry("Length") if l == nil { return 40, nil @@ -840,23 +1058,27 @@ func length(d types.Dict) (int, error) { return *l, nil } -func getR(d types.Dict) (int, error) { +func getR(ctx *model.Context, d types.Dict) (int, error) { + maxR := 5 + if ctx.XRefTable.Version() == model.V20 || ctx.XRefTable.ValidationMode == model.ValidationRelaxed { + maxR = 6 + } r := d.IntEntry("R") - if r == nil || *r < 2 || *r > 5 { - if r != nil && *r > 5 { - return 0, ErrUnknownEncryption - } - return 0, errors.New("pdfcpu: encryption: \"R\" must be 2,3,4,5") + if r == nil || *r < 2 || *r > maxR { + return 0, ErrUnknownEncryption } return *r, nil } func validateAlgorithm(ctx *model.Context) (ok bool) { - k := ctx.EncryptKeyLength + if ctx.XRefTable.Version() == model.V20 { + return ctx.EncryptUsingAES && k == 256 + } + if ctx.EncryptUsingAES { return k == 40 || k == 128 || k == 256 } @@ -865,76 +1087,86 @@ func validateAlgorithm(ctx *model.Context) (ok bool) { } func validateAES256Parameters(d types.Dict) (oe, ue, perms []byte, err error) { + // OE + oe, err = d.StringEntryBytes("OE") + if err != nil { + return nil, nil, nil, err + } + if len(oe) != 32 { + return nil, nil, nil, errors.New("pdfcpu: encryption dictionary: 'OE' entry missing or not 32 bytes") + } - for { - - // OE - oe, err = d.StringEntryBytes("OE") - if err != nil { - break - } - if oe == nil || len(oe) != 32 { - err = errors.New("pdfcpu: unsupported encryption: required entry \"OE\" missing or invalid") - break - } - - // UE - ue, err = d.StringEntryBytes("UE") - if err != nil { - break - } - if ue == nil || len(ue) != 32 { - err = errors.New("pdfcpu: unsupported encryption: required entry \"UE\" missing or invalid") - break - } - - // Perms - perms, err = d.StringEntryBytes("Perms") - if err != nil { - break - } - if perms == nil || len(perms) != 16 { - err = errors.New("pdfcpu: unsupported encryption: required entry \"Perms\" missing or invalid") - } + // UE + ue, err = d.StringEntryBytes("UE") + if err != nil { + return nil, nil, nil, err + } + if len(ue) != 32 { + return nil, nil, nil, errors.New("pdfcpu: encryption dictionary: 'UE' entry missing or not 32 bytes") + } - break + // 
Perms + perms, err = d.StringEntryBytes("Perms") + if err != nil { + return nil, nil, nil, err + } + if len(perms) != 16 { + return nil, nil, nil, errors.New("pdfcpu: encryption dictionary: 'Perms' entry missing or not 16 bytes") } - return oe, ue, perms, err + return oe, ue, perms, nil } -func validateOAndU(d types.Dict) (o, u []byte, err error) { - - for { +func validateOAndU(ctx *model.Context, d types.Dict, r int) (o, u []byte, err error) { + // O, 32 bytes long if the value of R is 4 or less and 48 bytes long if the value of R is 6. + o, err = d.StringEntryBytes("O") + if err != nil { + return nil, nil, err + } - // O - o, err = d.StringEntryBytes("O") - if err != nil { - break + if ctx.XRefTable.ValidationMode == model.ValidationStrict { + if r == 6 && len(o) < 48 { + return nil, nil, errors.New("pdfcpu: unsupported encryption: missing or invalid required entry \"O\"") } - if o == nil || len(o) != 32 && len(o) != 48 { - err = errors.New("pdfcpu: unsupported encryption: missing or invalid required entry \"O\"") - break + if r <= 4 && len(o) < 32 { + return nil, nil, errors.New("pdfcpu: unsupported encryption: missing or invalid required entry \"O\"") } + } - // U - u, err = d.StringEntryBytes("U") - if err != nil { - break + // if l := len(o); l != 32 && l != 48 { + // if ctx.XRefTable.ValidationMode == model.ValidationStrict || l < 48 { + // return nil, nil, errors.New("pdfcpu: unsupported encryption: missing or invalid required entry \"O\"") + // } + // o = o[:48] // len(o) > 48, truncate + // } + + // U, 32 bytes long if the value of R is 4 or less and 48 bytes long if the value of R is 6. + u, err = d.StringEntryBytes("U") + if err != nil { + return nil, nil, err + } + + if ctx.XRefTable.ValidationMode == model.ValidationStrict { + if r == 6 && len(u) < 48 { + return nil, nil, errors.New("pdfcpu: unsupported encryption: missing or invalid required entry \"O\"") } - if u == nil || len(u) != 32 && len(u) != 48 { - err = errors.New("pdfcpu: unsupported encryption: missing or invalid required entry \"U\"") + if r <= 4 && len(u) < 32 { + return nil, nil, errors.New("pdfcpu: unsupported encryption: missing or invalid required entry \"O\"") } - - break } - return o, u, err + // if l := len(u); l != 32 && l != 48 { + // if ctx.XRefTable.ValidationMode == model.ValidationStrict || l < 48 { // Fix 1163 + // return nil, nil, errors.New("pdfcpu: unsupported encryption: missing or invalid required entry \"U\"") + // } + // u = u[:48] + // } + + return o, u, nil } // SupportedEncryption returns a pointer to a struct encapsulating used encryption. 
func supportedEncryption(ctx *model.Context, d types.Dict) (*model.Enc, error) { - // Filter filter := d.NameEntry("Filter") if filter == nil || *filter != "Standard" { @@ -946,31 +1178,31 @@ func supportedEncryption(ctx *model.Context, d types.Dict) (*model.Enc, error) { return nil, errors.New("pdfcpu: unsupported encryption: \"SubFilter\" not supported") } - // V - v, err := checkV(ctx, d) + // Length + l, err := length(d) if err != nil { return nil, err } - // Length - l, err := length(d) + // V + v, err := checkV(ctx, d, l) if err != nil { return nil, err } // R - r, err := getR(d) + r, err := getR(ctx, d) if err != nil { return nil, err } - o, u, err := validateOAndU(d) + o, u, err := validateOAndU(ctx, d, r) if err != nil { return nil, err } var oe, ue, perms []byte - if r == 5 { + if r == 5 || r == 6 { oe, ue, perms, err = validateAES256Parameters(d) if err != nil { return nil, err @@ -1005,7 +1237,6 @@ func supportedEncryption(ctx *model.Context, d types.Dict) (*model.Enc, error) { } func decryptKey(objNumber, generation int, key []byte, aes bool) []byte { - m := md5.New() nr := uint32(objNumber) @@ -1034,69 +1265,31 @@ func decryptKey(objNumber, generation int, key []byte, aes bool) []byte { // EncryptBytes encrypts s using RC4 or AES. func encryptBytes(b []byte, objNr, genNr int, encKey []byte, needAES bool, r int) ([]byte, error) { - if needAES { k := encKey - if r != 5 { + if r != 5 && r != 6 { k = decryptKey(objNr, genNr, encKey, needAES) } - bb, err := encryptAESBytes(b, k) - if err != nil { - return nil, err - } - return bb, nil + return encryptAESBytes(b, k) } return applyRC4CipherBytes(b, objNr, genNr, encKey, needAES) } -// EncryptString encrypts s using RC4 or AES. -func encryptString(s string, objNr, genNr int, key []byte, needAES bool, r int) (*string, error) { - - b, err := encryptBytes([]byte(s), objNr, genNr, key, needAES, r) - if err != nil { - return nil, err - } - - s1, err := types.Escape(string(b)) - if err != nil { - return nil, err - } - - return s1, err -} - // decryptBytes decrypts bb using RC4 or AES. func decryptBytes(b []byte, objNr, genNr int, encKey []byte, needAES bool, r int) ([]byte, error) { - if needAES { k := encKey - if r != 5 { + if r != 5 && r != 6 { k = decryptKey(objNr, genNr, encKey, needAES) } - bb, err := decryptAESBytes(b, k) - if err != nil { - return nil, err - } - return bb, nil + return decryptAESBytes(b, k) } return applyRC4CipherBytes(b, objNr, genNr, encKey, needAES) } -// decryptString decrypts s using RC4 or AES. 
-func decryptString(s string, objNr, genNr int, key []byte, needAES bool, r int) ([]byte, error) { - - bb, err := types.Unescape(s) - if err != nil { - return nil, err - } - - return decryptBytes(bb, objNr, genNr, key, needAES, r) -} - func applyRC4CipherBytes(b []byte, objNr, genNr int, key []byte, needAES bool) ([]byte, error) { - c, err := rc4.NewCipher(decryptKey(objNr, genNr, key, needAES)) if err != nil { return nil, err @@ -1108,14 +1301,13 @@ func applyRC4CipherBytes(b []byte, objNr, genNr int, key []byte, needAES bool) ( } func encrypt(m map[string]types.Object, k string, v types.Object, objNr, genNr int, key []byte, needAES bool, r int) error { - s, err := encryptDeepObject(v, objNr, genNr, key, needAES, r) if err != nil { return err } if s != nil { - m[k] = *s + m[k] = s } return nil @@ -1128,7 +1320,7 @@ func encryptDict(d types.Dict, objNr, genNr int, key []byte, needAES bool, r int ft = d["Type"] } if ft != nil { - if ftv, ok := ft.(types.Name); ok && ftv == "Sig" { + if ftv, ok := ft.(types.Name); ok && (ftv == "Sig" || ftv == "DocTimeStamp") { isSig = true } } @@ -1145,9 +1337,88 @@ func encryptDict(d types.Dict, objNr, genNr int, key []byte, needAES bool, r int return nil } -// EncryptDeepObject recurses over non trivial PDF objects and encrypts all strings encountered. -func encryptDeepObject(objIn types.Object, objNr, genNr int, key []byte, needAES bool, r int) (*types.HexLiteral, error) { +func encryptStringLiteral(sl types.StringLiteral, objNr, genNr int, key []byte, needAES bool, r int) (*types.StringLiteral, error) { + bb, err := types.Unescape(sl.Value()) + if err != nil { + return nil, err + } + + bb, err = encryptBytes(bb, objNr, genNr, key, needAES, r) + if err != nil { + return nil, err + } + + s, err := types.Escape(string(bb)) + if err != nil { + return nil, err + } + + sl = types.StringLiteral(*s) + + return &sl, nil +} + +func decryptStringLiteral(sl types.StringLiteral, objNr, genNr int, key []byte, needAES bool, r int) (*types.StringLiteral, error) { + if sl.Value() == "" { + return &sl, nil + } + bb, err := types.Unescape(sl.Value()) + if err != nil { + return nil, err + } + + bb, err = decryptBytes(bb, objNr, genNr, key, needAES, r) + if err != nil { + return nil, err + } + + s, err := types.Escape(string(bb)) + if err != nil { + return nil, err + } + + sl = types.StringLiteral(*s) + + return &sl, nil +} + +func encryptHexLiteral(hl types.HexLiteral, objNr, genNr int, key []byte, needAES bool, r int) (*types.HexLiteral, error) { + bb, err := hl.Bytes() + if err != nil { + return nil, err + } + + bb, err = encryptBytes(bb, objNr, genNr, key, needAES, r) + if err != nil { + return nil, err + } + + hl = types.NewHexLiteral(bb) + + return &hl, nil +} + +func decryptHexLiteral(hl types.HexLiteral, objNr, genNr int, key []byte, needAES bool, r int) (*types.HexLiteral, error) { + if hl.Value() == "" { + return &hl, nil + } + bb, err := hl.Bytes() + if err != nil { + return nil, err + } + + bb, err = decryptBytes(bb, objNr, genNr, key, needAES, r) + if err != nil { + return nil, err + } + + hl = types.NewHexLiteral(bb) + return &hl, nil +} + +// EncryptDeepObject recurses over non trivial PDF objects and encrypts all strings encountered. 
+func encryptDeepObject(objIn types.Object, objNr, genNr int, key []byte, needAES bool, r int) (types.Object, error) { _, ok := objIn.(types.IndirectRef) if ok { return nil, nil @@ -1174,26 +1445,23 @@ func encryptDeepObject(objIn types.Object, objNr, genNr int, key []byte, needAES return nil, err } if s != nil { - obj[i] = *s + obj[i] = s } } case types.StringLiteral: - s := obj.Value() - b, err := encryptBytes([]byte(s), objNr, genNr, key, needAES, r) + sl, err := encryptStringLiteral(obj, objNr, genNr, key, needAES, r) if err != nil { return nil, err } - hl := types.NewHexLiteral(b) - return &hl, nil + return *sl, nil case types.HexLiteral: - bb, err := encryptHexLiteral(obj, objNr, genNr, key, needAES, r) + hl, err := encryptHexLiteral(obj, objNr, genNr, key, needAES, r) if err != nil { return nil, err } - hl := types.NewHexLiteral(bb) - return &hl, nil + return *hl, nil default: @@ -1209,7 +1477,7 @@ func decryptDict(d types.Dict, objNr, genNr int, key []byte, needAES bool, r int ft = d["Type"] } if ft != nil { - if ftv, ok := ft.(types.Name); ok && ftv == "Sig" { + if ftv, ok := ft.(types.Name); ok && (ftv == "Sig" || ftv == "DocTimeStamp") { isSig = true } } @@ -1222,14 +1490,13 @@ func decryptDict(d types.Dict, objNr, genNr int, key []byte, needAES bool, r int return err } if s != nil { - d[k] = *s + d[k] = s } } return nil } -func decryptDeepObject(objIn types.Object, objNr, genNr int, key []byte, needAES bool, r int) (*types.HexLiteral, error) { - +func decryptDeepObject(objIn types.Object, objNr, genNr int, key []byte, needAES bool, r int) (types.Object, error) { _, ok := objIn.(types.IndirectRef) if ok { return nil, nil @@ -1249,25 +1516,23 @@ func decryptDeepObject(objIn types.Object, objNr, genNr int, key []byte, needAES return nil, err } if s != nil { - obj[i] = *s + obj[i] = s } } case types.StringLiteral: - bb, err := decryptString(obj.Value(), objNr, genNr, key, needAES, r) + sl, err := decryptStringLiteral(obj, objNr, genNr, key, needAES, r) if err != nil { return nil, err } - hl := types.NewHexLiteral(bb) - return &hl, nil + return *sl, nil case types.HexLiteral: - bb, err := decryptHexLiteral(obj, objNr, genNr, key, needAES, r) + hl, err := decryptHexLiteral(obj, objNr, genNr, key, needAES, r) if err != nil { return nil, err } - hl := types.NewHexLiteral(bb) - return &hl, nil + return *hl, nil default: @@ -1278,9 +1543,8 @@ func decryptDeepObject(objIn types.Object, objNr, genNr int, key []byte, needAES // EncryptStream encrypts a stream buffer using RC4 or AES. func encryptStream(buf []byte, objNr, genNr int, encKey []byte, needAES bool, r int) ([]byte, error) { - k := encKey - if r != 5 { + if r != 5 && r != 6 { k = decryptKey(objNr, genNr, encKey, needAES) } @@ -1293,9 +1557,8 @@ func encryptStream(buf []byte, objNr, genNr int, encKey []byte, needAES bool, r // decryptStream decrypts a stream buffer using RC4 or AES. 
func decryptStream(buf []byte, objNr, genNr int, encKey []byte, needAES bool, r int) ([]byte, error) { - k := encKey - if r != 5 { + if r != 5 && r != 6 { k = decryptKey(objNr, genNr, encKey, needAES) } @@ -1307,7 +1570,6 @@ func decryptStream(buf []byte, objNr, genNr int, encKey []byte, needAES bool, r } func applyRC4Bytes(buf, key []byte) ([]byte, error) { - c, err := rc4.NewCipher(key) if err != nil { return nil, err @@ -1326,7 +1588,6 @@ func applyRC4Bytes(buf, key []byte) ([]byte, error) { } func encryptAESBytes(b, key []byte) ([]byte, error) { - // pad b to aes.Blocksize l := len(b) % aes.BlockSize c := 0x10 @@ -1363,7 +1624,6 @@ func encryptAESBytes(b, key []byte) ([]byte, error) { } func decryptAESBytes(b, key []byte) ([]byte, error) { - if len(b) < aes.BlockSize { return nil, errors.New("pdfcpu: decryptAESBytes: Ciphertext too short") } @@ -1395,7 +1655,6 @@ func decryptAESBytes(b, key []byte) ([]byte, error) { } func fileID(ctx *model.Context) (types.HexLiteral, error) { - // see also 14.4 File Identifiers. // The calculation of the file identifier need not be reproducible; @@ -1415,7 +1674,7 @@ func fileID(ctx *model.Context) (types.HexLiteral, error) { h.Write([]byte(strconv.Itoa(ctx.Read.ReadFileSize()))) // All values of the info dict which is assumed to be there at this point. - if ctx.Version() < model.V20 { + if ctx.XRefTable.Version() < model.V20 { d, err := ctx.DereferenceDict(*ctx.Info) if err != nil { return "", err @@ -1434,87 +1693,77 @@ func fileID(ctx *model.Context) (types.HexLiteral, error) { return types.HexLiteral(hex.EncodeToString(m)), nil } -func encryptHexLiteral(hl types.HexLiteral, objNr, genNr int, key []byte, needAES bool, r int) ([]byte, error) { - - bb, err := hl.Bytes() - if err != nil { - return nil, err - } - - return encryptBytes(bb, objNr, genNr, key, needAES, r) +func calcFileEncKey(ctx *model.Context) error { + ctx.EncKey = make([]byte, 32) + _, err := io.ReadFull(rand.Reader, ctx.EncKey) + return err } -func decryptHexLiteral(hl types.HexLiteral, objNr, genNr int, key []byte, needAES bool, r int) ([]byte, error) { - - bb, err := hl.Bytes() +func calcOAndUAES256(ctx *model.Context, d types.Dict) (err error) { + b := make([]byte, 16) + _, err = io.ReadFull(rand.Reader, b) if err != nil { - return nil, err + return err } - return decryptBytes(bb, objNr, genNr, key, needAES, r) -} + u := append(make([]byte, 32), b...) + upw := []byte(ctx.UserPW) + h := sha256.Sum256(append(upw, validationSalt(u)...)) -func calcFileEncKeyFromUE(ctx *model.Context) (k []byte, err error) { + ctx.E.U = append(h[:], b...) + d.Update("U", types.HexLiteral(hex.EncodeToString(ctx.E.U))) - upw := []byte(ctx.OwnerPW) - key := sha256.Sum256(append(upw, keySalt(ctx.E.U)...)) + /////////////////////////////////// - cb, err := aes.NewCipher(key[:]) + b = make([]byte, 16) + _, err = io.ReadFull(rand.Reader, b) if err != nil { - return nil, err + return err } - iv := make([]byte, 16) - k = make([]byte, 32) - - mode := cipher.NewCBCDecrypter(cb, iv) - mode.CryptBlocks(k, ctx.E.UE) - - return k, nil -} - -// func calcFileEncKeyFromOE(ctx *model.Context) (k []byte, err error) { + o := append(make([]byte, 32), b...) + opw := []byte(ctx.OwnerPW) + c := append(opw, validationSalt(o)...) + h = sha256.Sum256(append(c, ctx.E.U...)) + ctx.E.O = append(h[:], b...) + d.Update("O", types.HexLiteral(hex.EncodeToString(ctx.E.O))) -// opw := []byte(ctx.OwnerPW) -// b := append(opw, keySalt(ctx.E.O)...) -// b = append(b, ctx.E.U...) 
-// key := sha256.Sum256(b) + ////////////////////////////////// -// cb, err := aes.NewCipher(key[:]) -// if err != nil { -// return nil, err -// } + if err := calcFileEncKey(ctx); err != nil { + return err + } -// iv := make([]byte, 16) -// k = make([]byte, 32) + ////////////////////////////////// -// mode := cipher.NewCBCDecrypter(cb, iv) -// mode.CryptBlocks(k, ctx.E.OE) + h = sha256.Sum256(append(upw, keySalt(u)...)) + cb, err := aes.NewCipher(h[:]) + if err != nil { + return err + } -// return k, nil -// } + iv := make([]byte, 16) + mode := cipher.NewCBCEncrypter(cb, iv) + mode.CryptBlocks(ctx.E.UE, ctx.EncKey) + d.Update("UE", types.HexLiteral(hex.EncodeToString(ctx.E.UE))) -func calcFileEncKey(ctx *model.Context, d types.Dict) (err error) { + ////////////////////////////////// - // Calc Random UE (32 bytes) - ue := make([]byte, 32) - _, err = io.ReadFull(rand.Reader, ue) + c = append(opw, keySalt(o)...) + h = sha256.Sum256(append(c, ctx.E.U...)) + cb, err = aes.NewCipher(h[:]) if err != nil { return err } - ctx.E.UE = ue - d.Update("UE", types.HexLiteral(hex.EncodeToString(ctx.E.UE))) - - // Calc file encryption key. - ctx.EncKey, err = calcFileEncKeyFromUE(ctx) + mode = cipher.NewCBCEncrypter(cb, iv) + mode.CryptBlocks(ctx.E.OE, ctx.EncKey) + d.Update("OE", types.HexLiteral(hex.EncodeToString(ctx.E.OE))) - return err + return nil } -func calcOAndUAES256(ctx *model.Context, d types.Dict) (err error) { - - // 1) Calc U. +func calcOAndUAES256Rev6(ctx *model.Context, d types.Dict) (err error) { b := make([]byte, 16) _, err = io.ReadFull(rand.Reader, b) if err != nil { @@ -1523,11 +1772,16 @@ func calcOAndUAES256(ctx *model.Context, d types.Dict) (err error) { u := append(make([]byte, 32), b...) upw := []byte(ctx.UserPW) - h := sha256.Sum256(append(upw, validationSalt(u)...)) + h, _, err := hashRev6(append(upw, validationSalt(u)...), upw, nil) + if err != nil { + return err + } + ctx.E.U = append(h[:], b...) d.Update("U", types.HexLiteral(hex.EncodeToString(ctx.E.U))) - // 2) Calc O (depends on U). + /////////////////////////// + b = make([]byte, 16) _, err = io.ReadFull(rand.Reader, b) if err != nil { @@ -1537,17 +1791,27 @@ func calcOAndUAES256(ctx *model.Context, d types.Dict) (err error) { o := append(make([]byte, 32), b...) opw := []byte(ctx.OwnerPW) c := append(opw, validationSalt(o)...) - h = sha256.Sum256(append(c, ctx.E.U...)) + h, _, err = hashRev6(append(c, ctx.E.U...), opw, ctx.E.U) + if err != nil { + return err + } + ctx.E.O = append(h[:], b...) d.Update("O", types.HexLiteral(hex.EncodeToString(ctx.E.O))) - err = calcFileEncKey(ctx, d) + /////////////////////////// + + if err := calcFileEncKey(ctx); err != nil { + return err + } + + /////////////////////////// + + h, _, err = hashRev6(append(upw, keySalt(u)...), upw, nil) if err != nil { return err } - // Encrypt file encryption key into UE. - h = sha256.Sum256(append(upw, keySalt(u)...)) cb, err := aes.NewCipher(h[:]) if err != nil { return err @@ -1558,9 +1822,14 @@ func calcOAndUAES256(ctx *model.Context, d types.Dict) (err error) { mode.CryptBlocks(ctx.E.UE, ctx.EncKey) d.Update("UE", types.HexLiteral(hex.EncodeToString(ctx.E.UE))) - // Encrypt file encryption key into OE. + ////////////////////////////// + c = append(opw, keySalt(o)...) 
- h = sha256.Sum256(append(c, ctx.E.U...)) + h, _, err = hashRev6(append(c, ctx.E.U...), opw, ctx.E.U) + if err != nil { + return err + } + cb, err = aes.NewCipher(h[:]) if err != nil { return err @@ -1574,11 +1843,14 @@ func calcOAndUAES256(ctx *model.Context, d types.Dict) (err error) { } func calcOAndU(ctx *model.Context, d types.Dict) (err error) { - if ctx.E.R == 5 { return calcOAndUAES256(ctx, d) } + if ctx.E.R == 6 { + return calcOAndUAES256Rev6(ctx, d) + } + ctx.E.O, err = o(ctx) if err != nil { return err diff --git a/pkg/pdfcpu/cut.go b/pkg/pdfcpu/cut.go index e20a73ee..d51ab03e 100644 --- a/pkg/pdfcpu/cut.go +++ b/pkg/pdfcpu/cut.go @@ -147,6 +147,7 @@ func createOutline( ctxSrc, ctxDest *model.Context, pagesIndRef types.IndirectRef, pagesDict, d types.Dict, + pageNr int, cropBox *types.Rectangle, migrated map[int]int, cut *model.Cut) error { @@ -177,7 +178,7 @@ func createOutline( drawOutlineCuts(&buf, cropBox, cb, cut) - bb, err := ctxSrc.PageContent(d1) + bb, err := ctxSrc.PageContent(d1, pageNr) if err != nil { return err } @@ -223,7 +224,7 @@ func createOutline( return nil } -func prepForCut(ctxSrc *model.Context, i int) ( +func prepForCut(ctxSrc *model.Context, pageNr int) ( *model.Context, *types.Rectangle, *types.IndirectRef, @@ -247,12 +248,12 @@ func prepForCut(ctxSrc *model.Context, i int) ( return nil, nil, nil, nil, nil, nil, err } - d, _, inhPAttrs, err := ctxSrc.PageDict(i, false) + d, _, inhPAttrs, err := ctxSrc.PageDict(pageNr, false) if err != nil { return nil, nil, nil, nil, nil, nil, err } if d == nil { - return nil, nil, nil, nil, nil, nil, errors.Errorf("pdfcpu: unknown page number: %d\n", i) + return nil, nil, nil, nil, nil, nil, errors.Errorf("pdfcpu: unknown page number: %d\n", pageNr) } d.Delete("Annots") @@ -264,8 +265,8 @@ func prepForCut(ctxSrc *model.Context, i int) ( return ctxDest, cropBox, pagesIndRef, pagesDict, d, inhPAttrs, nil } -func internPageRot(ctxSrc *model.Context, rotate int, cropBox *types.Rectangle, d types.Dict, trans []byte) error { - bb, err := ctxSrc.PageContent(d) +func internPageRot(ctxSrc *model.Context, rotate int, cropBox *types.Rectangle, d types.Dict, pageNr int, trans []byte) error { + bb, err := ctxSrc.PageContent(d, pageNr) if err != nil { return err } @@ -297,7 +298,7 @@ func internPageRot(ctxSrc *model.Context, rotate int, cropBox *types.Rectangle, return nil } -func handleCutMargin(ctxSrc *model.Context, d, d1 types.Dict, cropBox, cb *types.Rectangle, i, j int, w, h float64, sc *float64, cut *model.Cut) error { +func handleCutMargin(ctxSrc *model.Context, d, d1 types.Dict, pageNr int, cropBox, cb *types.Rectangle, i, j int, w, h float64, sc *float64, cut *model.Cut) error { ar := cb.AspectRatio() mv := cut.Margin / ar @@ -355,7 +356,7 @@ func handleCutMargin(ctxSrc *model.Context, d, d1 types.Dict, cropBox, cb *types var trans bytes.Buffer fmt.Fprintf(&trans, "q %.5f %.5f %.5f %.5f %.5f %.5f cm ", m[0][0], m[0][1], m[1][0], m[1][1], m[2][0], m[2][1]) - bbOrig, err := ctxSrc.PageContent(d) + bbOrig, err := ctxSrc.PageContent(d, pageNr) if err != nil { return err } @@ -383,6 +384,7 @@ func createTiles( ctxSrc, ctxDest *model.Context, pagesIndRef types.IndirectRef, pagesDict, d types.Dict, + pageNr int, cropBox *types.Rectangle, inhPAttrs *model.InheritedPageAttrs, migrated map[int]int, @@ -422,7 +424,7 @@ func createTiles( d1["CropBox"] = cb.Array() if cut.Margin > 0 { - if err := handleCutMargin(ctxSrc, d, d1, cropBox, cb, i, j, w, h, &sc, cut); err != nil { + if err := handleCutMargin(ctxSrc, d, d1, pageNr, cropBox, 
cb, i, j, w, h, &sc, cut); err != nil { return err } } @@ -449,12 +451,12 @@ func createTiles( return nil } -func CutPage(ctxSrc *model.Context, i int, cut *model.Cut) (*model.Context, error) { +func CutPage(ctxSrc *model.Context, pageNr int, cut *model.Cut) (*model.Context, error) { // required: at least one of horizontalCut, verticalCut // optionally: border, margin, bgcolor - ctxDest, cropBox, pagesIndRef, pagesDict, d, inhPAttrs, err := prepForCut(ctxSrc, i) + ctxDest, cropBox, pagesIndRef, pagesDict, d, inhPAttrs, err := prepForCut(ctxSrc, pageNr) if err != nil { return nil, err } @@ -470,17 +472,17 @@ func CutPage(ctxSrc *model.Context, i int, cut *model.Cut) (*model.Context, erro d.Delete("Rotate") } - if err := internPageRot(ctxSrc, rotate, cropBox, d, nil); err != nil { + if err := internPageRot(ctxSrc, rotate, cropBox, d, pageNr, nil); err != nil { return nil, err } migrated := map[int]int{} - if err := createOutline(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, cropBox, migrated, cut); err != nil { + if err := createOutline(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, pageNr, cropBox, migrated, cut); err != nil { return nil, err } - if err := createTiles(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, cropBox, inhPAttrs, migrated, cut); err != nil { + if err := createTiles(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, pageNr, cropBox, inhPAttrs, migrated, cut); err != nil { return nil, err } @@ -524,11 +526,11 @@ func createNDownCuts(n int, cropBox *types.Rectangle, cut *model.Cut) { } } -func NDownPage(ctxSrc *model.Context, i, n int, cut *model.Cut) (*model.Context, error) { +func NDownPage(ctxSrc *model.Context, pageNr, n int, cut *model.Cut) (*model.Context, error) { // Optionally: border, margin, bgcolor - ctxDest, cropBox, pagesIndRef, pagesDict, d, inhPAttrs, err := prepForCut(ctxSrc, i) + ctxDest, cropBox, pagesIndRef, pagesDict, d, inhPAttrs, err := prepForCut(ctxSrc, pageNr) if err != nil { return nil, err } @@ -544,7 +546,7 @@ func NDownPage(ctxSrc *model.Context, i, n int, cut *model.Cut) (*model.Context, d.Delete("Rotate") } - if err := internPageRot(ctxSrc, rotate, cropBox, d, nil); err != nil { + if err := internPageRot(ctxSrc, rotate, cropBox, d, pageNr, nil); err != nil { return nil, err } @@ -552,11 +554,11 @@ func NDownPage(ctxSrc *model.Context, i, n int, cut *model.Cut) (*model.Context, migrated := map[int]int{} - if err := createOutline(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, cropBox, migrated, cut); err != nil { + if err := createOutline(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, pageNr, cropBox, migrated, cut); err != nil { return nil, err } - if err := createTiles(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, cropBox, inhPAttrs, migrated, cut); err != nil { + if err := createTiles(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, pageNr, cropBox, inhPAttrs, migrated, cut); err != nil { return nil, err } @@ -591,12 +593,12 @@ func createPosterCuts(cropBox *types.Rectangle, cut *model.Cut) { } } -func PosterPage(ctxSrc *model.Context, i int, cut *model.Cut) (*model.Context, error) { +func PosterPage(ctxSrc *model.Context, pageNr int, cut *model.Cut) (*model.Context, error) { // required: formsize(=papersize) or dimensions // optionally: scalefactor, border, margin, bgcolor - ctxDest, cropBox, pagesIndRef, pagesDict, d, inhPAttrs, err := prepForCut(ctxSrc, i) + ctxDest, cropBox, pagesIndRef, pagesDict, d, inhPAttrs, err := prepForCut(ctxSrc, pageNr) if err != nil { return nil, err } @@ -630,7 +632,7 @@ func PosterPage(ctxSrc *model.Context, i int, cut 
*model.Cut) (*model.Context, e var trans bytes.Buffer fmt.Fprintf(&trans, "q %.5f %.5f %.5f %.5f %.5f %.5f cm ", m[0][0], m[0][1], m[1][0], m[1][1], m[2][0], m[2][1]) - if err := internPageRot(ctxSrc, rotate, cropBox, d, trans.Bytes()); err != nil { + if err := internPageRot(ctxSrc, rotate, cropBox, d, pageNr, trans.Bytes()); err != nil { return nil, err } @@ -638,11 +640,11 @@ func PosterPage(ctxSrc *model.Context, i int, cut *model.Cut) (*model.Context, e migrated := map[int]int{} - if err := createOutline(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, cropBox, migrated, cut); err != nil { + if err := createOutline(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, pageNr, cropBox, migrated, cut); err != nil { return nil, err } - if err := createTiles(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, cropBox, inhPAttrs, migrated, cut); err != nil { + if err := createTiles(ctxSrc, ctxDest, *pagesIndRef, pagesDict, d, pageNr, cropBox, inhPAttrs, migrated, cut); err != nil { return nil, err } diff --git a/pkg/pdfcpu/doc.go b/pkg/pdfcpu/doc.go index 196c65f4..47153def 100644 --- a/pkg/pdfcpu/doc.go +++ b/pkg/pdfcpu/doc.go @@ -9,10 +9,11 @@ The commands are: booklet arrange pages onto larger sheets of paper to make a booklet or zine bookmarks list, import, export, remove bookmarks boxes list, add, remove page boundaries for selected pages + certificates list, inspect, import, reset certificates changeopw change owner password changeupw change user password collect create custom sequence of selected pages - config print configuration + config list, reset configuration create create PDF content including forms via JSON crop set cropbox for selected pages cut custom cut pages horizontally or vertically @@ -22,7 +23,7 @@ The commands are: fonts install, list supported fonts, create cheat sheets form list, remove fields, lock, unlock, reset, export, fill form via JSON or CSV grid rearrange pages or images for enhanced browsing experience - images list images for selected pages + images list, extract, update images import import/convert images to PDF info print file info keywords list, add, remove keywords @@ -41,12 +42,13 @@ The commands are: resize scale selected pages rotate rotate selected pages selectedpages print definition of the -pages flag + signatures validate signatures split split up a PDF by span or bookmark stamp add, remove, update Unicode text, image or PDF stamps for selected pages trim create trimmed version of selected pages validate validate PDF against PDF 32000-1:2008 (PDF 1.7) + basic PDF 2.0 validation version print version - viewpref list, set, reset viewer preferences for opened document + viewerpref list, set, reset viewer preferences for opened document watermark add, remove, update Unicode text, image or PDF watermarks for selected pages zoom zoom in/out of selected pages by magnification factor or corresponding margin */ diff --git a/pkg/pdfcpu/extract.go b/pkg/pdfcpu/extract.go index 9b45c915..1b8be0f4 100644 --- a/pkg/pdfcpu/extract.go +++ b/pkg/pdfcpu/extract.go @@ -24,6 +24,7 @@ import ( "github.com/angel-one/pdfcpu/pkg/filter" "github.com/angel-one/pdfcpu/pkg/log" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/font" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -34,7 +35,22 @@ import ( func ImageObjNrs(ctx *model.Context, pageNr int) []int { // TODO Exclude SMask image objects. 
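+	// Return an empty slice for out-of-range page numbers or when no optimization data exists for the page.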
objNrs := []int{} - for k, v := range ctx.Optimize.PageImages[pageNr-1] { + + if pageNr < 1 { + return objNrs + } + + imgObjNrs := ctx.Optimize.PageImages + if len(imgObjNrs) == 0 { + return objNrs + } + + pageImgObjNrs := imgObjNrs[pageNr-1] + if pageImgObjNrs == nil { + return objNrs + } + + for k, v := range pageImgObjNrs { if v { objNrs = append(objNrs, k) } @@ -201,6 +217,30 @@ func ColorSpaceComponents(xRefTable *model.XRefTable, sd *types.StreamDict) (int return 0, nil } +func imageWidth(ctx *model.Context, sd *types.StreamDict, objNr int) (int, error) { + obj, ok := sd.Find("Width") + if !ok { + return 0, errors.Errorf("pdfcpu: missing image width obj#%d", objNr) + } + i, err := ctx.DereferenceInteger(obj) + if err != nil { + return 0, err + } + return i.Value(), nil +} + +func imageHeight(ctx *model.Context, sd *types.StreamDict, objNr int) (int, error) { + obj, ok := sd.Find("Height") + if !ok { + return 0, errors.Errorf("pdfcpu: missing image height obj#%d", objNr) + } + i, err := ctx.DereferenceInteger(obj) + if err != nil { + return 0, err + } + return i.Value(), nil +} + func imageStub( ctx *model.Context, sd *types.StreamDict, @@ -209,14 +249,14 @@ func imageStub( thumb, imgMask bool, objNr int) (*model.Image, error) { - w := sd.IntEntry("Width") - if w == nil { - return nil, errors.Errorf("pdfcpu: missing image width obj#%d", objNr) + w, err := imageWidth(ctx, sd, objNr) + if err != nil { + return nil, err } - h := sd.IntEntry("Height") - if h == nil { - return nil, errors.Errorf("pdfcpu: missing image height obj#%d", objNr) + h, err := imageHeight(ctx, sd, objNr) + if err != nil { + return nil, err } cs, err := ColorSpaceString(ctx, sd) @@ -256,7 +296,7 @@ func imageStub( interpol = true } - i, err := StreamLength(ctx, sd) + size, err := StreamLength(ctx, sd) if err != nil { return nil, err } @@ -273,13 +313,13 @@ func imageStub( IsImgMask: imgMask, HasImgMask: mask, HasSMask: sMask, - Width: *w, - Height: *h, + Width: w, + Height: h, Cs: cs, Comp: comp, Bpc: bpc, Interpol: interpol, - Size: i, + Size: size, Filter: filters, DecodeParms: s, } @@ -332,7 +372,7 @@ func decodeImage(ctx *model.Context, sd *types.StreamDict, filters, lastFilter s switch lastFilter { - case filter.DCT, filter.JPX, filter.Flate, filter.CCITTFax, filter.RunLength: + case filter.DCT, filter.JPX, filter.Flate, filter.LZW, filter.CCITTFax, filter.RunLength: if err := sd.Decode(); err != nil { return err } @@ -354,7 +394,7 @@ func decodeImage(ctx *model.Context, sd *types.StreamDict, filters, lastFilter s func img( ctx *model.Context, sd *types.StreamDict, - thumb, imgMask bool, + thumb bool, resourceID, filters, lastFilter string, objNr int) (*model.Image, error) { @@ -394,7 +434,7 @@ func ExtractImage(ctx *model.Context, sd *types.StreamDict, thumb bool, resource return imageStub(ctx, sd, resourceID, filters, lastFilter, decodeParms, thumb, imgMask, objNr) } - return img(ctx, sd, thumb, imgMask, resourceID, filters, lastFilter, objNr) + return img(ctx, sd, thumb, resourceID, filters, lastFilter, objNr) } // ExtractPageImages extracts all images used by pageNr. 
@@ -403,7 +443,7 @@ func ExtractPageImages(ctx *model.Context, pageNr int, stub bool) (map[int]model m := map[int]model.Image{} for _, objNr := range ImageObjNrs(ctx, pageNr) { imageObj := ctx.Optimize.ImageObjects[objNr] - img, err := ExtractImage(ctx, imageObj.ImageDict, false, imageObj.ResourceNames[0], objNr, stub) + img, err := ExtractImage(ctx, imageObj.ImageDict, false, imageObj.ResourceNames[pageNr-1], objNr, stub) if err != nil { return nil, err } @@ -442,7 +482,22 @@ type Font struct { // Requires an optimized context. func FontObjNrs(ctx *model.Context, pageNr int) []int { objNrs := []int{} - for k, v := range ctx.Optimize.PageFonts[pageNr-1] { + + if pageNr < 1 { + return objNrs + } + + fontObjNrs := ctx.Optimize.PageFonts + if len(fontObjNrs) == 0 { + return objNrs + } + + pageFontObjNrs := fontObjNrs[pageNr-1] + if pageFontObjNrs == nil { + return objNrs + } + + for k, v := range pageFontObjNrs { if v { objNrs = append(objNrs, k) } @@ -452,15 +507,7 @@ func FontObjNrs(ctx *model.Context, pageNr int) []int { // ExtractFont extracts a font from fontObject. func ExtractFont(ctx *model.Context, fontObject model.FontObject, objNr int) (*Font, error) { - // Only embedded fonts have binary data. - if !fontObject.Embedded() { - if log.DebugEnabled() { - log.Debug.Printf("ExtractFont: ignoring obj#%d - non embedded font: %s\n", objNr, fontObject.FontName) - } - return nil, nil - } - - d, err := fontDescriptor(ctx.XRefTable, fontObject.FontDict, objNr) + d, err := font.FontDescriptor(ctx.XRefTable, fontObject.FontDict, objNr) if err != nil { return nil, err } @@ -509,8 +556,12 @@ func ExtractFont(ctx *model.Context, fontObject model.FontObject, objNr int) (*F f = &Font{bytes.NewReader(sd.Content), fontObject.FontName, "ttf"} default: + s := fmt.Sprintf("extractFontData: obj#%d - unsupported fonttype %s - font: %s\n", objNr, fontType, fontObject.FontName) if log.InfoEnabled() { - log.Info.Printf("extractFontData: ignoring obj#%d - unsupported fonttype %s - font: %s\n", objNr, fontType, fontObject.FontName) + log.Info.Println(s) + } + if log.CLIEnabled() { + log.CLI.Printf(s) } return nil, nil } @@ -519,9 +570,12 @@ func ExtractFont(ctx *model.Context, fontObject model.FontObject, objNr int) (*F } // ExtractPageFonts extracts all fonts used by pageNr. -func ExtractPageFonts(ctx *model.Context, pageNr int) ([]Font, error) { +func ExtractPageFonts(ctx *model.Context, pageNr int, objNrs, skipped types.IntSet) ([]Font, error) { ff := []Font{} for _, i := range FontObjNrs(ctx, pageNr) { + if objNrs[i] || skipped[i] { + continue + } fontObject := ctx.Optimize.FontObjects[i] f, err := ExtractFont(ctx, *fontObject, i) if err != nil { @@ -529,6 +583,9 @@ func ExtractPageFonts(ctx *model.Context, pageNr int) ([]Font, error) { } if f != nil { ff = append(ff, *f) + objNrs[i] = true + } else { + skipped[i] = true } } return ff, nil @@ -549,14 +606,9 @@ func ExtractFormFonts(ctx *model.Context) ([]Font, error) { return ff, nil } -// ExtractPage extracts pageNr into a new single page context. -func ExtractPage(ctx *model.Context, pageNr int) (*model.Context, error) { - return ExtractPages(ctx, []int{pageNr}, false) -} - // ExtractPages extracts pageNrs into a new single page context. 
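+// The destination context is created with the source context's configuration (ctx.Conf).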
func ExtractPages(ctx *model.Context, pageNrs []int, usePgCache bool) (*model.Context, error) { - ctxDest, err := CreateContextWithXRefTable(nil, types.PaperSize["A4"]) + ctxDest, err := CreateContextWithXRefTable(ctx.Conf, types.PaperSize["A4"]) if err != nil { return nil, err } @@ -575,7 +627,7 @@ func ExtractPageContent(ctx *model.Context, pageNr int) (io.Reader, error) { if err != nil { return nil, err } - bb, err := ctx.PageContent(d) + bb, err := ctx.PageContent(d, pageNr) if err != nil && err != model.ErrNoContent { return nil, err } diff --git a/pkg/pdfcpu/font/fontDict.go b/pkg/pdfcpu/font/fontDict.go index 73fd2992..8eef51ca 100644 --- a/pkg/pdfcpu/font/fontDict.go +++ b/pkg/pdfcpu/font/fontDict.go @@ -30,6 +30,7 @@ import ( "unicode/utf16" "github.com/angel-one/pdfcpu/pkg/font" + "github.com/angel-one/pdfcpu/pkg/log" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -66,6 +67,15 @@ func CJKEncoding(s string) bool { return types.MemberOf(s, []string{"UniGB-UTF16-H", "UniCNS-UTF16-H", "UniJIS-UTF16-H", "UniKS-UTF16-H"}) } +func ScriptForEncoding(enc string) string { + for k, v := range cjkParms { + if v.encoding == enc { + return k + } + } + return "" +} + func fontDescriptorIndRefs(fd types.Dict, lang string, font *model.FontResource) error { if lang != "" { if s := fd.NameEntry("Lang"); s != nil { @@ -147,7 +157,7 @@ func flateEncodedStreamIndRef(xRefTable *model.XRefTable, data []byte) (*types.I return xRefTable.IndRefForNewObject(*sd) } -func ttfFontFile(xRefTable *model.XRefTable, ttf font.TTFLight, fontName string) (*types.IndirectRef, error) { +func ttfFontFile(xRefTable *model.XRefTable, fontName string) (*types.IndirectRef, error) { bb, err := font.Read(fontName) if err != nil { return nil, err @@ -155,7 +165,7 @@ func ttfFontFile(xRefTable *model.XRefTable, ttf font.TTFLight, fontName string) return flateEncodedStreamIndRef(xRefTable, bb) } -func ttfSubFontFile(xRefTable *model.XRefTable, ttf font.TTFLight, fontName string, indRef *types.IndirectRef) (*types.IndirectRef, error) { +func ttfSubFontFile(xRefTable *model.XRefTable, fontName string, indRef *types.IndirectRef) (*types.IndirectRef, error) { bb, err := font.Subset(fontName, xRefTable.UsedGIDs[fontName]) if err != nil { return nil, err @@ -232,15 +242,6 @@ func coreFontDict(xRefTable *model.XRefTable, coreFontName string) (*types.Indir if coreFontName != "Symbol" && coreFontName != "ZapfDingbats" { d.InsertName("Encoding", "WinAnsiEncoding") } - // if coreFontName == "Helvetica" { - // indRef, err := PDFDocEncoding(xRefTable) - // if err != nil { - // return nil, err - // } - // d.Insert("Encoding", *indRef) - // } else if coreFontName != "Symbol" && coreFontName != "ZapfDingbats" { - // d.InsertName("Encoding", "WinAnsiEncoding") - // } return xRefTable.IndRefForNewObject(d) } @@ -301,11 +302,11 @@ func ttfFontDescriptorFlags(ttf font.TTFLight) uint32 { } // CIDFontFile returns a TrueType font file or subfont file for fontName. 
-func CIDFontFile(xRefTable *model.XRefTable, ttf font.TTFLight, fontName string, subFont bool) (*types.IndirectRef, error) { +func CIDFontFile(xRefTable *model.XRefTable, fontName string, subFont bool) (*types.IndirectRef, error) { if subFont { - return ttfSubFontFile(xRefTable, ttf, fontName, nil) + return ttfSubFontFile(xRefTable, fontName, nil) } - return ttfFontFile(xRefTable, ttf, fontName) + return ttfFontFile(xRefTable, fontName) } // CIDFontDescriptor returns a font descriptor describing the CIDFont’s default metrics other than its glyph widths. @@ -330,7 +331,7 @@ func CIDFontDescriptor(xRefTable *model.XRefTable, ttf font.TTFLight, fontName, ) if embed { - fontFile, err = CIDFontFile(xRefTable, ttf, fontName, true) + fontFile, err = CIDFontFile(xRefTable, fontName, true) if err != nil { return nil, err } @@ -360,8 +361,8 @@ func CIDFontDescriptor(xRefTable *model.XRefTable, ttf font.TTFLight, fontName, } // FontDescriptor returns a TrueType font descriptor describing font’s default metrics other than its glyph widths. -func FontDescriptor(xRefTable *model.XRefTable, ttf font.TTFLight, fontName, fontLang string) (*types.IndirectRef, error) { - fontFile, err := ttfFontFile(xRefTable, ttf, fontName) +func NewFontDescriptor(xRefTable *model.XRefTable, ttf font.TTFLight, fontName, fontLang string) (*types.IndirectRef, error) { + fontFile, err := ttfFontFile(xRefTable, fontName) if err != nil { return nil, err } @@ -747,7 +748,7 @@ func UpdateUserfont(xRefTable *model.XRefTable, fontName string, f model.FontRes return err } - if _, err := ttfSubFontFile(xRefTable, ttf, fontName, f.FontFile); err != nil { + if _, err := ttfSubFontFile(xRefTable, fontName, f.FontFile); err != nil { return err } @@ -954,7 +955,7 @@ func trueTypeFontDict(xRefTable *model.XRefTable, fontName, fontLang string) (*t return nil, err } - fdIndRef, err := FontDescriptor(xRefTable, ttf, fontName, fontLang) + fdIndRef, err := NewFontDescriptor(xRefTable, ttf, fontName, fontLang) if err != nil { return nil, err } @@ -1068,8 +1069,8 @@ func Name(xRefTable *model.XRefTable, fontDict types.Dict, objNumber int) (prefi } // Lang detects the optional language indicator in a font dict. -func Lang(xRefTable *model.XRefTable, d types.Dict) (string, error) { - o, found := d.Find("FontDescriptor") +func Lang(xRefTable *model.XRefTable, fontDict types.Dict) (string, error) { + o, found := fontDict.Find("FontDescriptor") if found { fd, err := xRefTable.DereferenceDict(o) if err != nil { @@ -1083,9 +1084,21 @@ func Lang(xRefTable *model.XRefTable, d types.Dict) (string, error) { return s, nil } - arr := d.ArrayEntry("DescendantFonts") - indRef := arr[0].(types.IndirectRef) - d1, err := xRefTable.DereferenceDict(indRef) + o, found = fontDict.Find("DescendantFonts") + if !found { + return "", ErrCorruptFontDict + } + + arr, err := xRefTable.DereferenceArray(o) + if err != nil { + return "", err + } + + if len(arr) != 1 { + return "", ErrCorruptFontDict + } + + d1, err := xRefTable.DereferenceDict(arr[0]) if err != nil { return "", err } @@ -1105,3 +1118,107 @@ func Lang(xRefTable *model.XRefTable, d types.Dict) (string, error) { return "", nil } + +func trivialFontDescriptor(xRefTable *model.XRefTable, fontDict types.Dict, objNr int) (types.Dict, error) { + o, ok := fontDict.Find("FontDescriptor") + if !ok { + return nil, nil + } + + // fontDescriptor directly available. 
+ + d, err := xRefTable.DereferenceDict(o) + if err != nil { + return nil, err + } + + if d == nil { + return nil, errors.Errorf("pdfcpu: trivialFontDescriptor: FontDescriptor is null for font object %d\n", objNr) + } + + if d.Type() != nil && *d.Type() != "FontDescriptor" { + return nil, errors.Errorf("pdfcpu: trivialFontDescriptor: FontDescriptor dict incorrect dict type for font object %d\n", objNr) + } + + return d, nil +} + +// FontDescriptor gets the font descriptor for this font. +func FontDescriptor(xRefTable *model.XRefTable, fontDict types.Dict, objNr int) (types.Dict, error) { + if log.OptimizeEnabled() { + log.Optimize.Println("fontDescriptor begin") + } + + d, err := trivialFontDescriptor(xRefTable, fontDict, objNr) + if err != nil { + return nil, err + } + if d != nil { + return d, nil + } + + // Try to access a fontDescriptor in a Descendent font for Type0 fonts. + + o, ok := fontDict.Find("DescendantFonts") + if !ok { + //logErrorOptimize.Printf("FontDescriptor: Neither FontDescriptor nor DescendantFonts for font object %d\n", objectNumber) + return nil, nil + } + + // A descendant font is contained in an array of size 1. + + a, err := xRefTable.DereferenceArray(o) + if err != nil || a == nil { + return nil, errors.Errorf("pdfcpu: fontDescriptor: DescendantFonts: IndirectRef or Array with length 1 expected for font object %d\n", objNr) + } + if len(a) != 1 { + return nil, errors.Errorf("pdfcpu: fontDescriptor: DescendantFonts Array length <> 1 %v\n", a) + } + + // dict is the fontDict of the descendant font. + d, err = xRefTable.DereferenceDict(a[0]) + if err != nil { + return nil, errors.Errorf("pdfcpu: fontDescriptor: No descendant font dict for %v\n", a) + } + if d == nil { + return nil, errors.Errorf("pdfcpu: fontDescriptor: descendant font dict is null for %v\n", a) + } + + if *d.Type() != "Font" { + return nil, errors.Errorf("pdfcpu: fontDescriptor: font dict with incorrect dict type for %v\n", d) + } + + o, ok = d.Find("FontDescriptor") + if !ok { + log.Optimize.Printf("fontDescriptor: descendant font not embedded %s\n", d) + return nil, nil + } + + d, err = xRefTable.DereferenceDict(o) + if err != nil { + return nil, errors.Errorf("pdfcpu: fontDescriptor: No FontDescriptor dict for font object %d\n", objNr) + } + + if log.OptimizeEnabled() { + log.Optimize.Println("fontDescriptor end") + } + + return d, nil +} + +func Embedded(xRefTable *model.XRefTable, fontDict types.Dict, objNr int) (bool, error) { + fd, err := FontDescriptor(xRefTable, fontDict, objNr) + if err != nil { + return false, err + } + if _, ok := fd.Find("FontFile"); ok { + return true, nil + } + if _, ok := fd.Find("FontFile2"); ok { + return true, nil + } + if _, ok := fd.Find("FontFile3"); ok { + return true, nil + } + return false, nil +} diff --git a/pkg/pdfcpu/form/export.go b/pkg/pdfcpu/form/export.go index 2f9a964f..e3861f70 100644 --- a/pkg/pdfcpu/form/export.go +++ b/pkg/pdfcpu/form/export.go @@ -20,6 +20,7 @@ import ( "encoding/json" "io" "path/filepath" + "strconv" "strings" "time" @@ -29,6 +30,15 @@ import ( "github.com/pkg/errors" ) +const ( + + // REQUIRED is used for required dict entries. + REQUIRED = true + + // OPTIONAL is used for optional dict entries. + OPTIONAL = false +) + // Header represents form meta data. 
type Header struct { Source string `json:"source"` @@ -48,8 +58,10 @@ type TextField struct { Pages []int `json:"pages"` ID string `json:"id"` Name string `json:"name,omitempty"` + AltName string `json:"altname,omitempty"` Default string `json:"default,omitempty"` Value string `json:"value"` + MaxLen int `json:"maxlen,omitempty"` Multiline bool `json:"multiline"` Locked bool `json:"locked"` } @@ -59,6 +71,7 @@ type DateField struct { Pages []int `json:"pages"` ID string `json:"id"` Name string `json:"name,omitempty"` + AltName string `json:"altname,omitempty"` Format string `json:"format"` Default string `json:"default,omitempty"` Value string `json:"value"` @@ -70,6 +83,7 @@ type CheckBox struct { Pages []int `json:"pages"` ID string `json:"id"` Name string `json:"name,omitempty"` + AltName string `json:"altname,omitempty"` Default bool `json:"default"` Value bool `json:"value"` Locked bool `json:"locked"` @@ -80,6 +94,7 @@ type RadioButtonGroup struct { Pages []int `json:"pages"` ID string `json:"id"` Name string `json:"name,omitempty"` + AltName string `json:"altname,omitempty"` Options []string `json:"options"` Default string `json:"default,omitempty"` Value string `json:"value"` @@ -91,6 +106,7 @@ type ComboBox struct { Pages []int `json:"pages"` ID string `json:"id"` Name string `json:"name,omitempty"` + AltName string `json:"altname,omitempty"` Editable bool `json:"editable"` Options []string `json:"options"` Default string `json:"default,omitempty"` @@ -103,6 +119,7 @@ type ListBox struct { Pages []int `json:"pages"` ID string `json:"id"` Name string `json:"name,omitempty"` + AltName string `json:"altname,omitempty"` Multi bool `json:"multi"` Options []string `json:"options"` Defaults []string `json:"defaults,omitempty"` @@ -186,15 +203,54 @@ func (f Form) listBoxValuesAndLock(id, name string) ([]string, bool, bool) { return nil, false, false } -func extractRadioButtonGroupOptions(xRefTable *model.XRefTable, d types.Dict) ([]string, error) { +func locateAPN(xRefTable *model.XRefTable, d types.Dict) (types.Dict, error) { + + obj, ok := d.Find("AP") + if !ok { + return nil, errors.New("corrupt form field: missing entry \"AP\"") + } + d1, err := xRefTable.DereferenceDict(obj) + if err != nil { + return nil, err + } + if len(d1) == 0 { + return nil, errors.New("corrupt form field: missing entry \"AP\"") + } + + obj, ok = d1.Find("N") + if !ok { + return nil, errors.New("corrupt AP field: missing entry \"N\"") + } + d2, err := xRefTable.DereferenceDict(obj) + if err != nil { + return nil, err + } + + if len(d2) == 0 { + return nil, errors.New("corrupt AP field: missing entry \"N\"") + } + + return d2, nil +} + +func extractRadioButtonGroupOptions(xRefTable *model.XRefTable, d types.Dict) ([]string, bool, error) { var opts []string p := 0 + opts, err := parseOptions(xRefTable, d, OPTIONAL) + if err != nil { + return nil, false, err + } + + if len(opts) > 0 { + return opts, true, nil + } + for _, o := range d.ArrayEntry("Kids") { d, err := xRefTable.DereferenceDict(o) if err != nil { - return nil, err + return nil, false, err } indRef := d.IndirectRefEntry("P") @@ -206,18 +262,15 @@ func extractRadioButtonGroupOptions(xRefTable *model.XRefTable, d types.Dict) ([ } } - d1 := d.DictEntry("AP") - if d1 == nil { - return nil, errors.New("corrupt form field: missing entry AP") - } - d2 := d1.DictEntry("N") - if d2 == nil { - return nil, errors.New("corrupt AP field: missing entry N") + d1, err := locateAPN(xRefTable, d) + if err != nil { + return nil, false, err } - for k := range d2 { + + for 
k := range d1 { k, err := types.DecodeName(k) if err != nil { - return nil, err + return nil, false, err } if k != "Off" { for _, opt := range opts { @@ -230,15 +283,42 @@ func extractRadioButtonGroupOptions(xRefTable *model.XRefTable, d types.Dict) ([ } } - return opts, nil + return opts, false, nil +} + +func resolveOption(s string, opts []string, explicit bool) (string, error) { + n, err := types.DecodeName(s) + if err != nil { + return "", err + } + if len(opts) > 0 && explicit { + j, err := strconv.Atoi(n) + if err != nil { + return "", err + } + for i, o := range opts { + if i == j { + n = o + break + } + } + } + return n, nil } -func extractRadioButtonGroup(xRefTable *model.XRefTable, page int, d types.Dict, id, name string, locked bool) (*RadioButtonGroup, error) { +func extractRadioButtonGroup(xRefTable *model.XRefTable, page int, d types.Dict, id, name, altName string, locked bool) (*RadioButtonGroup, error) { - rbg := &RadioButtonGroup{Pages: []int{page}, ID: id, Name: name, Locked: locked} + rbg := &RadioButtonGroup{Pages: []int{page}, ID: id, Name: name, AltName: altName, Locked: locked} + + opts, explicit, err := extractRadioButtonGroupOptions(xRefTable, d) + if err != nil { + return nil, err + } + + rbg.Options = opts if s := d.NameEntry("DV"); s != nil { - n, err := types.DecodeName(*s) + n, err := resolveOption(*s, opts, explicit) if err != nil { return nil, err } @@ -246,7 +326,7 @@ func extractRadioButtonGroup(xRefTable *model.XRefTable, page int, d types.Dict, } if s := d.NameEntry("V"); s != nil { - n, err := types.DecodeName(*s) + n, err := resolveOption(*s, opts, explicit) if err != nil { return nil, err } @@ -255,41 +335,35 @@ func extractRadioButtonGroup(xRefTable *model.XRefTable, page int, d types.Dict, } } - opts, err := extractRadioButtonGroupOptions(xRefTable, d) - if err != nil { - return nil, err - } - - rbg.Options = opts - return rbg, nil } -func extractCheckBox(page int, d types.Dict, id, name string, locked bool) (*CheckBox, error) { +func extractCheckBox(page int, d types.Dict, id, name, altName string, locked bool) (*CheckBox, error) { - cb := &CheckBox{Pages: []int{page}, ID: id, Name: name, Locked: locked} + cb := &CheckBox{Pages: []int{page}, ID: id, Name: name, AltName: altName, Locked: locked} if o, ok := d.Find("DV"); ok { - cb.Default = o.(types.Name) == "Yes" + cb.Default = o.(types.Name) != "Off" } if o, ok := d.Find("V"); ok { - cb.Value = o.(types.Name) == "Yes" + n := o.(types.Name) + cb.Value = len(n) > 0 && n != "Off" } return cb, nil } -func extractComboBox(xRefTable *model.XRefTable, page int, d types.Dict, id, name string, locked bool) (*ComboBox, error) { +func extractComboBox(xRefTable *model.XRefTable, page int, d types.Dict, id, name, altName string, locked bool) (*ComboBox, error) { - cb := &ComboBox{Pages: []int{page}, ID: id, Name: name, Locked: locked} + cb := &ComboBox{Pages: []int{page}, ID: id, Name: name, AltName: altName, Locked: locked} if sl := d.StringLiteralEntry("DV"); sl != nil { s, err := types.StringLiteralToString(*sl) if err != nil { return nil, err } - cb.Default = s + cb.Default = strings.TrimSpace(s) } if sl := d.StringLiteralEntry("V"); sl != nil { @@ -297,10 +371,10 @@ func extractComboBox(xRefTable *model.XRefTable, page int, d types.Dict, id, nam if err != nil { return nil, err } - cb.Value = s + cb.Value = strings.TrimSpace(s) } - opts, err := parseOptions(xRefTable, d) + opts, err := parseOptions(xRefTable, d, REQUIRED) if err != nil { return nil, err } @@ -313,8 +387,7 @@ func extractComboBox(xRefTable 
*model.XRefTable, page int, d types.Dict, id, nam return cb, nil } -func extractDateFormat(xRefTable *model.XRefTable, d types.Dict) (*primitives.DateFormat, error) { - +func dateFormatFromJSAction(d types.Dict) (*primitives.DateFormat, error) { d1 := d.DictEntry("AA") if len(d1) > 0 { d2 := d1.DictEntry("F") @@ -336,24 +409,45 @@ func extractDateFormat(xRefTable *model.XRefTable, d types.Dict) (*primitives.Da } } } + return nil, nil +} + +func extractDateFormat(xRefTable *model.XRefTable, d types.Dict) (*primitives.DateFormat, error) { + df, err := dateFormatFromJSAction(d) + if err != nil { + return nil, err + } + if df != nil { + return df, nil + } if o, found := d.Find("DV"); found { - sl, _ := o.(types.StringLiteral) - s, err := types.StringLiteralToString(sl) + o1, err := xRefTable.Dereference(o) if err != nil { return nil, err } + sl, err := types.StringOrHexLiteral(o1) + if err != nil { + return nil, err + } + s := "" + if sl != nil { + s = *sl + } if df, err := primitives.DateFormatForDate(s); err == nil { return df, nil } } if o, found := d.Find("V"); found { - sl, _ := o.(types.StringLiteral) - s, err := types.StringLiteralToString(sl) + sl, err := types.StringOrHexLiteral(o) if err != nil { return nil, err } + s := "" + if sl != nil { + s = *sl + } if df, err := primitives.DateFormatForDate(s); err == nil { return df, nil } @@ -362,61 +456,55 @@ func extractDateFormat(xRefTable *model.XRefTable, d types.Dict) (*primitives.Da return nil, nil } -func extractDateField(page int, d types.Dict, id, name string, df *primitives.DateFormat, locked bool) (*DateField, error) { +func extractDateField(xRefTable *model.XRefTable, page int, d types.Dict, id, name, altName string, df *primitives.DateFormat, locked bool) (*DateField, error) { - dfield := &DateField{Pages: []int{page}, ID: id, Name: name, Format: df.Ext, Locked: locked} + dfield := &DateField{Pages: []int{page}, ID: id, Name: name, AltName: altName, Format: df.Ext, Locked: locked} - if o, found := d.Find("DV"); found { - sl, _ := o.(types.StringLiteral) - s, err := types.StringLiteralToString(sl) - if err != nil { - return nil, err - } - dfield.Default = s + v, err := getV(xRefTable, d) + if err != nil { + return nil, err } + dfield.Value = v - if o, found := d.Find("V"); found { - sl, _ := o.(types.StringLiteral) - s, err := types.StringLiteralToString(sl) - if err != nil { - return nil, err - } - dfield.Value = s + dv, err := getDV(xRefTable, d) + if err != nil { + return nil, err } + dfield.Default = dv return dfield, nil } -func extractTextField(page int, d types.Dict, id, name string, ff *int, locked bool) (*TextField, error) { +func extractTextField(xRefTable *model.XRefTable, page int, d types.Dict, id, name, altName string, ff *int, locked bool) (*TextField, error) { multiLine := ff != nil && uint(primitives.FieldFlags(*ff))&uint(primitives.FieldMultiline) > 0 - tf := &TextField{Pages: []int{page}, ID: id, Name: name, Multiline: multiLine, Locked: locked} + maxLen := 0 + i := d.IntEntry("MaxLen") + if i != nil { + maxLen = *i + } - if o, found := d.Find("DV"); found { - sl, _ := o.(types.StringLiteral) - s, err := types.StringLiteralToString(sl) - if err != nil { - return nil, err - } - tf.Default = s + tf := &TextField{Pages: []int{page}, ID: id, Name: name, AltName: altName, Multiline: multiLine, MaxLen: maxLen, Locked: locked} + + v, err := getV(xRefTable, d) + if err != nil { + return nil, err } + tf.Value = v - if o, found := d.Find("V"); found { - sl, _ := o.(types.StringLiteral) - s, err := 
types.StringLiteralToString(sl) - if err != nil { - return nil, err - } - tf.Value = s + dv, err := getDV(xRefTable, d) + if err != nil { + return nil, err } + tf.Default = dv return tf, nil } -func extractListBox(xRefTable *model.XRefTable, page int, d types.Dict, id, name string, locked, multi bool) (*ListBox, error) { +func extractListBox(xRefTable *model.XRefTable, page int, d types.Dict, id, name, altName string, locked, multi bool) (*ListBox, error) { - lb := &ListBox{Pages: []int{page}, ID: id, Name: name, Locked: locked, Multi: multi} + lb := &ListBox{Pages: []int{page}, ID: id, Name: name, AltName: altName, Locked: locked, Multi: multi} if !multi { if sl := d.StringLiteralEntry("DV"); sl != nil { @@ -424,14 +512,14 @@ func extractListBox(xRefTable *model.XRefTable, page int, d types.Dict, id, name if err != nil { return nil, err } - lb.Defaults = []string{s} + lb.Defaults = []string{strings.TrimSpace(s)} } if sl := d.StringLiteralEntry("V"); sl != nil { s, err := types.StringLiteralToString(*sl) if err != nil { return nil, err } - lb.Values = []string{s} + lb.Values = []string{strings.TrimSpace(s)} } } else { ss, err := parseStringLiteralArray(xRefTable, d, "DV") @@ -446,7 +534,7 @@ func extractListBox(xRefTable *model.XRefTable, page int, d types.Dict, id, name lb.Values = ss } - opts, err := parseOptions(xRefTable, d) + opts, err := parseOptions(xRefTable, d, REQUIRED) if err != nil { return nil, err } @@ -509,11 +597,11 @@ func exportBtn( i int, form *Form, d types.Dict, - id, name string, + id, name, altName string, locked bool, ok *bool) error { - if len(d.ArrayEntry("Kids")) > 0 { + if len(d.ArrayEntry("Kids")) > 1 { for _, rb := range form.RadioButtonGroups { if rb.ID == id && rb.Name == name { @@ -522,7 +610,7 @@ func exportBtn( } } - rbg, err := extractRadioButtonGroup(xRefTable, i, d, id, name, locked) + rbg, err := extractRadioButtonGroup(xRefTable, i, d, id, name, altName, locked) if err != nil { return err } @@ -539,7 +627,7 @@ func exportBtn( } } - cb, err := extractCheckBox(i, d, id, name, locked) + cb, err := extractCheckBox(i, d, id, name, altName, locked) if err != nil { return err } @@ -554,7 +642,7 @@ func exportCh( i int, form *Form, d types.Dict, - id, name string, + id, name, altName string, locked bool, ok *bool) error { @@ -572,7 +660,7 @@ func exportCh( } } - cb, err := extractComboBox(xRefTable, i, d, id, name, locked) + cb, err := extractComboBox(xRefTable, i, d, id, name, altName, locked) if err != nil { return err } @@ -589,7 +677,7 @@ func exportCh( } multi := primitives.FieldFlags(*ff)&primitives.FieldMultiselect > 0 - lb, err := extractListBox(xRefTable, i, d, id, name, locked, multi) + lb, err := extractListBox(xRefTable, i, d, id, name, altName, locked, multi) if err != nil { return err } @@ -604,7 +692,7 @@ func exportTx( i int, form *Form, d types.Dict, - id, name string, + id, name, altName string, ff *int, locked bool, ok *bool) error { @@ -623,7 +711,7 @@ func exportTx( } } - df, err := extractDateField(i, d, id, name, df, locked) + df, err := extractDateField(xRefTable, i, d, id, name, altName, df, locked) if err != nil { return err } @@ -640,7 +728,7 @@ func exportTx( } } - tf, err := extractTextField(i, d, id, name, ff, locked) + tf, err := extractTextField(xRefTable, i, d, id, name, altName, ff, locked) if err != nil { return err } @@ -650,6 +738,21 @@ func exportTx( return nil } +func exportPageField(ft string, xRefTable *model.XRefTable, i int, form *Form, d types.Dict, id, name, altName string, locked bool, ok *bool, ff *int) error { 
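+	// Dispatch on the field type: Btn, Ch and Tx fields are delegated to their dedicated exporters.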
+ var err error + + switch ft { + case "Btn": + err = exportBtn(xRefTable, i, form, d, id, name, altName, locked, ok) + case "Ch": + err = exportCh(xRefTable, i, form, d, id, name, altName, locked, ok) + case "Tx": + err = exportTx(xRefTable, i, form, d, id, name, altName, ff, locked, ok) + } + + return err +} + func exportPageFields(xRefTable *model.XRefTable, i int, form *Form, m map[string]fieldInfo, ok *bool) error { for id, fi := range m { @@ -677,23 +780,20 @@ func exportPageFields(xRefTable *model.XRefTable, i int, form *Form, m map[strin } } - switch *ft { - case "Btn": - if err := exportBtn(xRefTable, i, form, d, id, name, locked, ok); err != nil { - return err - } - - case "Ch": - if err := exportCh(xRefTable, i, form, d, id, name, locked, ok); err != nil { + altName := "" + if o, found := d.Find("TU"); found { + s, err := types.StringOrHexLiteral(o) + if err != nil { return err } - - case "Tx": - if err := exportTx(xRefTable, i, form, d, id, name, ff, locked, ok); err != nil { - return err + if s != nil { + altName = *s } } + if err := exportPageField(*ft, xRefTable, i, form, d, id, name, altName, locked, ok, ff); err != nil { + return err + } } return nil diff --git a/pkg/pdfcpu/form/fill.go b/pkg/pdfcpu/form/fill.go index b5f46505..f052a5a0 100644 --- a/pkg/pdfcpu/form/fill.go +++ b/pkg/pdfcpu/form/fill.go @@ -22,6 +22,7 @@ import ( "strconv" "strings" + "github.com/angel-one/pdfcpu/pkg/font" pdffont "github.com/angel-one/pdfcpu/pkg/pdfcpu/font" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/primitives" @@ -69,6 +70,8 @@ func addImages(ctx *model.Context, pages map[string]*Page) ([]*model.Page, error RadioBtnAPs: map[float64]*primitives.AP{}, OldFieldIDs: types.StringSet{}, Debug: false, + Offline: ctx.Offline, + Timeout: ctx.Timeout, } if err := cacheResIDs(ctx, pdf); err != nil { @@ -302,7 +305,7 @@ func imageBox(s, src, url string) (*primitives.ImageBox, string, error) { s = s[4:] if s[0] != '(' || s[len(s)-1] != ')' { - return nil, "", errors.Errorf("pdfcpu: parsing cvs fieldNames: corrupted @img: <%s>", s) + return nil, "", errors.Errorf("pdfcpu: parsing cvs fieldNames: invalid @img: <%s>", s) } s = s[1 : len(s)-1] @@ -319,7 +322,7 @@ func imageBox(s, src, url string) (*primitives.ImageBox, string, error) { for _, s := range ss { ss1 := strings.Split(s, ":") if len(ss1) != 2 { - return nil, "", errors.Errorf("pdfcpu: parsing cvs fieldNames: corrupted @img: <%s>", s) + return nil, "", errors.Errorf("pdfcpu: parsing cvs fieldNames: invalid @img: <%s>", s) } paramPrefix := strings.TrimSpace(ss1[0]) @@ -459,17 +462,12 @@ func fillRadioButtons(ctx *model.Context, d types.Dict, vNew string, v types.Nam return err } - d1 := d.DictEntry("AP") - if d1 == nil { - return errors.New("pdfcpu: corrupt form field: missing entry AP") - } - - d2 := d1.DictEntry("N") - if d2 == nil { - return errors.New("pdfcpu: corrupt AP field: missing entry N") + d1, err := locateAPN(ctx.XRefTable, d) + if err != nil { + return err } - for k := range d2 { + for k := range d1 { k, err := types.DecodeName(k) if err != nil { return err @@ -491,6 +489,7 @@ func fillRadioButtonGroup( ctx *model.Context, d types.Dict, id, name string, + opts []string, locked bool, format DataFormat, fillDetails func(id, name string, fieldType FieldType, format DataFormat) ([]string, bool, bool), @@ -514,6 +513,16 @@ func fillRadioButtonGroup( } vNew := vv[0] + + if len(opts) > 0 { + for i, o := range opts { + if o == vNew { + vNew = strconv.Itoa(i) + break + } + } + } + vOld := "" if s 
:= d.NameEntry("V"); s != nil { n, err := types.DecodeName(*s) @@ -541,6 +550,34 @@ func fillRadioButtonGroup( return nil } +func fillCheckBoxKid(ctx *model.Context, kids types.Array, off bool) (*types.Name, error) { + d, err := ctx.DereferenceDict(kids[0]) + if err != nil { + return nil, err + } + + d1, err := locateAPN(ctx.XRefTable, d) + if err != nil { + return nil, err + } + + offName, yesName, err := primitives.CalcCheckBoxASNames(ctx, d1) + if err != nil { + return nil, err + } + + asName := yesName + if off { + asName = offName + } + + if _, found := d.Find("AS"); found { + d["AS"] = asName + } + + return &asName, nil +} + func fillCheckBox( ctx *model.Context, d types.Dict, @@ -568,10 +605,11 @@ func fillCheckBox( } s := strings.ToLower(vv[0]) - vNew := strings.HasPrefix(s, "t") + vNew := strings.HasPrefix(s, "t") // true vOld := false if o, found := d.Find("V"); found { - vOld = o.(types.Name) == "Yes" + n := o.(types.Name) + vOld = len(n) > 0 && n != "Off" } if vNew == vOld { return nil @@ -581,18 +619,33 @@ func fillCheckBox( if vNew { v = types.Name("Yes") } + + kids := d.ArrayEntry("Kids") + if len(kids) == 1 { + asName, err := fillCheckBoxKid(ctx, kids, v == types.Name("Off")) + if err != nil { + return err + } + d["V"] = *asName + *ok = true + return nil + } + d["V"] = v if _, found := d.Find("AS"); found { - offName, yesName := primitives.CalcCheckBoxASNames(d) + offName, yesName, err := primitives.CalcCheckBoxASNames(ctx, d) + if err != nil { + return err + } //fmt.Printf("off:<%s> yes:<%s>\n", offName, yesName) asName := yesName if v == "Off" { asName = offName } d["AS"] = asName + d["V"] = asName } *ok = true - return nil } @@ -610,8 +663,13 @@ func fillBtn( return nil } - if len(d.ArrayEntry("Kids")) > 0 { - if err := fillRadioButtonGroup(ctx, d, id, name, locked, format, fillDetails, ok); err != nil { + opts, err := parseOptions(ctx.XRefTable, d, OPTIONAL) + if err != nil { + return err + } + + if len(d.ArrayEntry("Kids")) > 1 { + if err := fillRadioButtonGroup(ctx, d, id, name, opts, locked, format, fillDetails, ok); err != nil { return err } } else { @@ -632,7 +690,6 @@ func fillComboBox( format DataFormat, fonts map[string]types.IndirectRef, fillDetails func(id, name string, fieldType FieldType, format DataFormat) ([]string, bool, bool), - ff *int, ok *bool) error { vv, lock, found := fillDetails(id, name, FTComboBox, format) @@ -640,6 +697,8 @@ func fillComboBox( return nil } + da := d.StringEntry("DA") + vNew := vv[0] if locked { if !lock { @@ -649,7 +708,7 @@ func fillComboBox( } } else if lock { lockFormField(d) - if err := primitives.EnsureComboBoxAP(ctx, d, vNew, fonts); err != nil { + if err := primitives.EnsureComboBoxAP(ctx, d, vNew, da, fonts); err != nil { return err } *ok = true @@ -667,7 +726,7 @@ func fillComboBox( return nil } - s, err := types.EscapeUTF16String(vNew) + s, err := types.EscapedUTF16String(vNew) if err != nil { return err } @@ -702,7 +761,7 @@ func updateListBoxValues(multi bool, d types.Dict, opts, vNew []string) (types.A break } } - s, err := types.EscapeUTF16String(v) + s, err := types.EscapedUTF16String(v) if err != nil { return nil, err } @@ -719,7 +778,7 @@ func updateListBoxValues(multi bool, d types.Dict, opts, vNew []string) (types.A } v := vNew[0] - s, err := types.EscapeUTF16String(v) + s, err := types.EscapedUTF16String(v) if err != nil { return nil, err } @@ -796,7 +855,9 @@ func fillListBox( return err } - if err := primitives.EnsureListBoxAP(ctx, d, opts, ind, fonts); err != nil { + da := d.StringEntry("DA") + + if err 
:= primitives.EnsureListBoxAP(ctx, d, opts, ind, da, fonts); err != nil { return err } @@ -820,7 +881,7 @@ func fillCh( return errors.New("pdfcpu: corrupt form field: missing entry Ff") } - opts, err := parseOptions(ctx.XRefTable, d) + opts, err := parseOptions(ctx.XRefTable, d, REQUIRED) if err != nil { return err } @@ -830,7 +891,7 @@ func fillCh( } if primitives.FieldFlags(*ff)&primitives.FieldCombo > 0 { - return fillComboBox(ctx, d, id, name, opts, locked, format, fonts, fillDetails, ff, ok) + return fillComboBox(ctx, d, id, name, opts, locked, format, fonts, fillDetails, ok) } return fillListBox(ctx, d, id, name, opts, locked, format, fonts, fillDetails, ff, ok) @@ -844,7 +905,6 @@ func fillDateField( format DataFormat, fonts map[string]types.IndirectRef, fillDetails func(id, name string, fieldType FieldType, format DataFormat) ([]string, bool, bool), - ff *int, ok *bool) error { vv, lock, found := fillDetails(id, name, FTDate, format) @@ -865,23 +925,44 @@ func fillDateField( } vNew := vv[0] + if vNew == vOld { return nil } - s, err := types.EscapeUTF16String(vNew) + s, err := types.EscapedUTF16String(vNew) if err != nil { return err } - d["V"] = types.StringLiteral(*s) - if err := primitives.EnsureDateFieldAP(ctx, d, vNew, fonts); err != nil { + da := d.StringEntry("DA") + + kids := d.ArrayEntry("Kids") + if len(kids) > 0 { + + for _, o := range kids { + + d, err := ctx.DereferenceDict(o) + if err != nil { + return err + } + + if err := primitives.EnsureDateFieldAP(ctx, d, vNew, da, fonts); err != nil { + return err + } + + *ok = true + } + + return nil + } + + if err := primitives.EnsureDateFieldAP(ctx, d, vNew, da, fonts); err != nil { return err } *ok = true - return nil } @@ -919,17 +1000,27 @@ func fillTextField( return nil } - s, err := types.EscapeUTF16String(vNew) + s, err := types.EscapedUTF16String(vNew) if err != nil { return err } - d["V"] = types.StringLiteral(*s) multiLine := ff != nil && uint(primitives.FieldFlags(*ff))&uint(primitives.FieldMultiline) > 0 + comb := ff != nil && primitives.FieldFlags(*ff)&primitives.FieldComb > 0 + + maxLen := 0 + i := d.IntEntry("MaxLen") + if i != nil { + maxLen = *i + } + + da := d.StringEntry("DA") + kids := d.ArrayEntry("Kids") if len(kids) > 0 { + for _, o := range kids { d, err := ctx.DereferenceDict(o) @@ -937,7 +1028,7 @@ func fillTextField( return err } - if err := primitives.EnsureTextFieldAP(ctx, d, vNew, multiLine, fonts); err != nil { + if err := primitives.EnsureTextFieldAP(ctx, d, vNew, multiLine, comb, maxLen, da, fonts); err != nil { return err } @@ -947,7 +1038,7 @@ func fillTextField( return nil } - if err := primitives.EnsureTextFieldAP(ctx, d, vNew, multiLine, fonts); err != nil { + if err := primitives.EnsureTextFieldAP(ctx, d, vNew, multiLine, comb, maxLen, da, fonts); err != nil { return err } @@ -970,18 +1061,14 @@ func fillTx( if err != nil { return err } - vOld := "" - if o, found := d.Find("V"); found { - sl, _ := o.(types.StringLiteral) - s, err := types.StringLiteralToString(sl) - if err != nil { - return err - } - vOld = s + + vOld, err := getV(ctx.XRefTable, d) + if err != nil { + return err } if df != nil { - return fillDateField(ctx, d, id, name, vOld, locked, format, fonts, fillDetails, ff, ok) + return fillDateField(ctx, d, id, name, vOld, locked, format, fonts, fillDetails, ok) } return fillTextField(ctx, d, id, name, vOld, locked, format, fonts, fillDetails, ff, ok) @@ -1058,6 +1145,34 @@ func fillWidgetAnnots( return nil } +func setupFillFonts(xRefTable *model.XRefTable) error { + d, err := 
primitives.FormFontResDict(xRefTable) + if err != nil { + return err + } + + m := xRefTable.FillFonts + + if d == nil { + // TODO setup/reuse Helvetica and add to m + return nil + } + + for k, v := range d { + indRef := v.(types.IndirectRef) + fontName, _, _, err := primitives.FormFontDetails(xRefTable, indRef) + if err != nil { + return err + } + + if font.IsCoreFont(fontName) || font.IsUserFont(fontName) { + m[k] = indRef + } + } + + return nil +} + // FillForm populates form fields as provided by fillDetails and also supports virtual image fields. func FillForm( ctx *model.Context, @@ -1075,6 +1190,10 @@ func FillForm( fonts := map[string]types.IndirectRef{} indRefs := map[types.IndirectRef]bool{} + if err := setupFillFonts(xRefTable); err != nil { + return false, nil, err + } + var ok bool for i := 1; i <= xRefTable.PageCount; i++ { diff --git a/pkg/pdfcpu/form/form.go b/pkg/pdfcpu/form/form.go index 06215a38..c893d848 100644 --- a/pkg/pdfcpu/form/form.go +++ b/pkg/pdfcpu/form/form.go @@ -65,14 +65,15 @@ func (ft FieldType) String() string { // Field represents a form field for s particular page number. type Field struct { - Pages []int - Locked bool - Typ FieldType - ID string - Name string - Dv string - V string - Opts string + Pages []int + Locked bool + Typ FieldType + ID string + Name string + AltName string + Dv string + V string + Opts string } func (f Field) pageString() string { @@ -88,8 +89,8 @@ func (f Field) pageString() string { } type FieldMeta struct { - def, val, opt bool - pageMax, defMax, valMax, idMax, nameMax int + altName, def, val, opt bool + pageMax, defMax, valMax, idMax, nameMax, altNameMax int } func fields(xRefTable *model.XRefTable) (types.Array, error) { @@ -250,8 +251,14 @@ func extractStringSlice(a types.Array) ([]string, error) { return ss, nil } -func parseOptions(xRefTable *model.XRefTable, d types.Dict) ([]string, error) { - o, _ := d.Find("Opt") +func parseOptions(xRefTable *model.XRefTable, d types.Dict, required bool) ([]string, error) { + o, ok := d.Find("Opt") + if !ok { + if required { + return nil, errors.New("corrupt form field: missing entry \"Opt\"") + } + return nil, nil + } a, err := xRefTable.DereferenceArray(o) if err != nil { return nil, err @@ -285,27 +292,32 @@ func parseStringLiteralArray(xRefTable *model.XRefTable, d types.Dict, key strin return nil, nil } -func collectRadioButtonGroupOptions(xRefTable *model.XRefTable, d types.Dict) (string, error) { +func collectRadioButtonGroupOptions(xRefTable *model.XRefTable, d types.Dict) ([]string, error) { - var vv []string + vv, err := parseOptions(xRefTable, d, OPTIONAL) + if err != nil { + return nil, err + } + if len(vv) > 0 { + return vv, nil + } for _, o := range d.ArrayEntry("Kids") { + d, err := xRefTable.DereferenceDict(o) if err != nil { - return "", err - } - d1 := d.DictEntry("AP") - if d1 == nil { - return "", errors.New("corrupt form field: missing entry AP") + return nil, err } - d2 := d1.DictEntry("N") - if d2 == nil { - return "", errors.New("corrupt AP field: missing entry N") + + d1, err := locateAPN(xRefTable, d) + if err != nil { + return nil, err } - for k := range d2 { + + for k := range d1 { k, err := types.DecodeName(k) if err != nil { - return "", err + return nil, err } if k != "Off" { found := false @@ -323,19 +335,40 @@ func collectRadioButtonGroupOptions(xRefTable *model.XRefTable, d types.Dict) (s } } - return strings.Join(vv, ","), nil + return vv, nil } func collectRadioButtonGroup(xRefTable *model.XRefTable, d types.Dict, f *Field, fm *FieldMeta) error { 
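+	// Collect the group's options first so that an index-style /V name can be mapped back to its option value.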
f.Typ = FTRadioButtonGroup + opts, err := collectRadioButtonGroupOptions(xRefTable, d) + if err != nil { + return err + } + + f.Opts = strings.Join(opts, ",") + if len(f.Opts) > 0 { + fm.opt = true + } + if s := d.NameEntry("V"); s != nil { v, err := types.DecodeName(*s) if err != nil { return err } if v != "Off" { + if len(opts) > 0 { + j, err := strconv.Atoi(v) + if err == nil { + for i, o := range opts { + if i == j { + v = o + break + } + } + } + } if w := runewidth.StringWidth(v); w > fm.valMax { fm.valMax = w } @@ -344,16 +377,6 @@ func collectRadioButtonGroup(xRefTable *model.XRefTable, d types.Dict, f *Field, } } - s, err := collectRadioButtonGroupOptions(xRefTable, d) - if err != nil { - return err - } - - f.Opts = s - if len(f.Opts) > 0 { - fm.opt = true - } - return nil } @@ -381,13 +404,14 @@ func collectBtn(xRefTable *model.XRefTable, d types.Dict, f *Field, fm *FieldMet f.Dv = dv } - if len(d.ArrayEntry("Kids")) > 0 { + if len(d.ArrayEntry("Kids")) > 1 { return collectRadioButtonGroup(xRefTable, d, f, fm) } f.Typ = FTCheckBox if o, found := d.Find("V"); found { - if o.(types.Name) == "Yes" { + n := o.(types.Name) + if len(n) > 0 && n != "Off" { v := "Yes" if len(v) > fm.valMax { fm.valMax = len(v) @@ -400,7 +424,7 @@ func collectBtn(xRefTable *model.XRefTable, d types.Dict, f *Field, fm *FieldMet return nil } -func collectComboBox(xRefTable *model.XRefTable, d types.Dict, f *Field, fm *FieldMeta) error { +func collectComboBox(d types.Dict, f *Field, fm *FieldMeta) error { f.Typ = FTComboBox if sl := d.StringLiteralEntry("V"); sl != nil { v, err := types.StringLiteralToString(*sl) @@ -484,7 +508,7 @@ func collectListBox(xRefTable *model.XRefTable, multi bool, d types.Dict, f *Fie func collectCh(xRefTable *model.XRefTable, d types.Dict, f *Field, fm *FieldMeta) error { ff := d.IntEntry("Ff") - vv, err := parseOptions(xRefTable, d) + vv, err := parseOptions(xRefTable, d, REQUIRED) if err != nil { return err } @@ -495,7 +519,7 @@ func collectCh(xRefTable *model.XRefTable, d types.Dict, f *Field, fm *FieldMeta } if ff != nil && primitives.FieldFlags(*ff)&primitives.FieldCombo > 0 { - return collectComboBox(xRefTable, d, f, fm) + return collectComboBox(d, f, fm) } multi := ff != nil && (primitives.FieldFlags(*ff)&primitives.FieldMultiselect > 0) @@ -503,42 +527,91 @@ func collectCh(xRefTable *model.XRefTable, d types.Dict, f *Field, fm *FieldMeta return collectListBox(xRefTable, multi, d, f, fm) } -func collectTx(xRefTable *model.XRefTable, d types.Dict, f *Field, fm *FieldMeta) error { +func inheritedV(xRefTable *model.XRefTable, d types.Dict) (string, error) { if o, found := d.Find("V"); found { - sl, _ := o.(types.StringLiteral) - s, err := types.StringLiteralToString(sl) + s1, err := types.StringOrHexLiteral(o) if err != nil { - return err + return "", err } - v := s - if i := strings.Index(s, "\n"); i >= 0 { - v = s[:i] - v += "\\n" + if s1 != nil { + return *s1, nil } + } + indRef := d.IndirectRefEntry("Parent") + if indRef == nil { + return "", nil + } + d, err := xRefTable.DereferenceDict(*indRef) + if err != nil { + return "", err + } + return inheritedV(xRefTable, d) +} + +func getV(xRefTable *model.XRefTable, d types.Dict) (string, error) { + v, err := inheritedV(xRefTable, d) + if err != nil { + return "", err + } + return v, nil +} + +func inheritedDV(xRefTable *model.XRefTable, d types.Dict) (string, error) { + if o, found := d.Find("DV"); found { + s1, err := types.StringOrHexLiteral(o) + if err != nil { + return "", err + } + if s1 != nil { + return *s1, nil + } + } 
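+	// No /DV on this dict: fall back to the value inherited via the /Parent chain.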
+ indRef := d.IndirectRefEntry("Parent") + if indRef == nil { + return "", nil + } + d, err := xRefTable.DereferenceDict(*indRef) + if err != nil { + return "", err + } + return inheritedDV(xRefTable, d) +} + +func getDV(xRefTable *model.XRefTable, d types.Dict) (string, error) { + dv, err := inheritedDV(xRefTable, d) + if err != nil { + return "", err + } + return dv, nil +} + +func collectTx(xRefTable *model.XRefTable, d types.Dict, f *Field, fm *FieldMeta) error { + v, err := getV(xRefTable, d) + if err != nil { + return err + } + if v != "" { + v = strings.ReplaceAll(v, "\x0A", "\\n") if w := runewidth.StringWidth(v); w > fm.valMax { fm.valMax = w } fm.val = true f.V = v } - if o, found := d.Find("DV"); found { - sl, _ := o.(types.StringLiteral) - s, err := types.StringLiteralToString(sl) - if err != nil { - return err - } - dv := s - if i := strings.Index(s, "\n"); i >= 0 { - dv = dv[:i] - dv += "\\n" - } - if w := runewidth.StringWidth(dv); w > fm.defMax { - fm.defMax = w + dv, err := getDV(xRefTable, d) + if err != nil { + return err + } + if dv != "" { + dv = strings.ReplaceAll(dv, "\x0A", "\\n") + if w := runewidth.StringWidth(dv); w > fm.valMax { + fm.valMax = w } fm.def = true f.Dv = dv } + df, err := extractDateFormat(xRefTable, d) if err != nil { return err @@ -550,28 +623,47 @@ func collectTx(xRefTable *model.XRefTable, d types.Dict, f *Field, fm *FieldMeta return nil } -func collectPageField( - xRefTable *model.XRefTable, - d types.Dict, - i int, - fi *fieldInfo, - fm *FieldMeta, - fs *[]Field) error { +func collectField(xRefTable *model.XRefTable, ft string, d types.Dict, f *Field, fm *FieldMeta) error { + var err error - exists := false + switch ft { + case "Btn": + err = collectBtn(xRefTable, d, f, fm) + case "Ch": + err = collectCh(xRefTable, d, f, fm) + case "Tx": + err = collectTx(xRefTable, d, f, fm) + } + + return err +} + +func locateField(fs *[]Field, fi *fieldInfo, fm *FieldMeta, pageNr int) bool { for j, field := range *fs { if field.ID == fi.id && field.Name == fi.name { - field.Pages = append(field.Pages, i) + field.Pages = append(field.Pages, pageNr) ps := field.pageString() if len(ps) > fm.pageMax { fm.pageMax = len(ps) } (*fs)[j] = field - exists = true + return true } } + return false +} - f := Field{Pages: []int{i}} +func collectPageField( + xRefTable *model.XRefTable, + d types.Dict, + pageNr int, + fi *fieldInfo, + fm *FieldMeta, + fs *[]Field) error { + + foundField := locateField(fs, fi, fm, pageNr) + + f := Field{Pages: []int{pageNr}} f.ID = fi.id if w := runewidth.StringWidth(fi.id); w > fm.idMax { @@ -594,28 +686,36 @@ func collectPageField( if ft == nil { ft = d.NameEntry("FT") if ft == nil { - return errors.Errorf("pdfcpu: corrupt form field %s: missing entry FT\n%s", f.ID, d) + return errors.Errorf("pdfcpu: corrupt form field %s: missing entry \"FT\"\n%s", f.ID, d) } } - var err error - - switch *ft { - case "Btn": - err = collectBtn(xRefTable, d, &f, fm) - - case "Ch": - err = collectCh(xRefTable, d, &f, fm) + if o, found := d.Find("TU"); found { + s1, err := types.StringOrHexLiteral(o) + if err != nil { + return err + } + s := "" + if s1 != nil { + s = *s1 + } + if len(s) > 80 { + s = s[:40] + } + altName := s - case "Tx": - err = collectTx(xRefTable, d, &f, fm) + if w := runewidth.StringWidth(altName); w > fm.altNameMax { + fm.altNameMax = w + } + fm.altName = true + f.AltName = altName } - if err != nil { + if err := collectField(xRefTable, *ft, d, &f, fm); err != nil { return err } - if !exists { + if !foundField { *fs = append(*fs, f) } @@ 
-716,6 +816,15 @@ func calcListHeader(fm *FieldMeta) (string, []int) { horSep = append(horSep, 6) } + if fm.altName { + s += draw.VBar + " AltName " + if fm.altNameMax > 7 { + s += strings.Repeat(" ", fm.altNameMax-7) + horSep = append(horSep, 9+fm.altNameMax-7) + } else { + horSep = append(horSep, 9) + } + } if fm.def { s += draw.VBar + " Default " if fm.defMax > 7 { @@ -763,7 +872,7 @@ func multiPageFieldsMap(fs []Field) map[string][]Field { return m } -func renderMultiPageFields(ctx *model.Context, m map[string][]Field, fm *FieldMeta) ([]string, error) { +func renderMultiPageFields(m map[string][]Field, fm *FieldMeta) ([]string, error) { var ss []string @@ -801,6 +910,10 @@ func renderMultiPageFields(ctx *model.Context, m map[string][]Field, fm *FieldMe nameFill := strings.Repeat(" ", fm.nameMax-runewidth.StringWidth(f.Name)) s := fmt.Sprintf("%s%s %s %-9s %s %s%s %s %s%s ", p, pageFill, l, t, draw.VBar, f.ID, idFill, draw.VBar, f.Name, nameFill) p = strings.Repeat(" ", len(p)) + if fm.altName { + altNameFill := strings.Repeat(" ", fm.altNameMax-runewidth.StringWidth(f.AltName)) + s += fmt.Sprintf("%s %s%s ", draw.VBar, f.AltName, altNameFill) + } if fm.def { dvFill := strings.Repeat(" ", fm.defMax-runewidth.StringWidth(f.Dv)) s += fmt.Sprintf("%s %s%s ", draw.VBar, f.Dv, dvFill) @@ -829,7 +942,7 @@ func renderFields(ctx *model.Context, fs []Field, fm *FieldMeta) ([]string, erro m := multiPageFieldsMap(fs) if len(m) > 0 { - ss1, err := renderMultiPageFields(ctx, m, fm) + ss1, err := renderMultiPageFields(m, fm) if err != nil { return nil, err } @@ -873,6 +986,10 @@ func renderFields(ctx *model.Context, fs []Field, fm *FieldMeta) ([]string, erro idFill := strings.Repeat(" ", fm.idMax-runewidth.StringWidth(f.ID)) nameFill := strings.Repeat(" ", fm.nameMax-runewidth.StringWidth(f.Name)) s := fmt.Sprintf("%s%s %s %-9s %s %s%s %s %s%s ", p, pageFill, l, t, draw.VBar, f.ID, idFill, draw.VBar, f.Name, nameFill) + if fm.altName { + altNameFill := strings.Repeat(" ", fm.altNameMax-runewidth.StringWidth(f.AltName)) + s += fmt.Sprintf("%s %s%s ", draw.VBar, f.AltName, altNameFill) + } if fm.def { dvFill := strings.Repeat(" ", fm.defMax-runewidth.StringWidth(f.Dv)) s += fmt.Sprintf("%s %s%s ", draw.VBar, f.Dv, dvFill) @@ -901,7 +1018,7 @@ func FormFields(ctx *model.Context) ([]Field, *FieldMeta, error) { return nil, nil, err } - fm := &FieldMeta{pageMax: 2, idMax: 3, nameMax: 4, defMax: 7, valMax: 5} + fm := &FieldMeta{pageMax: 2, idMax: 3, nameMax: 4, altNameMax: 7, defMax: 7, valMax: 5} fs, err := collectFields(xRefTable, fields, fm) if err != nil { @@ -1051,7 +1168,7 @@ func removeIndRefByIndex(indRefs []types.IndirectRef, i int) []types.IndirectRef return indRefs[:lastIndex] } -func removeFromFields(xRefTable *model.XRefTable, indRefs *[]types.IndirectRef, fields *types.Array) error { +func removeFormFields(xRefTable *model.XRefTable, indRefs *[]types.IndirectRef, fields *types.Array) error { f := types.Array{} for _, v := range *fields { indRef1 := v.(types.IndirectRef) @@ -1085,7 +1202,7 @@ func removeFromFields(xRefTable *model.XRefTable, indRefs *[]types.IndirectRef, if err != nil { return err } - if err := removeFromFields(xRefTable, indRefs, &kids); err != nil { + if err := removeFormFields(xRefTable, indRefs, &kids); err != nil { return err } if len(kids) > 0 { @@ -1164,7 +1281,7 @@ func RemoveFormFields(ctx *model.Context, fieldIDsOrNames []string) (bool, error copy(indRefsClone, indRefs) // Remove fields from AcroDict. 
- if err := removeFromFields(xRefTable, &indRefsClone, &fields); err != nil { + if err := removeFormFields(xRefTable, &indRefsClone, &fields); err != nil { return false, err } @@ -1244,19 +1361,18 @@ func resetBtn(xRefTable *model.XRefTable, d types.Dict) error { // RadiobuttonGroup for _, o := range d.ArrayEntry("Kids") { + d, err := xRefTable.DereferenceDict(o) if err != nil { return err } - d1 := d.DictEntry("AP") - if d1 == nil { - return errors.New("corrupt form field: missing entry AP") - } - d2 := d1.DictEntry("N") - if d2 == nil { - return errors.New("corrupt AP field: missing entry N") + + d1, err := locateAPN(xRefTable, d) + if err != nil { + return err } - for k := range d2 { + + for k := range d1 { k, err := types.DecodeName(k) if err != nil { return err @@ -1333,10 +1449,10 @@ func resetMultiListBox(xRefTable *model.XRefTable, d types.Dict, opts []string) func resetCh(ctx *model.Context, d types.Dict, fonts map[string]types.IndirectRef) error { ff := d.IntEntry("Ff") if ff == nil { - return errors.New("pdfcpu: corrupt form field: missing entry Ff") + return errors.New("pdfcpu: corrupt form field: missing entry \"Ff\"") } - opts, err := parseOptions(ctx.XRefTable, d) + opts, err := parseOptions(ctx.XRefTable, d, REQUIRED) if err != nil { return err } @@ -1356,8 +1472,10 @@ func resetCh(ctx *model.Context, d types.Dict, fonts map[string]types.IndirectRe return err } + da := d.StringEntry("DA") + if primitives.FieldFlags(*ff)&primitives.FieldCombo == 0 { - if err := primitives.EnsureListBoxAP(ctx, d, opts, ind, fonts); err != nil { + if err := primitives.EnsureListBoxAP(ctx, d, opts, ind, da, fonts); err != nil { return err } } @@ -1371,8 +1489,12 @@ func resetTx(ctx *model.Context, d types.Dict, fonts map[string]types.IndirectRe err error ) if o, found := d.Find("DV"); found { - d["V"] = o - sl, _ := o.(types.StringLiteral) + o1, err := ctx.Dereference(o) + if err != nil { + return err + } + d["V"] = o1 + sl, _ := o1.(types.StringLiteral) s, err = types.StringLiteralToString(sl) if err != nil { return err @@ -1384,18 +1506,46 @@ func resetTx(ctx *model.Context, d types.Dict, fonts map[string]types.IndirectRe d.Delete("V") } - isDate := true + isDate := false if s != "" { _, err := primitives.DateFormatForDate(s) isDate = err == nil } + ff := d.IntEntry("Ff") + multiLine := ff != nil && uint(primitives.FieldFlags(*ff))&uint(primitives.FieldMultiline) > 0 + comb := ff != nil && uint(primitives.FieldFlags(*ff))&uint(primitives.FieldComb) > 0 + + da := d.StringEntry("DA") + + kids := d.ArrayEntry("Kids") + if len(kids) > 0 { + + for _, o := range kids { + + d, err := ctx.DereferenceDict(o) + if err != nil { + return err + } + + if isDate { + err = primitives.EnsureDateFieldAP(ctx, d, s, da, fonts) + } else { + err = primitives.EnsureTextFieldAP(ctx, d, s, multiLine, comb, 0, da, fonts) + } + + if err != nil { + return err + } + } + + return nil + } + if isDate { - err = primitives.EnsureDateFieldAP(ctx, d, s, fonts) + err = primitives.EnsureDateFieldAP(ctx, d, s, da, fonts) } else { - ff := d.IntEntry("Ff") - multiLine := ff != nil && uint(primitives.FieldFlags(*ff))&uint(primitives.FieldMultiline) > 0 - err = primitives.EnsureTextFieldAP(ctx, d, s, multiLine, fonts) + err = primitives.EnsureTextFieldAP(ctx, d, s, multiLine, comb, 0, da, fonts) } return err @@ -1450,7 +1600,7 @@ func resetPageFields( if ft == nil { ft = d.NameEntry("FT") if ft == nil { - return errors.Errorf("pdfcpu: corrupt form field %s: missing entry FT\n%s", fi.id, d) + return errors.Errorf("pdfcpu: corrupt form 
field %s: missing entry \"FT\"\n%s", fi.id, d) } } @@ -1550,10 +1700,12 @@ func ensureAP(ctx *model.Context, d types.Dict, fi *fieldInfo, fonts map[string] if ft == nil { ft = d.NameEntry("FT") if ft == nil { - return errors.Errorf("pdfcpu: corrupt form field %s: missing entry FT\n%s", fi.id, d) + return errors.Errorf("pdfcpu: corrupt form field %s: missing entry \"FT\"\n%s", fi.id, d) } } + da := d.StringEntry("DA") + if *ft == "Ch" { ff := d.IntEntry("Ff") @@ -1568,7 +1720,7 @@ func ensureAP(ctx *model.Context, d types.Dict, fi *fieldInfo, fonts map[string] v = s } - if err := primitives.EnsureComboBoxAP(ctx, d, v, fonts); err != nil { + if err := primitives.EnsureComboBoxAP(ctx, d, v, da, fonts); err != nil { return err } @@ -1712,7 +1864,7 @@ func deleteAP(d types.Dict, fi *fieldInfo) error { if ft == nil { ft = d.NameEntry("FT") if ft == nil { - return errors.Errorf("pdfcpu: corrupt form field %s: missing entry FT\n%s", fi.id, d) + return errors.Errorf("pdfcpu: corrupt form field %s: missing entry \"FT\"\n%s", fi.id, d) } } if *ft == "Ch" { diff --git a/pkg/pdfcpu/iccProfile.go b/pkg/pdfcpu/iccProfile.go index 6ec8b84b..8fa038e0 100644 --- a/pkg/pdfcpu/iccProfile.go +++ b/pkg/pdfcpu/iccProfile.go @@ -28,7 +28,7 @@ import ( // // We fall back to the alternate color space and if there is none to whatever color space makes sense. -//ICC profiles use big endian always. +// ICC profiles use big endian always. type iccProfile struct { b []byte rX, rY, rZ float32 // redMatrixColumn; the first column in the matrix, which is used in matrix/TRC transforms. @@ -279,7 +279,7 @@ func (p iccProfile) String() string { s += fmt.Sprintf("Tag %d: signature:%s offset:%d(#%02x) size:%d(#%02x)\n%s\n", i, sig, off, off, size, size, hex.Dump(p.b[off:off+size])) //s += fmt.Sprintf("Tag %d: signature:%s offset:%d(#%02x) size:%d(#%02x)\n", i, sig, off, off, size, size) } - s += fmt.Sprintf("Matrix:\n") + s += "Matrix:\n" s += fmt.Sprintf("%4.4f %4.4f %4.4f\n", p.rX, p.gX, p.bX) s += fmt.Sprintf("%4.4f %4.4f %4.4f\n", p.rY, p.gY, p.bY) s += fmt.Sprintf("%4.4f %4.4f %4.4f\n", p.rZ, p.gZ, p.bZ) diff --git a/pkg/pdfcpu/image.go b/pkg/pdfcpu/image.go index fc346747..2f73dfba 100644 --- a/pkg/pdfcpu/image.go +++ b/pkg/pdfcpu/image.go @@ -18,6 +18,7 @@ package pdfcpu import ( "fmt" + "io" "path/filepath" "sort" "strconv" @@ -27,6 +28,7 @@ import ( "github.com/angel-one/pdfcpu/pkg/pdfcpu/draw" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" + "github.com/pkg/errors" ) // Images returns all embedded images of ctx. 
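Note (illustrative, not part of the patch): the image.go hunks below rework the image listing and add two update entry points, UpdateImagesByObjNr and UpdateImagesByPageNrAndId. A minimal usage sketch, assuming a *model.Context whose Optimize data is populated and a hypothetical replacement image img.png with the same dimensions as the original:

```go
package example

import (
	"os"

	"github.com/angel-one/pdfcpu/pkg/pdfcpu"
	"github.com/angel-one/pdfcpu/pkg/pdfcpu/model"
)

// replaceIm0 swaps the image XObject registered as "Im0" on page 1.
// Per the new helpers, replacements whose Width/Height differ from the
// original image dict are rejected.
func replaceIm0(ctx *model.Context) error {
	f, err := os.Open("img.png") // hypothetical replacement image
	if err != nil {
		return err
	}
	defer f.Close()

	return pdfcpu.UpdateImagesByPageNrAndId(ctx, f, 1, "Im0")
}
```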
@@ -42,10 +44,17 @@ func Images(ctx *model.Context, selectedPages types.IntSet) ([]map[int]model.Ima mm := []map[int]model.Image{} var ( - maxLenObjNr, maxLenID, maxLenSize, maxLenFilters int + maxLenPageNr, maxLenObjNr, maxLenID, maxLenSize, maxLenFilters int ) + maxPageNr := 0 + for _, i := range pageNrs { + + if i > maxPageNr { + maxPageNr = i + } + m, err := ExtractPageImages(ctx, i, true) if err != nil { return nil, nil, err @@ -72,18 +81,28 @@ func Images(ctx *model.Context, selectedPages types.IntSet) ([]map[int]model.Ima mm = append(mm, m) } - maxLen := &ImageListMaxLengths{ObjNr: maxLenObjNr, ID: maxLenID, Size: maxLenSize, Filters: maxLenFilters} + maxLenPageNr = len(strconv.Itoa(maxPageNr)) + + maxLen := &ImageListMaxLengths{PageNr: maxLenPageNr, ObjNr: maxLenObjNr, ID: maxLenID, Size: maxLenSize, Filters: maxLenFilters} return mm, maxLen, nil } func prepHorSep(horSep *[]int, maxLen *ImageListMaxLengths) string { - s := "Page Obj# " + s := "Page " + if maxLen.PageNr > 4 { + s += strings.Repeat(" ", maxLen.PageNr-4) + *horSep = append(*horSep, 5+maxLen.PageNr-4) + } else { + *horSep = append(*horSep, 5) + } + + s += draw.VBar + " Obj# " if maxLen.ObjNr > 4 { s += strings.Repeat(" ", maxLen.ObjNr-4) - *horSep = append(*horSep, 10+maxLen.ObjNr-4) + *horSep = append(*horSep, 6+maxLen.ObjNr-4) } else { - *horSep = append(*horSep, 10) + *horSep = append(*horSep, 6) } s += draw.VBar + " Id " @@ -126,7 +145,39 @@ func sortedObjNrs(ii map[int]model.Image) []int { return objNrs } -func listImages(ctx *model.Context, mm []map[int]model.Image, maxLen *ImageListMaxLengths) ([]string, int, int64, error) { +func attrs(img model.Image) (string, string, string, string, string) { + t := "image" + if img.IsImgMask { + t = "imask" + } + if img.Thumb { + t = "thumb" + } + + sm := " " + if img.HasSMask { + sm = "*" + } + + im := " " + if img.HasImgMask { + im = "*" + } + + bpc := "-" + if img.Bpc > 0 { + bpc = strconv.Itoa(img.Bpc) + } + + interp := " " + if img.Interpol { + interp = "*" + } + + return t, sm, im, bpc, interp +} + +func listImages(mm []map[int]model.Image, maxLen *ImageListMaxLengths) ([]string, int, int64) { ss := []string{} first := true j, size := 0, int64(0) @@ -144,40 +195,22 @@ func listImages(ctx *model.Context, mm []map[int]model.Image, maxLen *ImageListM for _, objNr := range sortedObjNrs(ii) { img := ii[objNr] - pageNr := "" - if newPage { - pageNr = strconv.Itoa(img.PageNr) + pageNr := strconv.Itoa(img.PageNr) + if !newPage { + pageNr = strings.Repeat(" ", len(pageNr)) + } else { newPage = false } - t := "image" - if img.IsImgMask { - t = "imask" - } - if img.Thumb { - t = "thumb" - } - sm := " " - if img.HasSMask { - sm = "*" - } - - im := " " - if img.HasImgMask { - im = "*" - } + t, sm, im, bpc, interp := attrs(img) - bpc := "-" - if img.Bpc > 0 { - bpc = strconv.Itoa(img.Bpc) + s := strconv.Itoa(img.PageNr) + fill0 := strings.Repeat(" ", maxLen.PageNr-len(s)) + if maxLen.PageNr < 4 { + fill0 += strings.Repeat(" ", 4-maxLen.PageNr) } - interp := " " - if img.Interpol { - interp = "*" - } - - s := strconv.Itoa(img.ObjNr) + s = strconv.Itoa(img.ObjNr) fill1 := strings.Repeat(" ", maxLen.ObjNr-len(s)) if maxLen.ObjNr < 4 { fill1 += strings.Repeat(" ", 4-maxLen.ObjNr) @@ -194,8 +227,9 @@ func listImages(ctx *model.Context, mm []map[int]model.Image, maxLen *ImageListM fill3 = strings.Repeat(" ", 4-maxLen.Size) } - ss = append(ss, fmt.Sprintf("%4s %s%s %s %s%s %s %s %s %s %s %5d %s %5d %s %10s %d %s %s %s %s%s %s %s", - pageNr, fill1, strconv.Itoa(img.ObjNr), draw.VBar, + ss = 
append(ss, fmt.Sprintf("%s%s %s %s%s %s %s%s %s %s %s %s %s %5d %s %5d %s %10s %d %s %s %s %s%s %s %s", + fill0, pageNr, draw.VBar, + fill1, strconv.Itoa(img.ObjNr), draw.VBar, fill2, img.Name, draw.VBar, t, sm, im, draw.VBar, img.Width, draw.VBar, @@ -210,11 +244,11 @@ func listImages(ctx *model.Context, mm []map[int]model.Image, maxLen *ImageListM } } } - return ss, j, size, nil + return ss, j, size } type ImageListMaxLengths struct { - ObjNr, ID, Size, Filters int + PageNr, ObjNr, ID, Size, Filters int } // ListImages returns a formatted list of embedded images. @@ -225,12 +259,15 @@ func ListImages(ctx *model.Context, selectedPages types.IntSet) ([]string, error return nil, err } - ss, j, size, err := listImages(ctx, mm, maxLen) - if err != nil { - return nil, err + ss, j, size := listImages(mm, maxLen) + + s := fmt.Sprintf("%d images available", j) + + if j > 0 { + s += fmt.Sprintf(" (%s)", types.ByteSize(size)) } - return append([]string{fmt.Sprintf("%d images available(%s)", j, types.ByteSize(size))}, ss...), nil + return append([]string{s}, ss...), nil } // WriteImageToDisk returns a closure for writing img to disk. @@ -245,14 +282,140 @@ func WriteImageToDisk(outDir, fileName string) func(model.Image, bool, int) erro qual = "thumb" } f := fmt.Sprintf(s+"_%s.%s", fileName, img.PageNr, qual, img.FileType) - // if singleImgPerPage { - // if img.thumb { - // s += "_" + qual - // } - // f = fmt.Sprintf(s+".%s", fileName, img.pageNr, img.FileType) - // } outFile := filepath.Join(outDir, f) log.CLI.Printf("writing %s\n", outFile) return WriteReader(outFile, img) } } + +func validateImageDimensions(ctx *model.Context, objNr, w, h int) error { + imgObj := ctx.Optimize.ImageObjects[objNr] + if imgObj == nil { + return errors.Errorf("pdfcpu: unknown image object for objNr=%d", objNr) + } + + d := imgObj.ImageDict + + width := d.IntEntry("Width") + height := d.IntEntry("Height") + + if width == nil || height == nil { + return errors.New("pdfcpu: corrupt image dict") + } + + if *width != w || *height != h { + return errors.Errorf("pdfcpu: invalid image dimensions, want(%d,%d), got(%d,%d)", w, h, *width, *height) + } + + return nil +} + +// UpdateImagesByObjNr replaces an XObject. +func UpdateImagesByObjNr(ctx *model.Context, rd io.Reader, objNr int) error { + + sd, w, h, err := model.CreateImageStreamDict(ctx.XRefTable, rd) + if err != nil { + return err + } + + if err := validateImageDimensions(ctx, objNr, w, h); err != nil { + return err + } + + genNr := 0 + entry, ok := ctx.FindTableEntry(objNr, genNr) + if !ok { + errors.Errorf("pdfcpu: invalid objNr=%d", objNr) + } + + entry.Object = *sd + + return nil +} + +func isInheritedXObjectResource(inhRes types.Dict, id string) bool { + if inhRes == nil { + return false + } + + d := inhRes.DictEntry("XObject") + if d == nil { + return false + } + + for resId := range d { + if resId == id { + return true + } + } + + return false +} + +// UpdateImagesByPageNrAndId replaces the XObject referenced by pageNr and id. 
+func UpdateImagesByPageNrAndId(ctx *model.Context, rd io.Reader, pageNr int, id string) error { + + imgIndRef, w, h, err := model.CreateImageResource(ctx.XRefTable, rd) + if err != nil { + return err + } + + d, _, inhPAttrs, err := ctx.PageDict(pageNr, false) + if err != nil { + return err + } + + obj, found := d.Find("Resources") + if !found { + if isInheritedXObjectResource(inhPAttrs.Resources, id) { + d1 := types.NewDict() + d1[id] = *imgIndRef + d2 := types.NewDict() + d2["XObject"] = d1 + d["Resources"] = d2 + return nil + } + return errors.Errorf("pdfcpu: page %d: unknown resource %s\n", pageNr, id) + } + + resDict, err := ctx.DereferenceDict(obj) + if err != nil { + return err + } + + obj1, ok := resDict.Find("XObject") + if !ok { + if isInheritedXObjectResource(inhPAttrs.Resources, id) { + d := types.NewDict() + d[id] = *imgIndRef + resDict["XObject"] = d + return nil + } + return errors.Errorf("pdfcpu: page %d: unknown resource %s\n", pageNr, id) + } + + imgResDict, err := ctx.DereferenceDict(obj1) + if err != nil { + return err + } + + for resId, indRef := range imgResDict { + if resId == id { + + ir := indRef.(types.IndirectRef) + if err := validateImageDimensions(ctx, ir.ObjectNumber.Value(), w, h); err != nil { + return err + } + + imgResDict[id] = *imgIndRef + return nil + } + } + + if isInheritedXObjectResource(inhPAttrs.Resources, id) { + imgResDict[id] = *imgIndRef + return nil + } + + return errors.Errorf("pdfcpu: page %d: unknown resource %s\n", pageNr, id) +} diff --git a/pkg/pdfcpu/image_test.go b/pkg/pdfcpu/image_test.go index 5fab526c..ca3683e1 100644 --- a/pkg/pdfcpu/image_test.go +++ b/pkg/pdfcpu/image_test.go @@ -87,7 +87,7 @@ func streamDictForJPGFile(xRefTable *model.XRefTable, fileName string) (*types.S } - sd, err := model.CreateDCTImageObject(xRefTable, bb, c.Width, c.Height, 8, cs) + sd, err := model.CreateDCTImageStreamDict(xRefTable, bb, c.Width, c.Height, 8, cs) if err != nil { return nil, err } @@ -107,7 +107,7 @@ func streamDictForImageFile(xRefTable *model.XRefTable, fileName string) (*types } defer f.Close() - sd, _, _, err := model.CreateImageStreamDict(xRefTable, f, false, false) + sd, _, _, err := model.CreateImageStreamDict(xRefTable, f) return sd, err } @@ -217,7 +217,7 @@ func TestReadWritePNGAndWEBP(t *testing.T) { } // Read in a device gray image stream dump from disk. -func read1BPCDeviceGrayFlateStreamDump(xRefTable *model.XRefTable, fileName string) (*types.StreamDict, error) { +func read1BPCDeviceGrayFlateStreamDump(fileName string) (*types.StreamDict, error) { f, err := os.Open(fileName) if err != nil { return nil, err @@ -257,7 +257,7 @@ func TestReadDeviceGrayWritePNG(t *testing.T) { filename := "DeviceGray" path := filepath.Join(inDir, filename+".raw") - sd, err := read1BPCDeviceGrayFlateStreamDump(xRefTable, path) + sd, err := read1BPCDeviceGrayFlateStreamDump(path) if err != nil { t.Fatalf("err: %v\n", err) } @@ -305,7 +305,7 @@ func TestReadDeviceGrayWritePNG(t *testing.T) { } // Read in a device CMYK image stream dump from disk. 
-func read8BPCDeviceCMYKFlateStreamDump(xRefTable *model.XRefTable, fileName string) (*types.StreamDict, error) { +func read8BPCDeviceCMYKFlateStreamDump(fileName string) (*types.StreamDict, error) { f, err := os.Open(fileName) if err != nil { return nil, err @@ -352,7 +352,7 @@ func TestReadCMYKWriteTIFF(t *testing.T) { filename := "DeviceCMYK" path := filepath.Join(inDir, filename+".raw") - sd, err := read8BPCDeviceCMYKFlateStreamDump(xRefTable, path) + sd, err := read8BPCDeviceCMYKFlateStreamDump(path) if err != nil { t.Errorf("err: %v\n", err) } diff --git a/pkg/pdfcpu/importImage.go b/pkg/pdfcpu/importImage.go index ed891646..b8aac410 100644 --- a/pkg/pdfcpu/importImage.go +++ b/pkg/pdfcpu/importImage.go @@ -118,7 +118,7 @@ func parsePageFormatImp(s string, imp *Import) (err error) { return err } -func parsePageDim(v string, u types.DisplayUnit) (*types.Dim, string, error) { +func ParsePageDim(v string, u types.DisplayUnit) (*types.Dim, string, error) { ss := strings.Split(v, " ") if len(ss) != 2 { @@ -127,12 +127,12 @@ func parsePageDim(v string, u types.DisplayUnit) (*types.Dim, string, error) { w, err := strconv.ParseFloat(ss[0], 64) if err != nil || w <= 0 { - return nil, v, errors.Errorf("pdfcpu: dimension X must be a positiv numeric value: %s\n", ss[0]) + return nil, v, errors.Errorf("pdfcpu: dimension X must be a positive numeric value: %s\n", ss[0]) } h, err := strconv.ParseFloat(ss[1], 64) if err != nil || h <= 0 { - return nil, v, errors.Errorf("pdfcpu: dimension Y must be a positiv numeric value: %s\n", ss[1]) + return nil, v, errors.Errorf("pdfcpu: dimension Y must be a positive numeric value: %s\n", ss[1]) } d := types.Dim{Width: types.ToUserSpace(w, u), Height: types.ToUserSpace(h, u)} @@ -144,7 +144,7 @@ func parseDimensionsImp(s string, imp *Import) (err error) { if imp.UserDim { return errors.New("pdfcpu: only one of formsize(papersize) or dimensions allowed") } - imp.PageDim, imp.PageSize, err = parsePageDim(s, imp.InpUnit) + imp.PageDim, imp.PageSize, err = ParsePageDim(s, imp.InpUnit) imp.UserDim = true return err } @@ -327,56 +327,72 @@ func importImagePDFBytes(wr io.Writer, pageDim *types.Dim, imgWidth, imgHeight f m[0][0], m[0][1], m[1][0], m[1][1], m[2][0], m[2][1]) } -// NewPageForImage creates a new page dict in xRefTable for given image reader r. -func NewPageForImage(xRefTable *model.XRefTable, r io.Reader, parentIndRef *types.IndirectRef, imp *Import) (*types.IndirectRef, error) { +// NewPagesForImage creates a new page dicts in xRefTable for given image reader r. +func NewPagesForImage(xRefTable *model.XRefTable, r io.Reader, parentIndRef *types.IndirectRef, imp *Import) ([]*types.IndirectRef, error) { // create image dict. - imgIndRef, w, h, err := model.CreateImageResource(xRefTable, r, imp.Gray, imp.Sepia) + imgResources, err := model.CreateImageResources(xRefTable, r, imp.Gray, imp.Sepia) if err != nil { return nil, err } - // create resource dict for XObject. 
- d := types.Dict( - map[string]types.Object{ - "ProcSet": types.NewNameArray("PDF", "Text", "ImageB", "ImageC", "ImageI"), - "XObject": types.Dict(map[string]types.Object{"Im0": *imgIndRef}), - }, - ) + indRefs := []*types.IndirectRef{} - resIndRef, err := xRefTable.IndRefForNewObject(d) - if err != nil { - return nil, err - } + for _, imgRes := range imgResources { - dim := &types.Dim{Width: float64(w), Height: float64(h)} - if imp.Pos != types.Full { - dim = imp.PageDim - } - // mediabox = physical page dimensions - mediaBox := types.RectForDim(dim.Width, dim.Height) + // create resource dict for XObject. + d := types.Dict( + map[string]types.Object{ + "ProcSet": types.NewNameArray("PDF", "Text", "ImageB", "ImageC", "ImageI"), + "XObject": types.Dict(map[string]types.Object{imgRes.Res.ID: *imgRes.Res.IndRef}), + }, + ) - var buf bytes.Buffer - importImagePDFBytes(&buf, dim, float64(w), float64(h), imp) - sd, _ := xRefTable.NewStreamDictForBuf(buf.Bytes()) - if err = sd.Encode(); err != nil { - return nil, err - } + resIndRef, err := xRefTable.IndRefForNewObject(d) + if err != nil { + return nil, err + } - contentsIndRef, err := xRefTable.IndRefForNewObject(*sd) - if err != nil { - return nil, err + dim := &types.Dim{Width: float64(imgRes.Width), Height: float64(imgRes.Height)} + if imp.Pos != types.Full { + dim = imp.PageDim + } + // mediabox = physical page dimensions + mediaBox := types.RectForDim(dim.Width, dim.Height) + + var buf bytes.Buffer + importImagePDFBytes(&buf, dim, float64(imgRes.Width), float64(imgRes.Height), imp) + sd, err := xRefTable.NewStreamDictForBuf(buf.Bytes()) + if err != nil { + return nil, err + } + + if err = sd.Encode(); err != nil { + return nil, err + } + + contentsIndRef, err := xRefTable.IndRefForNewObject(*sd) + if err != nil { + return nil, err + } + + pageDict := types.Dict( + map[string]types.Object{ + "Type": types.Name("Page"), + "Parent": *parentIndRef, + "MediaBox": mediaBox.Array(), + "Resources": *resIndRef, + "Contents": *contentsIndRef, + }, + ) + + indRef, err := xRefTable.IndRefForNewObject(pageDict) + if err != nil { + return nil, err + } + + indRefs = append(indRefs, indRef) } - pageDict := types.Dict( - map[string]types.Object{ - "Type": types.Name("Page"), - "Parent": *parentIndRef, - "MediaBox": mediaBox.Array(), - "Resources": *resIndRef, - "Contents": *contentsIndRef, - }, - ) - - return xRefTable.IndRefForNewObject(pageDict) + return indRefs, nil } diff --git a/pkg/pdfcpu/info.go b/pkg/pdfcpu/info.go index b557e08f..74845873 100644 --- a/pkg/pdfcpu/info.go +++ b/pkg/pdfcpu/info.go @@ -19,6 +19,7 @@ package pdfcpu import ( "fmt" "sort" + "strings" "time" "github.com/angel-one/pdfcpu/pkg/log" @@ -345,6 +346,7 @@ type PDFInfo struct { Attachments []model.Attachment `json:"attachments,omitempty"` Unit types.DisplayUnit `json:"-"` UnitString string `json:"unit"` + Fonts []model.FontInfo `json:"fonts,omitempty"` } func (info PDFInfo) renderKeywords(ss *[]string) error { @@ -423,15 +425,17 @@ func (info PDFInfo) renderFlagsPart2(ss *[]string, separator string) { s = "Yes" } *ss = append(*ss, fmt.Sprintf(" Form: %s", s)) + + if info.Signatures || info.AppendOnly { + *ss = append(*ss, " Signatures: Yes") + } + if info.Form { - if info.Signatures || info.AppendOnly { - *ss = append(*ss, " SignaturesExist: Yes") - s = "No" - if info.AppendOnly { - s = "Yes" - } - *ss = append(*ss, fmt.Sprintf(" AppendOnly: %s", s)) + s = "No" + if info.AppendOnly { + s = "Yes" } + *ss = append(*ss, fmt.Sprintf(" AppendOnly: %s", s)) } s = "No" @@ -471,16 
+475,69 @@ func (info *PDFInfo) renderPermissions(ss *[]string) { } func (info *PDFInfo) renderAttachments(ss *[]string) { - ss0 := []string{} - for _, a := range info.Attachments { - ss0 = append(ss0, a.FileName) + for i, a := range info.Attachments { + if i == 0 { + *ss = append(*ss, fmt.Sprintf("%20s: %s", "Attachments", a.FileName)) + continue + } + *ss = append(*ss, fmt.Sprintf("%20s %s", "", a.FileName)) + } +} + +func (info *PDFInfo) renderFonts(ss *[]string) { + if len(info.Fonts) == 0 { + *ss = append(*ss, fmt.Sprintf("%20s: No fonts available", "Fonts")) + return + } + + *ss = append(*ss, fmt.Sprintf("%20s:", "Fonts")) + + maxLenName := 0 + for _, fi := range info.Fonts { + name := fi.Name + if len(fi.Prefix) > 0 { + name = fi.Prefix + "-" + name + } + if len(name) > maxLenName { + maxLenName = len(name) + } + } + + *ss = append(*ss, fmt.Sprintf("Name%s Type Encoding Embedded", strings.Repeat(" ", maxLenName-4))) + *ss = append(*ss, fmt.Sprint(draw.HorSepLine([]int{41 + maxLenName}))) + for _, fi := range info.Fonts { + name := fi.Name + if len(fi.Prefix) > 0 { + name = fi.Prefix + "-" + name + } + *ss = append(*ss, fmt.Sprintf("%s%s %-10s %-20s %t", name, strings.Repeat(" ", maxLenName-len(name)), fi.Type, fi.Encoding, fi.Embedded)) + } +} + +func setupFontInfos(ctx *model.Context, fontInfos *[]model.FontInfo) { + var fontNames []string + for k := range ctx.Optimize.Fonts { + fontNames = append(fontNames, k) + } + sort.Strings(fontNames) + + for _, fontName := range fontNames { + for _, objNr := range ctx.Optimize.Fonts[fontName] { + fontObj := ctx.Optimize.FontObjects[objNr] + fontInfo := model.FontInfo{ + Prefix: fontObj.Prefix, + Name: fontObj.FontName, + Type: fontObj.SubType(), + Encoding: fontObj.Encoding(), + Embedded: fontObj.Embedded, + } + *fontInfos = append(*fontInfos, fontInfo) + } } - sort.Strings(ss0) - *ss = append(*ss, ss0...) } // Info returns info about ctx. -func Info(ctx *model.Context, fileName string, selectedPages types.IntSet) (*PDFInfo, error) { +func Info(ctx *model.Context, fileName string, selectedPages types.IntSet, fonts bool) (*PDFInfo, error) { info := &PDFInfo{FileName: fileName, Unit: ctx.Unit, UnitString: ctx.UnitString()} v := ctx.HeaderVersion @@ -510,10 +567,11 @@ func Info(ctx *model.Context, fileName string, selectedPages types.IntSet) (*PDF info.PageDimensions = m info.Title = ctx.Title + info.Author = ctx.Author info.Subject = ctx.Subject info.Producer = ctx.Producer info.Creator = ctx.Creator - info.CreationDate = ctx.CreationDate + info.CreationDate = ctx.XRefTable.CreationDate info.ModificationDate = ctx.ModDate info.PageMode = "" @@ -528,7 +586,7 @@ func Info(ctx *model.Context, fileName string, selectedPages types.IntSet) (*PDF info.ViewerPref = ctx.ViewerPref - kwl, err := KeywordsList(ctx.XRefTable) + kwl, err := KeywordsList(ctx) if err != nil { return nil, err } @@ -546,7 +604,7 @@ func Info(ctx *model.Context, fileName string, selectedPages types.IntSet) (*PDF info.Outlines = len(ctx.Outlines) > 0 info.Names = len(ctx.Names) > 0 - info.Signatures = ctx.SignatureExist + info.Signatures = ctx.SignatureExist || ctx.AppendOnly || len(ctx.Signatures) > 0 info.AppendOnly = ctx.AppendOnly info.Encrypted = ctx.Encrypt != nil @@ -560,11 +618,19 @@ func Info(ctx *model.Context, fileName string, selectedPages types.IntSet) (*PDF } info.Attachments = aa + fontInfos := []model.FontInfo{} + + if fonts { + setupFontInfos(ctx, &fontInfos) + } + + info.Fonts = fontInfos + return info, nil } // ListInfo returns formatted info about ctx. 
-func ListInfo(info *PDFInfo, selectedPages types.IntSet) ([]string, error) { +func ListInfo(info *PDFInfo, selectedPages types.IntSet, fonts bool) ([]string, error) { var separator = draw.HorSepLine([]int{44}) var ss []string @@ -605,5 +671,9 @@ func ListInfo(info *PDFInfo, selectedPages types.IntSet) ([]string, error) { info.renderPermissions(&ss) info.renderAttachments(&ss) + if fonts { + info.renderFonts(&ss) + } + return ss, nil } diff --git a/pkg/pdfcpu/io.go b/pkg/pdfcpu/io.go new file mode 100644 index 00000000..e07e1972 --- /dev/null +++ b/pkg/pdfcpu/io.go @@ -0,0 +1,64 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pdfcpu + +import ( + "io" + "os" +) + +// Write rd to filepath and respect overwrite. +func Write(rd io.Reader, filepath string, overwrite bool) (bool, error) { + if !overwrite { + if _, err := os.Stat(filepath); err == nil { + return false, nil + } + } + + to, err := os.Create(filepath) + if err != nil { + return false, err + } + defer to.Close() + + _, err = io.Copy(to, rd) + return true, err +} + +// CopyFile copies srcFilename to destFilename +func CopyFile(srcFilename, destFilename string, overwrite bool) (bool, error) { + if !overwrite { + if _, err := os.Stat(destFilename); err == nil { + //log.Printf("skipping: %s already exists", filepath) + return false, nil + } + } + + from, err := os.Open(srcFilename) + if err != nil { + return false, err + } + defer from.Close() + to, err := os.Create(destFilename) + if err != nil { + return false, err + } + defer to.Close() + + _, err = io.Copy(to, from) + return true, err +} diff --git a/pkg/pdfcpu/keyword.go b/pkg/pdfcpu/keyword.go index d6f9fda7..707e74a2 100644 --- a/pkg/pdfcpu/keyword.go +++ b/pkg/pdfcpu/keyword.go @@ -24,44 +24,94 @@ import ( ) // KeywordsList returns a list of keywords as recorded in the document info dict. -func KeywordsList(xRefTable *model.XRefTable) ([]string, error) { - ss := strings.FieldsFunc(xRefTable.Keywords, func(c rune) bool { return c == ',' || c == ';' || c == '\r' }) - for i, s := range ss { - ss[i] = strings.TrimSpace(s) +func KeywordsList(ctx *model.Context) ([]string, error) { + var ss []string + for keyword, val := range ctx.KeywordList { + if val { + ss = append(ss, keyword) + } } return ss, nil } -// KeywordsAdd adds keywords to the document info dict. -// Returns true if at least one keyword was added. 
-func KeywordsAdd(xRefTable *model.XRefTable, keywords []string) error { - - list, err := KeywordsList(xRefTable) +func removeKeywordsFromMetadata(ctx *model.Context) error { + rootDict, err := ctx.Catalog() if err != nil { return err } - for _, s := range keywords { - if !types.MemberOf(s, list) { - xRefTable.Keywords += ", " + types.UTF8ToCP1252(s) - } + indRef, _ := rootDict["Metadata"].(types.IndirectRef) + entry, _ := ctx.FindTableEntryForIndRef(&indRef) + sd, _ := entry.Object.(types.StreamDict) + + if err = sd.Decode(); err != nil { + return err + } + + if err = model.RemoveKeywords(&sd.Content); err != nil { + return err + } + + //fmt.Println(hex.Dump(sd.Content)) + + if err := sd.Encode(); err != nil { + return err } - d, err := xRefTable.DereferenceDict(*xRefTable.Info) + entry.Object = sd + + return nil +} + +func finalizeKeywords(ctx *model.Context) error { + d, err := ctx.DereferenceDict(*ctx.Info) if err != nil || d == nil { return err } - d["Keywords"] = types.StringLiteral(xRefTable.Keywords) + ss, err := KeywordsList(ctx) + if err != nil { + return err + } + + s0 := strings.Join(ss, "; ") + + s, err := types.EscapedUTF16String(s0) + if err != nil { + return err + } + + d["Keywords"] = types.StringLiteral(*s) + + if ctx.CatalogXMPMeta != nil { + removeKeywordsFromMetadata(ctx) + } return nil } +// KeywordsAdd adds keywords to the document info dict. +// Returns true if at least one keyword was added. +func KeywordsAdd(ctx *model.Context, keywords []string) error { + if err := ensureInfoDictAndFileID(ctx); err != nil { + return err + } + + for _, keyword := range keywords { + ctx.KeywordList[strings.TrimSpace(keyword)] = true + } + + return finalizeKeywords(ctx) +} + // KeywordsRemove deletes keywords from the document info dict. // Returns true if at least one keyword was removed. -func KeywordsRemove(xRefTable *model.XRefTable, keywords []string) (bool, error) { - // TODO Handle missing info dict. - d, err := xRefTable.DereferenceDict(*xRefTable.Info) +func KeywordsRemove(ctx *model.Context, keywords []string) (bool, error) { + if ctx.Info == nil { + return false, nil + } + + d, err := ctx.DereferenceDict(*ctx.Info) if err != nil || d == nil { return false, err } @@ -69,38 +119,25 @@ func KeywordsRemove(xRefTable *model.XRefTable, keywords []string) (bool, error) if len(keywords) == 0 { // Remove all keywords. delete(d, "Keywords") - return true, nil - } - kw := make([]string, len(keywords)) - for i, s := range keywords { - kw[i] = types.UTF8ToCP1252(s) - } + if ctx.CatalogXMPMeta != nil { + removeKeywordsFromMetadata(ctx) + } - // Distil document keywords. 
- ss := strings.FieldsFunc(xRefTable.Keywords, func(c rune) bool { return c == ',' || c == ';' || c == '\r' }) + return true, nil + } - xRefTable.Keywords = "" var removed bool - first := true - - for _, s := range ss { - s = strings.TrimSpace(s) - if types.MemberOf(s, kw) { + for keyword := range ctx.KeywordList { + if types.MemberOf(keyword, keywords) { + ctx.KeywordList[keyword] = false removed = true - continue - } - if first { - xRefTable.Keywords = s - first = false - continue } - xRefTable.Keywords += ", " + s } if removed { - d["Keywords"] = types.StringLiteral(xRefTable.Keywords) + err = finalizeKeywords(ctx) } - return removed, nil + return removed, err } diff --git a/pkg/pdfcpu/merge.go b/pkg/pdfcpu/merge.go index 0c693bf7..b88d4af2 100644 --- a/pkg/pdfcpu/merge.go +++ b/pkg/pdfcpu/merge.go @@ -217,7 +217,7 @@ func handleCO(ctxSrc, ctxDest *model.Context, dSrc, dDest types.Dict) error { return nil } -func handleDR(ctxSrc, ctxDest *model.Context, dSrc, dDest types.Dict) error { +func handleDR(ctxSrc *model.Context, dSrc, dDest types.Dict) error { o, found := dSrc.Find("DR") if !found { return nil @@ -312,7 +312,7 @@ func handleFormAttributes(ctxSrc, ctxDest *model.Context, dSrc, dDest types.Dict } // DR: default resource dict - if err := handleDR(ctxSrc, ctxDest, dSrc, dDest); err != nil { + if err := handleDR(ctxSrc, dSrc, dDest); err != nil { return err } @@ -753,7 +753,7 @@ func createDividerPagesDict(ctx *model.Context, parentIndRef types.IndirectRef) last := len(dims) - 1 mediaBox := types.NewRectangle(0, 0, dims[last].Width, dims[last].Height) - indRefPageDict, err := ctx.EmptyPage(indRef, mediaBox) + indRefPageDict, err := ctx.EmptyPage(indRef, mediaBox, 0) if err != nil { return nil, err } diff --git a/pkg/pdfcpu/migrate.go b/pkg/pdfcpu/migrate.go index 89f93060..5a237b06 100644 --- a/pkg/pdfcpu/migrate.go +++ b/pkg/pdfcpu/migrate.go @@ -89,12 +89,8 @@ func migrateObject(o types.Object, ctxSource, ctxDest *model.Context, migrated m } func migrateAnnots(o types.Object, pageIndRef types.IndirectRef, ctxSrc, ctxDest *model.Context, migrated map[int]int) (types.Object, error) { - arr, err := ctxSrc.DereferenceArray(o) - if err != nil { - return nil, err - } - - for i, v := range arr { + arr := o.(types.Array) + for i, v := range o.(types.Array) { var d types.Dict o, ok := v.(types.IndirectRef) if ok { @@ -130,9 +126,11 @@ func migrateAnnots(o types.Object, pageIndRef types.IndirectRef, ctxSrc, ctxDest } pDict.Delete("Parent") } - if d[k], err = migrateObject(v, ctxSrc, ctxDest, migrated); err != nil { + o1, err := migrateObject(v, ctxSrc, ctxDest, migrated) + if err != nil { return nil, err } + d[k] = o1 } } @@ -146,6 +144,24 @@ func migratePageDict(d types.Dict, pageIndRef types.IndirectRef, ctxSrc, ctxDest continue } if k == "Annots" { + o, ok := d[k].(types.IndirectRef) + if ok { + objNr := o.ObjectNumber.Value() + if migrated[objNr] > 0 { + o.ObjectNumber = types.Integer(migrated[objNr]) + d[k] = o + continue + } + v, err = migrateIndRef(&o, ctxSrc, ctxDest, migrated) + if err != nil { + return err + } + d[k] = o + if _, err = migrateAnnots(v, pageIndRef, ctxSrc, ctxDest, migrated); err != nil { + return err + } + continue + } if d[k], err = migrateAnnots(v, pageIndRef, ctxSrc, ctxDest, migrated); err != nil { return err } @@ -158,6 +174,40 @@ func migratePageDict(d types.Dict, pageIndRef types.IndirectRef, ctxSrc, ctxDest return nil } +func migrateAnnot(indRef *types.IndirectRef, fieldsSrc, fieldsDest *types.Array, ctxSrc *model.Context, migrated map[int]int) error { + 
for _, v := range *fieldsSrc { + ir, ok := v.(types.IndirectRef) + if !ok { + continue + } + objNr := ir.ObjectNumber.Value() + if migrated[objNr] == indRef.ObjectNumber.Value() { + *fieldsDest = append(*fieldsDest, *indRef) + break + } + d, err := ctxSrc.DereferenceDict(ir) + if err != nil { + return err + } + o, ok := d.Find("Kids") + if !ok { + continue + } + kids, err := ctxSrc.DereferenceArray(o) + if err != nil { + return err + } + if ok, err = detectMigratedAnnot(ctxSrc, indRef, kids, migrated); err != nil { + return err + } + if ok { + *fieldsDest = append(*fieldsDest, *indRef) + } + } + + return nil +} + func migrateFields(d types.Dict, fieldsSrc, fieldsDest *types.Array, ctxSrc, ctxDest *model.Context, migrated map[int]int) error { o, _ := d.Find("Annots") annots, err := ctxDest.DereferenceArray(o) @@ -186,36 +236,11 @@ func migrateFields(d types.Dict, fieldsSrc, fieldsDest *types.Array, ctxSrc, ctx if found { continue } - for _, v := range *fieldsSrc { - ir, ok := v.(types.IndirectRef) - if !ok { - continue - } - objNr := ir.ObjectNumber.Value() - if migrated[objNr] == indRef.ObjectNumber.Value() { - *fieldsDest = append(*fieldsDest, indRef) - break - } - d, err := ctxSrc.DereferenceDict(ir) - if err != nil { - return err - } - o, ok := d.Find("Kids") - if !ok { - continue - } - kids, err := ctxSrc.DereferenceArray(o) - if err != nil { - return err - } - if ok, err = detectMigratedAnnot(ctxSrc, &indRef, kids, migrated); err != nil { - return err - } - if ok { - *fieldsDest = append(*fieldsDest, indRef) - } + if err := migrateAnnot(&indRef, fieldsSrc, fieldsDest, ctxSrc, migrated); err != nil { + return err } } + return nil } diff --git a/pkg/pdfcpu/model/annotation.go b/pkg/pdfcpu/model/annotation.go index 4cb9e3ac..d594fcd0 100644 --- a/pkg/pdfcpu/model/annotation.go +++ b/pkg/pdfcpu/model/annotation.go @@ -22,6 +22,7 @@ import ( "github.com/angel-one/pdfcpu/pkg/pdfcpu/color" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" + "github.com/pkg/errors" ) // AnnotationFlags represents the PDF annotation flags. @@ -70,6 +71,7 @@ const ( AnnWatermark Ann3D AnnRedact + AnnCustom ) var AnnotTypes = map[string]AnnotationType{ @@ -81,7 +83,7 @@ var AnnotTypes = map[string]AnnotationType{ "Circle": AnnCircle, "Polygon": AnnPolygon, "PolyLine": AnnPolyLine, - "HighLight": AnnHighLight, + "Highlight": AnnHighLight, "Underline": AnnUnderline, "Squiggly": AnnSquiggly, "StrikeOut": AnnStrikeOut, @@ -99,6 +101,7 @@ var AnnotTypes = map[string]AnnotationType{ "Watermark": AnnWatermark, "3D": Ann3D, "Redact": AnnRedact, + "Custom": AnnCustom, } // AnnotTypeStrings manages string representations for annotation types. 
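Note (illustrative, not part of the patch): with the new "Custom" entry in AnnotTypes above, NewAnnotationForRawType (further down in this file) no longer drops out-of-spec subtypes; it falls back to AnnCustom and keeps the raw name. A sketch with arbitrary values:

```go
package example

import (
	"fmt"

	"github.com/angel-one/pdfcpu/pkg/pdfcpu/model"
	"github.com/angel-one/pdfcpu/pkg/pdfcpu/types"
)

func customTypeFallback() {
	rect := *types.RectForDim(100, 50)

	// "Highlight" is a known subtype: SubType = AnnHighLight, CustomSubType = "".
	known := model.NewAnnotationForRawType("Highlight", rect, 0, "", "id1", "", 0, nil, 0, 0, 0)

	// "MyVendorStamp" is out of spec: SubType = AnnCustom and the raw name
	// is preserved and returned by CustomTypeString().
	custom := model.NewAnnotationForRawType("MyVendorStamp", rect, 0, "", "id2", "", 0, nil, 0, 0, 0)

	fmt.Println(known.TypeString())        // "Highlight"
	fmt.Println(custom.TypeString())       // "Custom"
	fmt.Println(custom.CustomTypeString()) // "MyVendorStamp"
}
```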
@@ -111,7 +114,7 @@ var AnnotTypeStrings = map[AnnotationType]string{ AnnCircle: "Circle", AnnPolygon: "Polygon", AnnPolyLine: "PolyLine", - AnnHighLight: "HighLight", + AnnHighLight: "Highlight", AnnUnderline: "Underline", AnnSquiggly: "Squiggly", AnnStrikeOut: "StrikeOut", @@ -129,6 +132,7 @@ var AnnotTypeStrings = map[AnnotationType]string{ AnnWatermark: "Watermark", Ann3D: "3D", AnnRedact: "Redact", + AnnCustom: "Custom", } // BorderStyle (see table 168) @@ -180,57 +184,136 @@ func borderEffectDict(cloudyBorder bool, intensity int) types.Dict { }) } +func borderArray(rx, ry, width float64) types.Array { + return types.NewNumberArray(rx, ry, width) +} + +// LineEndingStyle (see table 179) +type LineEndingStyle int + +const ( + LESquare LineEndingStyle = iota + LECircle + LEDiamond + LEOpenArrow + LEClosedArrow + LENone + LEButt + LEROpenArrow + LERClosedArrow + LESlash +) + +func LineEndingStyleName(les LineEndingStyle) string { + var s string + switch les { + case LESquare: + s = "Square" + case LECircle: + s = "Circle" + case LEDiamond: + s = "Diamond" + case LEOpenArrow: + s = "OpenArrow" + case LEClosedArrow: + s = "ClosedArrow" + case LENone: + s = "None" + case LEButt: + s = "Butt" + case LEROpenArrow: + s = "ROpenArrow" + case LERClosedArrow: + s = "RClosedArrow" + case LESlash: + s = "Slash" + } + return s +} + // AnnotationRenderer is the interface for PDF annotations. type AnnotationRenderer interface { - RenderDict(xRefTable *XRefTable, pageIndRef types.IndirectRef) (types.Dict, error) + RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) Type() AnnotationType RectString() string + APObjNrInt() int ID() string ContentString() string + CustomTypeString() string } -// Annotation represents a PDF annnotation. +// Annotation represents a PDF annotation. type Annotation struct { - SubType AnnotationType // The type of annotation that this dictionary describes. - Rect types.Rectangle // The annotation rectangle, defining the location of the annotation on the page in default user space units. - Contents string // Text that shall be displayed for the annotation. - P *types.IndirectRef // An indirect reference to the page object with which this annotation is associated. - NM string // (Since V1.4) The annotation name, a text string uniquely identifying it among all the annotations on its page. - ModDate string // The date and time when the annotation was most recently modified. - F AnnotationFlags // A set of flags specifying various characteristics of the annotation. - C *color.SimpleColor // The background color of the annotation’s icon when closed. + SubType AnnotationType // The type of annotation that this dictionary describes. + CustomSubType string // Out of spec annot type. + Rect types.Rectangle // The annotation rectangle, defining the location of the annotation on the page in default user space units. + APObjNr int // The objNr of the appearance stream dict. + Contents string // Text that shall be displayed for the annotation. + NM string // (Since V1.4) The annotation name, a text string uniquely identifying it among all the annotations on its page. + ModificationDate string // M - The date and time when the annotation was most recently modified. + P *types.IndirectRef // An indirect reference to the page object with which this annotation is associated. + F AnnotationFlags // A set of flags specifying various characteristics of the annotation. 
+ C *color.SimpleColor // The background color of the annotation’s icon when closed, pop up title bar color, link ann border color. + BorderRadX float64 // Border radius X + BorderRadY float64 // Border radius Y + BorderWidth float64 // Border width + Hash uint32 + // StructParent int + // OC types.dict } // NewAnnotation returns a new annotation. func NewAnnotation( typ AnnotationType, + customTyp string, rect types.Rectangle, - contents string, - pageIndRef *types.IndirectRef, - nm string, + apObjNr int, + contents, id string, + modDate string, f AnnotationFlags, - col *color.SimpleColor) Annotation { + col *color.SimpleColor, + borderRadX float64, + borderRadY float64, + borderWidth float64) Annotation { return Annotation{ - SubType: typ, - Rect: rect, - Contents: contents, - P: pageIndRef, - NM: nm, - F: f, - C: col} + SubType: typ, + CustomSubType: customTyp, + Rect: rect, + APObjNr: apObjNr, + Contents: contents, + NM: id, + ModificationDate: modDate, + F: f, + C: col, + BorderRadX: borderRadX, + BorderRadY: borderRadY, + BorderWidth: borderWidth, + } } // NewAnnotationForRawType returns a new annotation of a specific type. func NewAnnotationForRawType( typ string, rect types.Rectangle, - contents string, - pageIndRef *types.IndirectRef, - nm string, + apObjNr int, + contents, id string, + modDate string, f AnnotationFlags, - col *color.SimpleColor) Annotation { - return NewAnnotation(AnnotTypes[typ], rect, contents, pageIndRef, nm, f, col) + + col *color.SimpleColor, + borderRadX float64, + borderRadY float64, + borderWidth float64) Annotation { + + annType, ok := AnnotTypes[typ] + if !ok { + annType = AnnotTypes["Custom"] + } else { + typ = "" + } + + return NewAnnotation(annType, typ, rect, apObjNr, contents, id, modDate, f, col, borderRadX, borderRadY, borderWidth) } // ID returns the annotation id. @@ -243,11 +326,20 @@ func (ann Annotation) ContentString() string { return ann.Contents } +// ContentString returns a string representation of ann's contents. +func (ann Annotation) CustomTypeString() string { + return ann.CustomSubType +} + // RectString returns ann's positioning rectangle. func (ann Annotation) RectString() string { return ann.Rect.ShortString() } +func (ann Annotation) APObjNrInt() int { + return ann.APObjNr +} + // Type returns ann's type. func (ann Annotation) Type() AnnotationType { return ann.SubType @@ -258,32 +350,88 @@ func (ann Annotation) TypeString() string { return AnnotTypeStrings[ann.SubType] } -// RenderDict is a stub for behavior that renders ann's PDF dict. -func (ann Annotation) RenderDict(xRefTable *XRefTable, pageIndRef types.IndirectRef) (types.Dict, error) { - return nil, nil +// HashString returns the annotation hash. 
+func (ann Annotation) HashString() uint32 { + return ann.Hash +} + +func (ann Annotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d := types.Dict(map[string]types.Object{ + "Type": types.Name("Annot"), + "Subtype": types.Name(ann.TypeString()), + "Rect": ann.Rect.Array(), + }) + + if pageIndRef != nil { + d["P"] = *pageIndRef + } + + if ann.Contents != "" { + s, err := types.EscapedUTF16String(ann.Contents) + if err != nil { + return nil, err + } + d.InsertString("Contents", *s) + } + + if ann.NM != "" { + d.InsertString("NM", ann.NM) + } + + modDate := types.DateString(time.Now()) + if ann.ModificationDate != "" { + _, ok := types.DateTime(ann.ModificationDate, xRefTable.ValidationMode == ValidationRelaxed) + if !ok { + return nil, errors.Errorf("pdfcpu: annotation renderDict - validateDateEntry: <%s> invalid date", ann.ModificationDate) + } + modDate = ann.ModificationDate + } + d.InsertString("ModDate", modDate) + + if ann.F != 0 { + d["F"] = types.Integer(ann.F) + } + + if ann.C != nil { + d["C"] = ann.C.Array() + } + + if ann.BorderWidth > 0 { + d["Border"] = borderArray(ann.BorderRadX, ann.BorderRadY, ann.BorderWidth) + } + + return d, nil } // PopupAnnotation represents PDF Popup annotations. type PopupAnnotation struct { Annotation - ParentIndRef *types.IndirectRef // The parent annotation with which this pop-up annotation shall be associated. + ParentIndRef *types.IndirectRef // The optional parent markup annotation with which this pop-up annotation shall be associated. Open bool // A flag specifying whether the annotation shall initially be displayed open. } // NewPopupAnnotation returns a new popup annotation. func NewPopupAnnotation( rect types.Rectangle, - pageIndRef *types.IndirectRef, + apObjNr int, contents, id string, + modDate string, f AnnotationFlags, - bgCol *color.SimpleColor, - parentIndRef *types.IndirectRef) PopupAnnotation { + col *color.SimpleColor, + borderRadX float64, + borderRadY float64, + borderWidth float64, - ann := NewAnnotation(AnnPopup, rect, contents, pageIndRef, id, f, bgCol) + parentIndRef *types.IndirectRef, + displayOpen bool) PopupAnnotation { + + ann := NewAnnotation(AnnPopup, "", rect, apObjNr, contents, id, modDate, f, col, borderRadX, borderRadY, borderWidth) return PopupAnnotation{ Annotation: ann, - ParentIndRef: parentIndRef} + ParentIndRef: parentIndRef, + Open: displayOpen, + } } // ContentString returns a string representation of ann's content. @@ -295,193 +443,19 @@ func (ann PopupAnnotation) ContentString() string { return s } -// MarkupAnnotation represents a PDF markup annotation. -type MarkupAnnotation struct { - Annotation - T string // The text label that shall be displayed in the title bar of the annotation’s pop-up window when open and active. This entry shall identify the user who added the annotation. - PopupIndRef *types.IndirectRef // An indirect reference to a pop-up annotation for entering or editing the text associated with this annotation. - CA *float64 // (Default: 1.0) The constant opacity value that shall be used in painting the annotation. - RC string // A rich text string that shall be displayed in the pop-up window when the annotation is opened. - CreationDate string // The date and time when the annotation was created. - Subj string // Text representing a short description of the subject being addressed by the annotation. -} - -// NewMarkupAnnotation returns a new markup annotation. 
-func NewMarkupAnnotation( - subType AnnotationType, - rect types.Rectangle, - pageIndRef *types.IndirectRef, - contents, id, title string, - f AnnotationFlags, - bgCol *color.SimpleColor, - popupIndRef *types.IndirectRef, - ca *float64, - rc, subject string) MarkupAnnotation { - - ann := NewAnnotation(subType, rect, contents, pageIndRef, id, f, bgCol) - - return MarkupAnnotation{ - Annotation: ann, - T: title, - PopupIndRef: popupIndRef, - CreationDate: types.DateString(time.Now()), - CA: ca, - RC: rc, - Subj: subject} -} - -// TextAnnotation represents a PDF text annotation aka "Sticky Note". -type TextAnnotation struct { - MarkupAnnotation - Open bool // A flag specifying whether the annotation shall initially be displayed open. - Name string // The name of an icon that shall be used in displaying the annotation. Comment, Key, (Note), Help, NewParagraph, Paragraph, Insert -} - -// NewTextAnnotation returns a new text annotation. -func NewTextAnnotation( - rect types.Rectangle, - contents, id, title string, - f AnnotationFlags, - bgCol *color.SimpleColor, - ca *float64, - rc, subj string, - open bool, - name string) TextAnnotation { - - ma := NewMarkupAnnotation(AnnText, rect, nil, contents, id, title, f, bgCol, nil, ca, rc, subj) - - return TextAnnotation{ - MarkupAnnotation: ma, - Open: open, - Name: name, - } -} - -// RenderDict renders ann into a PDF annotation dict. -func (ann TextAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef types.IndirectRef) (types.Dict, error) { - subject := "Sticky Note" - if ann.Subj != "" { - subject = ann.Subj - } - d := types.Dict(map[string]types.Object{ - "Type": types.Name("Annot"), - "Subtype": types.Name(ann.TypeString()), - "Rect": ann.Rect.Array(), - "P": pageIndRef, - "F": types.Integer(ann.F), - "CreationDate": types.StringLiteral(ann.CreationDate), - "Subj": types.StringLiteral(subject), - "Open": types.Boolean(ann.Open), - }) - if ann.CA != nil { - d.Insert("CA", types.Float(*ann.CA)) - } - if ann.PopupIndRef != nil { - d.Insert("Popup", *ann.PopupIndRef) - } - if ann.RC != "" { - d.InsertString("RC", ann.RC) - } - if ann.Name != "" { - d.InsertName("Name", ann.Name) - } - if ann.Contents != "" { - d.InsertString("Contents", ann.Contents) - } - if ann.NM != "" { - d.InsertString("NM", ann.NM) // TODO check for uniqueness across annotations on this page. - } - if ann.T != "" { - d.InsertString("T", ann.T) - } - if ann.C != nil { - d.Insert("C", ann.C.Array()) - } - return d, nil -} - -// A series of alternating x and y coordinates in PDF user space, specifying points along the path. -type InkPath []float64 - -type InkAnnotation struct { - MarkupAnnotation - InkList []InkPath - BS types.Dict - AP types.Dict -} - -// NewInkAnnotation returns a new ink annotation. 
-func NewInkAnnotation( - rect types.Rectangle, - contents, id, title string, - ink []InkPath, - bs types.Dict, - f AnnotationFlags, - bgCol *color.SimpleColor, - ca *float64, - rc, subj string, - ap types.Dict, -) InkAnnotation { - - ann := NewMarkupAnnotation(AnnInk, rect, nil, contents, id, title, f, bgCol, nil, ca, rc, subj) - - return InkAnnotation{ - MarkupAnnotation: ann, - InkList: ink, - BS: bs, - AP: ap, +func (ann PopupAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d, err := ann.Annotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err } -} -func (ann InkAnnotation) RenderDict(pageIndRef types.IndirectRef) types.Dict { - subject := "Ink Annotation" - if ann.Subj != "" { - subject = ann.Subj - } - ink := types.Array{} - for i := range ann.InkList { - ink = append(ink, types.NewNumberArray(ann.InkList[i]...)) + if ann.ParentIndRef != nil { + d["Parent"] = *ann.ParentIndRef } - d := types.Dict(map[string]types.Object{ - "Type": types.Name("Annot"), - "Subtype": types.Name(ann.TypeString()), - "Rect": ann.Rect.Array(), - "P": pageIndRef, - "F": types.Integer(ann.F), - "CreationDate": types.StringLiteral(ann.CreationDate), - "Subj": types.StringLiteral(subject), - "InkList": ink, - }) - if ann.AP != nil { - d.Insert("AP", ann.AP) - } - if ann.CA != nil { - d.Insert("CA", types.Float(*ann.CA)) - } - if ann.PopupIndRef != nil { - d.Insert("Popup", *ann.PopupIndRef) - } - if ann.RC != "" { - d.InsertString("RC", ann.RC) - } - if ann.BS != nil { - d.Insert("BS", ann.BS) - } - if ann.Contents != "" { - d.InsertString("Contents", ann.Contents) - } - if ann.NM != "" { - d.InsertString("NM", ann.NM) // TODO check for uniqueness across annotations on this page. - } - if ann.T != "" { - d.InsertString("T", ann.T) - } - if ann.C != nil { - d.Insert("C", ann.C.Array()) - } + d["Open"] = types.Boolean(ann.Open) - return d + return d, nil } // LinkAnnotation represents a PDF link annotation. @@ -498,17 +472,20 @@ type LinkAnnotation struct { // NewLinkAnnotation returns a new link annotation. func NewLinkAnnotation( rect types.Rectangle, - quad types.QuadPoints, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + borderCol *color.SimpleColor, + dest *Destination, // supply dest or uri, dest takes precedence uri string, - id string, - f AnnotationFlags, + quad types.QuadPoints, + border bool, borderWidth float64, - borderStyle BorderStyle, - borderCol *color.SimpleColor, - border bool) LinkAnnotation { + borderStyle BorderStyle) LinkAnnotation { - ann := NewAnnotation(AnnLink, rect, "", nil, id, f, borderCol) + ann := NewAnnotation(AnnLink, "", rect, apObjNr, contents, id, modDate, f, borderCol, 0, 0, 0) return LinkAnnotation{ Annotation: ann, @@ -534,22 +511,10 @@ func (ann LinkAnnotation) ContentString() string { } // RenderDict renders ann into a page annotation dict. 
-func (ann LinkAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef types.IndirectRef) (types.Dict, error) { - d := types.Dict(map[string]types.Object{ - "Type": types.Name("Annot"), - "Subtype": types.Name(ann.TypeString()), - "Rect": ann.Rect.Array(), - "P": pageIndRef, - "F": types.Integer(ann.F), - "BS": borderStyleDict(ann.BorderWidth, ann.BorderStyle), - }) - - if !ann.Border { - d["Border"] = types.NewIntegerArray(0, 0, 0) - } else { - if ann.C != nil { - d["C"] = ann.C.Array() - } +func (ann LinkAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d, err := ann.Annotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err } if ann.Dest != nil { @@ -579,93 +544,345 @@ func (ann LinkAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef types.Indi }) d["A"] = actionDict } - if ann.NM != "" { - d.InsertString("NM", ann.NM) // TODO check for uniqueness across annotations on this page. - } + if ann.Quad != nil { d.Insert("QuadPoints", ann.Quad.Array()) } + + if !ann.Border { + d["Border"] = types.NewIntegerArray(0, 0, 0) + } else { + if ann.C != nil { + d["C"] = ann.C.Array() + } + } + + d["BS"] = borderStyleDict(ann.BorderWidth, ann.BorderStyle) + return d, nil } -// SquareAnnotation represents a square annotation. -type SquareAnnotation struct { +// MarkupAnnotation represents a PDF markup annotation. +type MarkupAnnotation struct { Annotation - FillCol *color.SimpleColor - Margins types.Array - BorderWidth float64 - BorderStyle BorderStyle - CloudyBorder bool - CloudyBorderIntensity int // 0,1,2 + T string // The text label that shall be displayed in the title bar of the annotation’s pop-up window when open and active. This entry shall identify the user who added the annotation. + PopupIndRef *types.IndirectRef // An indirect reference to a pop-up annotation for entering or editing the text associated with this annotation. + CA *float64 // (Default: 1.0) The constant opacity value that shall be used in painting the annotation. + RC string // A rich text string that shall be displayed in the pop-up window when the annotation is opened. + CreationDate string // The date and time when the annotation was created. + Subj string // Text representing a short description of the subject being addressed by the annotation. } -// NewSquareAnnotation returns a new square annotation. -func NewSquareAnnotation( +// NewMarkupAnnotation returns a new markup annotation. 
+func NewMarkupAnnotation( + subType AnnotationType, rect types.Rectangle, - contents string, - id string, + apObjNr int, + contents, id string, + modDate string, f AnnotationFlags, + col *color.SimpleColor, + borderRadX float64, + borderRadY float64, borderWidth float64, - borderStyle BorderStyle, - borderCol *color.SimpleColor, - cloudyBorder bool, - cloudyBorderIntensity int, - fillCol *color.SimpleColor, - MLeft, MTop, MRight, MBot float64) SquareAnnotation { - ann := NewAnnotation(AnnSquare, rect, contents, nil, id, f, borderCol) - - if cloudyBorderIntensity < 0 || cloudyBorderIntensity > 2 { - cloudyBorderIntensity = 0 - } + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string) MarkupAnnotation { - squareAnn := SquareAnnotation{ - Annotation: ann, - FillCol: fillCol, - BorderWidth: borderWidth, - BorderStyle: borderStyle, - CloudyBorder: cloudyBorder, - CloudyBorderIntensity: cloudyBorderIntensity, - } + ann := NewAnnotation(subType, "", rect, apObjNr, contents, id, modDate, f, col, borderRadX, borderRadY, borderWidth) + + return MarkupAnnotation{ + Annotation: ann, + T: title, + PopupIndRef: popupIndRef, + CA: ca, + RC: rc, + CreationDate: types.DateString(time.Now()), + Subj: subject} +} + +// ContentString returns a string representation of ann's content. +func (ann MarkupAnnotation) ContentString() string { + s := "\"" + ann.Contents + "\"" + if ann.PopupIndRef != nil { + s += "-> #" + ann.PopupIndRef.ObjectNumber.String() + } + return s +} + +func (ann MarkupAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d, err := ann.Annotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err + } + + if ann.T != "" { + s, err := types.EscapedUTF16String(ann.T) + if err != nil { + return nil, err + } + d.InsertString("T", *s) + } + + if ann.PopupIndRef != nil { + d.Insert("Popup", *ann.PopupIndRef) + } + + if ann.CA != nil { + d.Insert("CA", types.Float(*ann.CA)) + } + + if ann.RC != "" { + s, err := types.EscapedUTF16String(ann.RC) + if err != nil { + return nil, err + } + d.InsertString("RC", *s) + } + + d.InsertString("CreationDate", ann.CreationDate) + + if ann.Subj != "" { + s, err := types.EscapedUTF16String(ann.Subj) + if err != nil { + return nil, err + } + d.InsertString("Subj", *s) + } + + return d, nil +} + +// TextAnnotation represents a PDF text annotation aka "Sticky Note". +type TextAnnotation struct { + MarkupAnnotation + Open bool // A flag specifying whether the annotation shall initially be displayed open. + Name string // The name of an icon that shall be used in displaying the annotation. Comment, Key, (Note), Help, NewParagraph, Paragraph, Insert +} + +// NewTextAnnotation returns a new text annotation. +func NewTextAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + borderRadX float64, + borderRadY float64, + borderWidth float64, + + displayOpen bool, + name string) TextAnnotation { + + ma := NewMarkupAnnotation(AnnText, rect, apObjNr, contents, id, modDate, f, col, borderRadX, borderRadY, borderWidth, title, popupIndRef, ca, rc, subject) + + return TextAnnotation{ + MarkupAnnotation: ma, + Open: displayOpen, + Name: name, + } +} + +// RenderDict renders ann into a PDF annotation dict. 
+func (ann TextAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d, err := ann.MarkupAnnotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err + } + + d["Open"] = types.Boolean(ann.Open) + + if ann.Name != "" { + d.InsertName("Name", ann.Name) + } + + return d, nil +} + +// FreeTextIntent represents the various free text annotation intents. +type FreeTextIntent int + +const ( + IntentFreeText FreeTextIntent = 1 << iota + IntentFreeTextCallout + IntentFreeTextTypeWriter +) + +func FreeTextIntentName(fti FreeTextIntent) string { + var s string + switch fti { + case IntentFreeText: + s = "FreeText" + case IntentFreeTextCallout: + s = "FreeTextCallout" + case IntentFreeTextTypeWriter: + s = "FreeTextTypeWriter" + } + return s +} + +// FreeText Annotation displays text directly on the page. +type FreeTextAnnotation struct { + MarkupAnnotation + Text string // Rich text string, see XFA 3.3 + HAlign types.HAlignment // Code specifying the form of quadding (justification) + FontName string // font name + FontSize int // font size + FontCol *color.SimpleColor // font color + DS string // Default style string + Intent string // Description of the intent of the free text annotation + CallOutLine types.Array // if intent is FreeTextCallout + CallOutLineEndingStyle string + Margins types.Array + BorderWidth float64 + BorderStyle BorderStyle + CloudyBorder bool + CloudyBorderIntensity int // 0,1,2 +} + +// XFA conform rich text string examples: +// The second and fourth words are bold. +// The second and fourth words are italicized. +// For more information see this web site. + +// NewFreeTextAnnotation returns a new free text annotation. +func NewFreeTextAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + text string, + hAlign types.HAlignment, + fontName string, + fontSize int, + fontCol *color.SimpleColor, + ds string, + intent *FreeTextIntent, + callOutLine types.Array, + callOutLineEndingStyle *LineEndingStyle, + MLeft, MTop, MRight, MBot float64, + borderWidth float64, + borderStyle BorderStyle, + cloudyBorder bool, + cloudyBorderIntensity int) FreeTextAnnotation { + + // validate required DA, DS + + // validate callOutline: 2 or 3 points => array of 4 or 6 numbers. 
+ + ma := NewMarkupAnnotation(AnnFreeText, rect, apObjNr, contents, id, modDate, f, col, 0, 0, 0, title, popupIndRef, ca, rc, subject) + + if cloudyBorderIntensity < 0 || cloudyBorderIntensity > 2 { + cloudyBorderIntensity = 0 + } + + freeTextIntent := "" + if intent != nil { + freeTextIntent = FreeTextIntentName(*intent) + } + + leStyle := "" + if callOutLineEndingStyle != nil { + leStyle = LineEndingStyleName(*callOutLineEndingStyle) + } + + freeTextAnn := FreeTextAnnotation{ + MarkupAnnotation: ma, + Text: text, + HAlign: hAlign, + FontName: fontName, + FontSize: fontSize, + FontCol: fontCol, + DS: ds, + Intent: freeTextIntent, + CallOutLine: callOutLine, + CallOutLineEndingStyle: leStyle, + BorderWidth: borderWidth, + BorderStyle: borderStyle, + CloudyBorder: cloudyBorder, + CloudyBorderIntensity: cloudyBorderIntensity, + } if MLeft > 0 || MTop > 0 || MRight > 0 || MBot > 0 { - squareAnn.Margins = types.NewNumberArray(MLeft, MTop, MRight, MBot) + freeTextAnn.Margins = types.NewNumberArray(MLeft, MTop, MRight, MBot) } - return squareAnn + return freeTextAnn } -// RenderDict renders ann into a page annotation dict. -func (ann SquareAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef types.IndirectRef) (types.Dict, error) { - d := types.Dict(map[string]types.Object{ - "Type": types.Name("Annot"), - "Subtype": types.Name(ann.TypeString()), - "Rect": ann.Rect.Array(), - "P": pageIndRef, - "F": types.Integer(ann.F), - "BS": borderStyleDict(ann.BorderWidth, ann.BorderStyle), - }) +// RenderDict renders ann into a PDF annotation dict. +func (ann FreeTextAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d, err := ann.MarkupAnnotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err + } - if ann.NM != "" { - d.InsertString("NM", ann.NM) // TODO check for uniqueness across annotations on this page. + da := "" + + // TODO Implement Tf operator + + // fontID, err := xRefTable.EnsureFont(ann.FontName) // in root page Resources? + // if err != nil { + // return nil, err + // } + + // da := fmt.Sprintf("/%s %d Tf", fontID, ann.FontSize) + + if ann.FontCol != nil { + da += fmt.Sprintf(" %.2f %.2f %.2f rg", ann.FontCol.R, ann.FontCol.G, ann.FontCol.B) } + d["DA"] = types.StringLiteral(da) - if ann.Contents != "" { - d.InsertString("Contents", ann.Contents) + d.InsertInt("Q", int(ann.HAlign)) + + if ann.Text == "" { + if ann.Contents == "" { + return nil, errors.New("pdfcpu: FreeTextAnnotation missing \"text\"") + } + ann.Text = ann.Contents } + s, err := types.EscapedUTF16String(ann.Text) + if err != nil { + return nil, err + } + d.InsertString("RC", *s) - if ann.C != nil { - d["C"] = ann.C.Array() + if ann.DS != "" { + d.InsertString("DS", ann.DS) } - if ann.FillCol != nil { - d["IC"] = ann.FillCol.Array() + if ann.Intent != "" { + d.InsertName("IT", ann.Intent) + if ann.Intent == "FreeTextCallout" { + if len(ann.CallOutLine) > 0 { + d["CL"] = ann.CallOutLine + d.InsertName("LE", ann.CallOutLineEndingStyle) + } + } } if ann.Margins != nil { d["RD"] = ann.Margins } + if ann.BorderWidth > 0 { + d["BS"] = borderStyleDict(ann.BorderWidth, ann.BorderStyle) + } + if ann.CloudyBorder && ann.CloudyBorderIntensity > 0 { d["BE"] = borderEffectDict(ann.CloudyBorder, ann.CloudyBorderIntensity) } @@ -673,9 +890,185 @@ func (ann SquareAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef types.In return d, nil } -// CircleAnnotation represents a square annotation. 
-type CircleAnnotation struct {
-	Annotation
+// LineIntent represents the various line annotation intents.
+type LineIntent int
+
+const (
+	IntentLineArrow LineIntent = 1 << iota
+	IntentLineDimension
+)
+
+func LineIntentName(li LineIntent) string {
+	var s string
+	switch li {
+	case IntentLineArrow:
+		s = "LineArrow"
+	case IntentLineDimension:
+		s = "LineDimension"
+	}
+	return s
+}
+
+// LineAnnotation represents a line annotation.
+type LineAnnotation struct {
+	MarkupAnnotation
+	P1, P2 types.Point // Two points in default user space.
+	LineEndings types.Array // Optional array of two names that shall specify the line ending styles.
+	LeaderLineLength float64 // Length of leader lines in default user space that extend from each endpoint of the line perpendicular to the line itself.
+	LeaderLineOffset float64 // Non-negative number that shall represent the length of the leader line offset, which is the amount of empty space between the endpoints of the annotation and the beginning of the leader lines.
+	LeaderLineExtensionLength float64 // Non-negative number that shall represent the length of leader line extensions that extend from the line proper 180 degrees from the leader lines.
+	Intent string // Optional description of the intent of the line annotation.
+	Measure types.Dict // Optional measure dictionary that shall specify the scale and units that apply to the line annotation.
+	Caption bool // Use text specified by "Contents" or "RC" as caption.
+	CaptionPositionTop bool // If true, the caption shall be on top of the line; otherwise it shall be centred inside the line.
+	CaptionOffsetX float64
+	CaptionOffsetY float64
+	FillCol *color.SimpleColor
+	BorderWidth float64
+	BorderStyle BorderStyle
+}
+
+// NewLineAnnotation returns a new line annotation.
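A sketch of a dimension line built with the constructor that follows; coordinates, caption text and border width are invented, and nil stands in for every optional pointer or dict that is not used.

func exampleDimensionLine(rect types.Rectangle) LineAnnotation {
	var flags AnnotationFlags
	var bs BorderStyle // zero value border style
	intent := IntentLineDimension
	p1 := types.Point{X: 100, Y: 100}
	p2 := types.Point{X: 300, Y: 100}
	return NewLineAnnotation(
		rect, 0, "120 mm", "line-1", "", flags, nil,
		"Reviewer", nil, nil, "", "",
		p1, p2,
		nil, nil, // begin/end line ending styles
		0, 0, 0,  // leader line length, offset, extension length
		&intent,  // rendered as IT "LineDimension"
		nil,      // measure dict
		true,     // show the caption taken from Contents
		false,    // caption centred on the line, not on top
		0, 0,     // caption offsets
		nil,      // fill color
		1, bs,    // border width, border style
	)
}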
+func NewLineAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + p1, p2 types.Point, + beginLineEndingStyle *LineEndingStyle, + endLineEndingStyle *LineEndingStyle, + leaderLineLength float64, + leaderLineOffset float64, + leaderLineExtensionLength float64, + intent *LineIntent, + measure types.Dict, + caption bool, + captionPosTop bool, + captionOffsetX float64, + captionOffsetY float64, + fillCol *color.SimpleColor, + borderWidth float64, + borderStyle BorderStyle) LineAnnotation { + + ma := NewMarkupAnnotation(AnnLine, rect, apObjNr, contents, id, modDate, f, col, 0, 0, 0, title, popupIndRef, ca, rc, subject) + + lineIntent := "" + if intent != nil { + lineIntent = LineIntentName(*intent) + } + + lineAnn := LineAnnotation{ + MarkupAnnotation: ma, + P1: p1, + P2: p2, + LeaderLineLength: leaderLineLength, + LeaderLineOffset: leaderLineOffset, + LeaderLineExtensionLength: leaderLineExtensionLength, + Intent: lineIntent, + Measure: measure, + Caption: caption, + CaptionPositionTop: captionPosTop, + CaptionOffsetX: captionOffsetX, + CaptionOffsetY: captionOffsetY, + FillCol: fillCol, + BorderWidth: borderWidth, + BorderStyle: borderStyle, + } + + if beginLineEndingStyle != nil && endLineEndingStyle != nil { + lineAnn.LineEndings = + types.NewNameArray( + LineEndingStyleName(*beginLineEndingStyle), + LineEndingStyleName(*endLineEndingStyle), + ) + } + + return lineAnn +} + +func (ann LineAnnotation) validateLeaderLineAttrs() error { + if ann.LeaderLineExtensionLength < 0 { + return errors.New("pdfcpu: LineAnnotation leader line extension length must not be negative.") + } + + if ann.LeaderLineExtensionLength > 0 && ann.LeaderLineLength == 0 { + return errors.New("pdfcpu: LineAnnotation leader line length missing.") + } + + if ann.LeaderLineOffset < 0 { + return errors.New("pdfcpu: LineAnnotation leader line offset must not be negative.") + } + + return nil +} + +// RenderDict renders ann into a PDF annotation dict. +func (ann LineAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + + d, err := ann.MarkupAnnotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err + } + + if err := ann.validateLeaderLineAttrs(); err != nil { + return nil, err + } + + d["L"] = types.NewNumberArray(ann.P1.X, ann.P1.Y, ann.P2.X, ann.P2.Y) + + if ann.LeaderLineExtensionLength > 0 { + d["LLE"] = types.Float(ann.LeaderLineExtensionLength) + } + + if ann.LeaderLineLength > 0 { + d["LL"] = types.Float(ann.LeaderLineLength) + if ann.LeaderLineOffset > 0 { + d["LLO"] = types.Float(ann.LeaderLineOffset) + } + } + + if len(ann.Measure) > 0 { + d["Measure"] = ann.Measure + } + + if ann.Intent != "" { + d.InsertName("IT", ann.Intent) + + } + + d["Cap"] = types.Boolean(ann.Caption) + if ann.Caption { + if ann.CaptionPositionTop { + d["CP"] = types.Name("Top") + } + d["CO"] = types.NewNumberArray(ann.CaptionOffsetX, ann.CaptionOffsetY) + } + + if ann.FillCol != nil { + d["IC"] = ann.FillCol.Array() + } + + if ann.BorderWidth > 0 { + d["BS"] = borderStyleDict(ann.BorderWidth, ann.BorderStyle) + } + + if len(ann.LineEndings) == 2 { + d["LE"] = ann.LineEndings + } + + return d, nil +} + +// SquareAnnotation represents a square annotation. 
+type SquareAnnotation struct { + MarkupAnnotation FillCol *color.SimpleColor Margins types.Array BorderWidth float64 @@ -684,28 +1077,34 @@ type CircleAnnotation struct { CloudyBorderIntensity int // 0,1,2 } -// NewCircleAnnotation returns a new circle annotation. -func NewCircleAnnotation( +// NewSquareAnnotation returns a new square annotation. +func NewSquareAnnotation( rect types.Rectangle, - contents string, - id string, + apObjNr int, + contents, id string, + modDate string, f AnnotationFlags, + col *color.SimpleColor, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + fillCol *color.SimpleColor, + MLeft, MTop, MRight, MBot float64, borderWidth float64, borderStyle BorderStyle, - borderCol *color.SimpleColor, cloudyBorder bool, - cloudyBorderIntensity int, - fillCol *color.SimpleColor, - MLeft, MTop, MRight, MBot float64) CircleAnnotation { + cloudyBorderIntensity int) SquareAnnotation { - ann := NewAnnotation(AnnCircle, rect, contents, nil, id, f, borderCol) + ma := NewMarkupAnnotation(AnnSquare, rect, apObjNr, contents, id, modDate, f, col, 0, 0, 0, title, popupIndRef, ca, rc, subject) if cloudyBorderIntensity < 0 || cloudyBorderIntensity > 2 { cloudyBorderIntensity = 0 } - circleAnn := CircleAnnotation{ - Annotation: ann, + squareAnn := SquareAnnotation{ + MarkupAnnotation: ma, FillCol: fillCol, BorderWidth: borderWidth, BorderStyle: borderStyle, @@ -714,33 +1113,17 @@ func NewCircleAnnotation( } if MLeft > 0 || MTop > 0 || MRight > 0 || MBot > 0 { - circleAnn.Margins = types.NewNumberArray(MLeft, MTop, MRight, MBot) + squareAnn.Margins = types.NewNumberArray(MLeft, MTop, MRight, MBot) } - return circleAnn + return squareAnn } // RenderDict renders ann into a page annotation dict. -func (ann CircleAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef types.IndirectRef) (types.Dict, error) { - d := types.Dict(map[string]types.Object{ - "Type": types.Name("Annot"), - "Subtype": types.Name(ann.TypeString()), - "Rect": ann.Rect.Array(), - "P": pageIndRef, - "F": types.Integer(ann.F), - "BS": borderStyleDict(ann.BorderWidth, ann.BorderStyle), - }) - - if ann.NM != "" { - d.InsertString("NM", ann.NM) // TODO check for uniqueness across annotations on this page. - } - - if ann.Contents != "" { - d.InsertString("Contents", ann.Contents) - } - - if ann.C != nil { - d["C"] = ann.C.Array() +func (ann SquareAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d, err := ann.MarkupAnnotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err } if ann.FillCol != nil { @@ -751,9 +1134,593 @@ func (ann CircleAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef types.In d["RD"] = ann.Margins } + if ann.BorderWidth > 0 { + d["BS"] = borderStyleDict(ann.BorderWidth, ann.BorderStyle) + } + if ann.CloudyBorder && ann.CloudyBorderIntensity > 0 { d["BE"] = borderEffectDict(ann.CloudyBorder, ann.CloudyBorderIntensity) } return d, nil } + +// CircleAnnotation represents a square annotation. +type CircleAnnotation struct { + MarkupAnnotation + FillCol *color.SimpleColor + Margins types.Array + BorderWidth float64 + BorderStyle BorderStyle + CloudyBorder bool + CloudyBorderIntensity int // 0,1,2 +} + +// NewCircleAnnotation returns a new circle annotation. 
+func NewCircleAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + fillCol *color.SimpleColor, + MLeft, MTop, MRight, MBot float64, + borderWidth float64, + borderStyle BorderStyle, + cloudyBorder bool, + cloudyBorderIntensity int) CircleAnnotation { + + ma := NewMarkupAnnotation(AnnCircle, rect, apObjNr, contents, id, modDate, f, col, 0, 0, 0, title, popupIndRef, ca, rc, subject) + + if cloudyBorderIntensity < 0 || cloudyBorderIntensity > 2 { + cloudyBorderIntensity = 0 + } + + circleAnn := CircleAnnotation{ + MarkupAnnotation: ma, + FillCol: fillCol, + BorderWidth: borderWidth, + BorderStyle: borderStyle, + CloudyBorder: cloudyBorder, + CloudyBorderIntensity: cloudyBorderIntensity, + } + + if MLeft > 0 || MTop > 0 || MRight > 0 || MBot > 0 { + circleAnn.Margins = types.NewNumberArray(MLeft, MTop, MRight, MBot) + } + + return circleAnn +} + +// RenderDict renders ann into a page annotation dict. +func (ann CircleAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d, err := ann.MarkupAnnotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err + } + + if ann.FillCol != nil { + d["IC"] = ann.FillCol.Array() + } + + if ann.Margins != nil { + d["RD"] = ann.Margins + } + + if ann.BorderWidth > 0 { + d["BS"] = borderStyleDict(ann.BorderWidth, ann.BorderStyle) + } + + if ann.CloudyBorder && ann.CloudyBorderIntensity > 0 { + d["BE"] = borderEffectDict(ann.CloudyBorder, ann.CloudyBorderIntensity) + } + + return d, nil +} + +// PolygonIntent represents the various polygon annotation intents. +type PolygonIntent int + +const ( + IntentPolygonCloud PolygonIntent = 1 << iota + IntentPolygonDimension +) + +func PolygonIntentName(pi PolygonIntent) string { + var s string + switch pi { + case IntentPolygonCloud: + s = "PolygonCloud" + case IntentPolygonDimension: + s = "PolygonDimension" + } + return s +} + +// PolygonAnnotation represents a polygon annotation. +type PolygonAnnotation struct { + MarkupAnnotation + Vertices types.Array // Array of numbers specifying the alternating horizontal and vertical coordinates, respectively, of each vertex, in default user space. + Path types.Array // Array of n arrays, each supplying the operands for a path building operator (m, l or c). + Intent string // Optional description of the intent of the polygon annotation. + Measure types.Dict // Optional measure dictionary that shall specify the scale and units that apply to the annotation. + FillCol *color.SimpleColor + BorderWidth float64 + BorderStyle BorderStyle + CloudyBorder bool + CloudyBorderIntensity int // 0,1,2 +} + +// NewPolygonAnnotation returns a new polygon annotation. 
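A sketch of a cloudy triangle via the constructor defined next. Since the render code rejects annotations carrying both Vertices and Path, only Vertices is populated; all coordinates and the interior color are invented.

func exampleTriangle(rect types.Rectangle) PolygonAnnotation {
	var flags AnnotationFlags
	var bs BorderStyle
	intent := IntentPolygonCloud
	fill := color.SimpleColor{R: 1, G: 1, B: 0}
	vertices := types.NewNumberArray(100, 100, 200, 300, 300, 100) // x1 y1 x2 y2 x3 y3
	return NewPolygonAnnotation(
		rect, 0, "triangle", "poly-1", "", flags, nil,
		"Reviewer", nil, nil, "", "",
		vertices,
		nil,     // Path stays empty because Vertices is used
		&intent, // rendered as IT "PolygonCloud"
		nil,     // measure dict
		&fill,   // interior color IC
		1, bs,   // border width, border style
		true, 1, // cloudy border with intensity 1
	)
}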
+func NewPolygonAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + vertices types.Array, + path types.Array, + intent *PolygonIntent, + measure types.Dict, + fillCol *color.SimpleColor, + borderWidth float64, + borderStyle BorderStyle, + cloudyBorder bool, + cloudyBorderIntensity int) PolygonAnnotation { + + ma := NewMarkupAnnotation(AnnPolygon, rect, apObjNr, contents, id, modDate, f, col, 0, 0, 0, title, popupIndRef, ca, rc, subject) + + polygonIntent := "" + if intent != nil { + polygonIntent = PolygonIntentName(*intent) + } + + if cloudyBorderIntensity < 0 || cloudyBorderIntensity > 2 { + cloudyBorderIntensity = 0 + } + + polygonAnn := PolygonAnnotation{ + MarkupAnnotation: ma, + Vertices: vertices, + Path: path, + Intent: polygonIntent, + Measure: measure, + FillCol: fillCol, + BorderWidth: borderWidth, + BorderStyle: borderStyle, + CloudyBorder: cloudyBorder, + CloudyBorderIntensity: cloudyBorderIntensity, + } + + return polygonAnn +} + +// RenderDict renders ann into a PDF annotation dict. +func (ann PolygonAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + + d, err := ann.MarkupAnnotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err + } + + if len(ann.Measure) > 0 { + d["Measure"] = ann.Measure + } + + if len(ann.Vertices) > 0 && len(ann.Path) > 0 { + return nil, errors.New("pdfcpu: PolygonAnnotation supports \"Vertices\" or \"Path\" only") + } + + if len(ann.Vertices) > 0 { + d["Vertices"] = ann.Vertices + } else { + d["Path"] = ann.Path + } + + if ann.Intent != "" { + d.InsertName("IT", ann.Intent) + + } + + if ann.FillCol != nil { + d["IC"] = ann.FillCol.Array() + } + + if ann.BorderWidth > 0 { + d["BS"] = borderStyleDict(ann.BorderWidth, ann.BorderStyle) + } + + if ann.CloudyBorder && ann.CloudyBorderIntensity > 0 { + d["BE"] = borderEffectDict(ann.CloudyBorder, ann.CloudyBorderIntensity) + } + + return d, nil +} + +// PolyLineIntent represents the various polyline annotation intents. +type PolyLineIntent int + +const ( + IntentPolyLinePolygonCloud PolyLineIntent = 1 << iota + IntentPolyLineDimension +) + +func PolyLineIntentName(pi PolyLineIntent) string { + var s string + switch pi { + case IntentPolyLineDimension: + s = "PolyLineDimension" + } + return s +} + +type PolyLineAnnotation struct { + MarkupAnnotation + Vertices types.Array // Array of numbers specifying the alternating horizontal and vertical coordinates, respectively, of each vertex, in default user space. + Path types.Array // Array of n arrays, each supplying the operands for a path building operator (m, l or c). + Intent string // Optional description of the intent of the polyline annotation. + Measure types.Dict // Optional measure dictionary that shall specify the scale and units that apply to the annotation. + FillCol *color.SimpleColor + BorderWidth float64 + BorderStyle BorderStyle + LineEndings types.Array // Optional array of two names that shall specify the line ending styles. +} + +// NewPolyLineAnnotation returns a new polyline annotation. 
+func NewPolyLineAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + vertices types.Array, + path types.Array, + intent *PolyLineIntent, + measure types.Dict, + fillCol *color.SimpleColor, + borderWidth float64, + borderStyle BorderStyle, + beginLineEndingStyle *LineEndingStyle, + endLineEndingStyle *LineEndingStyle) PolyLineAnnotation { + + ma := NewMarkupAnnotation(AnnPolyLine, rect, apObjNr, contents, id, modDate, f, col, 0, 0, 0, title, popupIndRef, ca, rc, subject) + + polyLineIntent := "" + if intent != nil { + polyLineIntent = PolyLineIntentName(*intent) + } + + polyLineAnn := PolyLineAnnotation{ + MarkupAnnotation: ma, + Vertices: vertices, + Path: path, + Intent: polyLineIntent, + Measure: measure, + FillCol: fillCol, + BorderWidth: borderWidth, + BorderStyle: borderStyle, + } + + if beginLineEndingStyle != nil && endLineEndingStyle != nil { + polyLineAnn.LineEndings = + types.NewNameArray( + LineEndingStyleName(*beginLineEndingStyle), + LineEndingStyleName(*endLineEndingStyle), + ) + } + + return polyLineAnn +} + +// RenderDict renders ann into a PDF annotation dict. +func (ann PolyLineAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + + d, err := ann.MarkupAnnotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err + } + + if len(ann.Measure) > 0 { + d["Measure"] = ann.Measure + } + + if len(ann.Vertices) > 0 && len(ann.Path) > 0 { + return nil, errors.New("pdfcpu: PolyLineAnnotation supports \"Vertices\" or \"Path\" only") + } + + if len(ann.Vertices) > 0 { + d["Vertices"] = ann.Vertices + } else { + d["Path"] = ann.Path + } + + if ann.Intent != "" { + d.InsertName("IT", ann.Intent) + + } + + if ann.FillCol != nil { + d["IC"] = ann.FillCol.Array() + } + + if ann.BorderWidth > 0 { + d["BS"] = borderStyleDict(ann.BorderWidth, ann.BorderStyle) + } + + if len(ann.LineEndings) == 2 { + d["LE"] = ann.LineEndings + } + + return d, nil +} + +type TextMarkupAnnotation struct { + MarkupAnnotation + Quad types.QuadPoints +} + +func NewTextMarkupAnnotation( + subType AnnotationType, + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + borderRadX float64, + borderRadY float64, + borderWidth float64, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + quad types.QuadPoints) TextMarkupAnnotation { + + ma := NewMarkupAnnotation(subType, rect, apObjNr, contents, id, modDate, f, col, borderRadX, borderRadY, borderWidth, title, popupIndRef, ca, rc, subject) + + return TextMarkupAnnotation{ + MarkupAnnotation: ma, + Quad: quad, + } +} + +func (ann TextMarkupAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d, err := ann.MarkupAnnotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err + } + + if ann.Quad != nil { + d.Insert("QuadPoints", ann.Quad.Array()) + } + + return d, nil +} + +type HighlightAnnotation struct { + TextMarkupAnnotation +} + +func NewHighlightAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + borderRadX float64, + borderRadY float64, + borderWidth float64, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + quad 
types.QuadPoints) HighlightAnnotation { + + return HighlightAnnotation{ + NewTextMarkupAnnotation(AnnHighLight, rect, apObjNr, contents, id, modDate, f, col, borderRadX, borderRadY, borderWidth, title, popupIndRef, ca, rc, subject, quad), + } +} + +type UnderlineAnnotation struct { + TextMarkupAnnotation +} + +func NewUnderlineAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + borderRadX float64, + borderRadY float64, + borderWidth float64, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + quad types.QuadPoints) UnderlineAnnotation { + + return UnderlineAnnotation{ + NewTextMarkupAnnotation(AnnUnderline, rect, apObjNr, contents, id, modDate, f, col, borderRadX, borderRadY, borderWidth, title, popupIndRef, ca, rc, subject, quad), + } +} + +type SquigglyAnnotation struct { + TextMarkupAnnotation +} + +func NewSquigglyAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + borderRadX float64, + borderRadY float64, + borderWidth float64, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + quad types.QuadPoints) SquigglyAnnotation { + + return SquigglyAnnotation{ + NewTextMarkupAnnotation(AnnSquiggly, rect, apObjNr, contents, id, modDate, f, col, borderRadX, borderRadY, borderWidth, title, popupIndRef, ca, rc, subject, quad), + } +} + +type StrikeOutAnnotation struct { + TextMarkupAnnotation +} + +func NewStrikeOutAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + borderRadX float64, + borderRadY float64, + borderWidth float64, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + quad types.QuadPoints) StrikeOutAnnotation { + + return StrikeOutAnnotation{ + NewTextMarkupAnnotation(AnnStrikeOut, rect, apObjNr, contents, id, modDate, f, col, borderRadX, borderRadY, borderWidth, title, popupIndRef, ca, rc, subject, quad), + } +} + +type CaretAnnotation struct { + MarkupAnnotation + RD *types.Rectangle // A set of four numbers that shall describe the numerical differences between two rectangles: the Rect entry of the annotation and the actual boundaries of the underlying caret. + Paragraph bool // A new paragraph symbol (¶) shall be associated with the caret. +} + +func NewCaretAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + borderRadX float64, + borderRadY float64, + borderWidth float64, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + rd *types.Rectangle, + paragraph bool) CaretAnnotation { + + ma := NewMarkupAnnotation(AnnCaret, rect, apObjNr, contents, id, modDate, f, col, borderRadX, borderRadY, borderWidth, title, popupIndRef, ca, rc, subject) + + return CaretAnnotation{ + MarkupAnnotation: ma, + RD: rd, + Paragraph: paragraph, + } +} + +func (ann CaretAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d, err := ann.MarkupAnnotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err + } + + if ann.RD != nil { + d["RD"] = ann.RD.Array() + } + + if ann.Paragraph { + d["Sy"] = types.Name("P") + } + + return d, nil +} + +// A series of alternating x and y coordinates in PDF user space, specifying points along the path. 
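A short sketch of a two-stroke freehand scribble; each InkPath (declared just below) alternates x and y user-space coordinates, and every value here is invented.

func exampleScribble(rect types.Rectangle) InkAnnotation {
	var flags AnnotationFlags
	var bs BorderStyle
	strokes := []InkPath{
		{100, 100, 110, 120, 130, 115}, // first stroke, three points
		{150, 100, 160, 125},           // second stroke, two points
	}
	return NewInkAnnotation(
		rect, 0, "scribble", "ink-1", "", flags, nil,
		"Reviewer", nil, nil, "", "",
		strokes,
		1, bs, // border width, border style
	)
}

RenderDict then emits each stroke as a nested number array under InkList.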
+type InkPath []float64 + +type InkAnnotation struct { + MarkupAnnotation + InkList []InkPath // Array of n arrays, each representing a stroked path of points in user space. + BorderWidth float64 + BorderStyle BorderStyle +} + +func NewInkAnnotation( + rect types.Rectangle, + apObjNr int, + contents, id string, + modDate string, + f AnnotationFlags, + col *color.SimpleColor, + title string, + popupIndRef *types.IndirectRef, + ca *float64, + rc, subject string, + + ink []InkPath, + borderWidth float64, + borderStyle BorderStyle) InkAnnotation { + + ma := NewMarkupAnnotation(AnnInk, rect, apObjNr, contents, id, modDate, f, col, 0, 0, 0, title, popupIndRef, ca, rc, subject) + + return InkAnnotation{ + MarkupAnnotation: ma, + InkList: ink, + BorderWidth: borderWidth, + BorderStyle: borderStyle, + } +} + +func (ann InkAnnotation) RenderDict(xRefTable *XRefTable, pageIndRef *types.IndirectRef) (types.Dict, error) { + d, err := ann.MarkupAnnotation.RenderDict(xRefTable, pageIndRef) + if err != nil { + return nil, err + } + + ink := types.Array{} + for i := range ann.InkList { + ink = append(ink, types.NewNumberArray(ann.InkList[i]...)) + } + d["InkList"] = ink + + if ann.BorderWidth > 0 { + d["BS"] = borderStyleDict(ann.BorderWidth, ann.BorderStyle) + } + + return d, nil +} diff --git a/pkg/pdfcpu/model/attach.go b/pkg/pdfcpu/model/attach.go index d5e7a235..4c7c8760 100644 --- a/pkg/pdfcpu/model/attach.go +++ b/pkg/pdfcpu/model/attach.go @@ -23,7 +23,6 @@ import ( "sort" "time" - "github.com/angel-one/pdfcpu/pkg/filter" "github.com/angel-one/pdfcpu/pkg/log" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -42,7 +41,7 @@ func (a Attachment) String() string { return fmt.Sprintf("Attachment: id:%s desc:%s modTime:%s", a.ID, a.Desc, a.ModTime) } -func decodeFileSpecStreamDict(sd *types.StreamDict, id string) error { +func decodeFileSpecStreamDict(sd *types.StreamDict) error { fpl := sd.FilterPipeline if fpl == nil { @@ -50,23 +49,6 @@ func decodeFileSpecStreamDict(sd *types.StreamDict, id string) error { return nil } - // Ignore filter chains with length > 1 - if len(fpl) > 1 { - if log.DebugEnabled() { - log.Debug.Printf("decodedFileSpecStreamDict: ignore %s, more than 1 filter.\n", id) - } - return nil - } - - // Only FlateDecode supported. - if fpl[0].Name != filter.Flate { - if log.DebugEnabled() { - log.Debug.Printf("decodedFileSpecStreamDict: ignore %s, %s filter unsupported.\n", id, fpl[0].Name) - } - return nil - } - - // Decode streamDict for supported filters only. 
return sd.Decode() } @@ -92,7 +74,7 @@ func fileSpecStreamDict(xRefTable *XRefTable, d types.Dict) (*types.StreamDict, } d, err := xRefTable.DereferenceDict(o) - if err != nil || o == nil { + if err != nil || d == nil { return nil, err } @@ -122,6 +104,27 @@ func (xRefTable *XRefTable) NewFileSpecDictForAttachment(a Attachment) (types.Di return xRefTable.NewFileSpecDict(a.ID, a.ID, a.Desc, *sd) } +func getModDate(xRefTable *XRefTable, obj types.Object) (*time.Time, error) { + errInvalidModDate := errors.New("pdfcpu: invalid date ModDate") + o, err := xRefTable.Dereference(obj) + if err != nil || o == nil { + return nil, errInvalidModDate + } + sl, ok := o.(types.StringLiteral) + if !ok { + return nil, errInvalidModDate + } + s, err := types.StringLiteralToString(sl) + if err != nil { + return nil, errInvalidModDate + } + md, ok := types.DateTime(s, xRefTable.ValidationMode == ValidationRelaxed) + if !ok { + return nil, errInvalidModDate + } + return &md, nil +} + func fileSpecStreamDictInfo(xRefTable *XRefTable, id string, o types.Object, decode bool) (*types.StreamDict, string, string, *time.Time, error) { d, err := xRefTable.DereferenceDict(o) if err != nil { @@ -149,16 +152,16 @@ func fileSpecStreamDictInfo(xRefTable *XRefTable, id string, o types.Object, dec var modDate *time.Time if d = sd.DictEntry("Params"); d != nil { - if s := d.StringEntry("ModDate"); s != nil { - dt, ok := types.DateTime(*s, xRefTable.ValidationMode == ValidationRelaxed) - if !ok { - return nil, desc, "", nil, errors.New("pdfcpu: invalid date ModDate") + obj, ok := d.Find("ModDate") + if ok { + modDate, err = getModDate(xRefTable, obj) + if err != nil { + return nil, desc, "", nil, err } - modDate = &dt } } - err = decodeFileSpecStreamDict(sd, id) + err = decodeFileSpecStreamDict(sd) return sd, desc, fileName, modDate, err } diff --git a/pkg/pdfcpu/model/booklet.go b/pkg/pdfcpu/model/booklet.go index 3847a41f..7e402df0 100644 --- a/pkg/pdfcpu/model/booklet.go +++ b/pkg/pdfcpu/model/booklet.go @@ -64,6 +64,11 @@ func (b BookletBinding) String() string { return "" } +type BookletPage struct { + Number int + Rotate bool +} + func drawGuideLineLabel(w io.Writer, x, y float64, s string, mb *types.Rectangle, fm FontMap, rot int) { fontName := "Helvetica" td := TextDescriptor{ @@ -145,8 +150,13 @@ func getCutFolds(nup *NUp) (horizontal cutOrFold, vertical cutOrFold) { // Really, it has two horizontal cuts. return cut, fold case 8: - // Also has a horizontal cut in the center. - return fold, cut + if nup.BookletBinding == LongEdge { + // Also has cuts in the center row & column. + return cut, cut + } else { + // short edge has the fold in the center col. 
cut on each row + return cut, fold + } } return none, none } @@ -206,10 +216,17 @@ func DrawBookletGuides(nup *NUp, w io.Writer) FontMap { drawGuideHorizontal(w, height*1/3, width, horz, nup, mb, fm) drawGuideHorizontal(w, height*2/3, width, horz, nup, mb, fm) case 8: - // 8up: middle cut and 1/4,3/4 folds - drawGuideHorizontal(w, height/2, width, cut, nup, mb, fm) - drawGuideHorizontal(w, height*1/4, width, fold, nup, mb, fm) - drawGuideHorizontal(w, height*3/4, width, fold, nup, mb, fm) + if nup.BookletBinding == LongEdge { + // 8up: middle cut and 1/4,3/4 folds + drawGuideHorizontal(w, height/2, width, cut, nup, mb, fm) + drawGuideHorizontal(w, height*1/4, width, fold, nup, mb, fm) + drawGuideHorizontal(w, height*3/4, width, fold, nup, mb, fm) + } else { + // short edge: cuts on rows + for i := 1; i < 4; i++ { + drawGuideHorizontal(w, height*float64(i)/4, width, cut, nup, mb, fm) + } + } } } if vert != none { diff --git a/pkg/pdfcpu/model/box.go b/pkg/pdfcpu/model/box.go index e5357b3e..67864955 100644 --- a/pkg/pdfcpu/model/box.go +++ b/pkg/pdfcpu/model/box.go @@ -181,7 +181,7 @@ func processBox(b **Box, boxID, paramValueStr string, unit types.DisplayUnit) er boxVal, err := resolveBoxType(paramValueStr) if err == nil { if boxVal == boxID { - return errors.Errorf("pdfcpu: invalid box self assigment: %s", boxID) + return errors.Errorf("pdfcpu: invalid box self assignment: %s", boxID) } *b = &Box{RefBox: boxVal} return nil diff --git a/pkg/pdfcpu/model/certificate.go b/pkg/pdfcpu/model/certificate.go new file mode 100644 index 00000000..c3cf1328 --- /dev/null +++ b/pkg/pdfcpu/model/certificate.go @@ -0,0 +1,114 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package model + +import ( + "crypto/x509" + "crypto/x509/pkix" + "fmt" + "os" + "strings" +) + +// CertDir is the location for installed certificates. +var CertDir string + +// UserCertPool contains all certificates loaded from CertDir. +var UserCertPool *x509.CertPool + +// TODO Do we need locking? +//var UserCertPoolLock = &sync.RWMutex{} + +func IsPEM(fname string) bool { + return strings.HasSuffix(strings.ToLower(fname), ".pem") +} + +func IsP7C(fname string) bool { + return strings.HasSuffix(strings.ToLower(fname), ".p7c") +} + +func strSliceString(ss []string) string { + if len(ss) == 0 { + return "" + } + ss1 := []string{} + ss1 = append(ss1, ss...) 
+ return strings.Join(ss1, ",") +} + +func nameString(subj pkix.Name) string { + var sb strings.Builder + + sb.WriteString(fmt.Sprintf(" org : %s", strSliceString(subj.Organization))) + + if len(subj.OrganizationalUnit) > 0 { + sb.WriteString(fmt.Sprintf("\n unit : %s", strSliceString(subj.OrganizationalUnit))) + } + + if len(subj.CommonName) > 0 { + sb.WriteString(fmt.Sprintf("\n name : %s", subj.CommonName)) + } + + if len(subj.StreetAddress) > 0 { + sb.WriteString(fmt.Sprintf("\n street : %s", strSliceString(subj.StreetAddress))) + } + + if len(subj.Locality) > 0 { + sb.WriteString(fmt.Sprintf("\n locality : %s", strSliceString(subj.Locality))) + } + + if len(subj.Province) > 0 { + sb.WriteString(fmt.Sprintf("\n province : %s", strSliceString(subj.Province))) + } + + if len(subj.PostalCode) > 0 { + sb.WriteString(fmt.Sprintf("\n postalCode: %s", strSliceString(subj.PostalCode))) + } + + if len(subj.Country) > 0 { + sb.WriteString(fmt.Sprintf("\n country : %s", strSliceString(subj.Country))) + } + + return sb.String() +} + +func CertString(cert *x509.Certificate) string { + + return fmt.Sprintf( + " Subject:\n%s\n"+ + " Issuer:\n%s\n"+ + " from: %s\n"+ + " thru: %s\n"+ + " CA: %t\n", + nameString(cert.Subject), + nameString(cert.Issuer), + cert.NotBefore.Format("2006-01-02"), + cert.NotAfter.Format("2006-01-02"), + cert.IsCA, + ) +} + +func ResetCertificates() error { + + // remove certs/*.pem + + path, err := os.UserConfigDir() + if err != nil { + path = os.TempDir() + } + return EnsureDefaultConfigAt(path, true) +} diff --git a/pkg/pdfcpu/model/configuration.go b/pkg/pdfcpu/model/configuration.go index ef55aecb..089e6c9e 100644 --- a/pkg/pdfcpu/model/configuration.go +++ b/pkg/pdfcpu/model/configuration.go @@ -17,10 +17,12 @@ limitations under the License. package model import ( + "embed" _ "embed" "fmt" "os" "path/filepath" + "strings" "time" "github.com/angel-one/pdfcpu/pkg/font" @@ -121,6 +123,7 @@ const ( IMPORTBOOKMARKS EXPORTBOOKMARKS LISTIMAGES + UPDATEIMAGES CREATE DUMP LISTFORMFIELDS @@ -152,6 +155,12 @@ const ( SETVIEWERPREFERENCES RESETVIEWERPREFERENCES ZOOM + ADDSIGNATURE + VALIDATESIGNATURE + LISTCERTIFICATES + INSPECTCERTIFICATES + IMPORTCERTIFICATES + VALIDATESIGNATURES ) // Configuration of a Context. @@ -159,6 +168,10 @@ type Configuration struct { // Location of corresponding config.yml Path string + CreationDate string + + Version string + // Check filename extensions. CheckFileNameExt bool @@ -227,14 +240,39 @@ type Configuration struct { // Date format. DateFormat string - // Optimize duplicate content streams across pages. + // Optimize after reading and validating the xreftable but before processing. + Optimize bool + + // Optimize after processing but before writing. + // TODO add to config.yml + OptimizeBeforeWriting bool + + // Optimize page resources via content stream analysis. (assuming Optimize == true || OptimizeBeforeWriting == true) + OptimizeResourceDicts bool + + // Optimize duplicate content streams across pages. (assuming Optimize == true || OptimizeBeforeWriting == true) OptimizeDuplicateContentStreams bool - // Merge creates bookmarks + // Merge creates bookmarks. CreateBookmarks bool // PDF Viewer is expected to supply appearance streams for form fields. NeedAppearances bool + + // Internet availability. + Offline bool + + // HTTP timeout in seconds. + Timeout int + + // Http timeout in seconds for CRL revocation checking. + TimeoutCRL int + + // Http timeout in seconds for OCSP revocation checking. 
+ TimeoutOCSP int + + // Preferred certificate revocation checking mechanism: CRL, OSCP + PreferredCertRevocationChecker int } // ConfigPath defines the location of pdfcpu's configuration directory. @@ -256,11 +294,29 @@ var configFileBytes []byte //go:embed resources/Roboto-Regular.ttf var robotoFontFileBytes []byte -func ensureConfigFileAt(path string) error { +//go:embed resources/certs/*.p7c +var certFilesEU embed.FS + +func ensureConfigFileAt(path string, override bool) error { f, err := os.Open(path) - if err != nil { + if err != nil || override { f.Close() - s := fmt.Sprintf("#############################\n# pdfcpu %s #\n# Created: %s #\n", VersionStr, time.Now().Format("2006-01-02 15:04")) + + s := fmt.Sprintf(` +############################# +# Default configuration # +############################# + +# Creation date +created: %s + +# version (Do not edit!) +version: %s + +`, + time.Now().Format("2006-01-02 15:04"), + VersionStr) + bb := append([]byte(s), configFileBytes...) if err := os.WriteFile(path, bb, os.ModePerm); err != nil { return err @@ -275,40 +331,117 @@ func ensureConfigFileAt(path string) error { return parseConfigFile(f, path) } -// EnsureDefaultConfigAt tries to load the default configuration from path. -// If path/pdfcpu/config.yaml is not found, it will be created. -func EnsureDefaultConfigAt(path string) error { - configDir := filepath.Join(path, "pdfcpu") - font.UserFontDir = filepath.Join(configDir, "fonts") - if err := os.MkdirAll(font.UserFontDir, os.ModePerm); err != nil { - return err - } - if err := ensureConfigFileAt(filepath.Join(configDir, "config.yml")); err != nil { - return err +func onlyHidden(files []os.DirEntry) bool { + for _, file := range files { + if !strings.HasPrefix(file.Name(), ".") { + return false + } } - //fmt.Println(loadedDefaultConfig) + return true +} +func initUserFonts() error { files, err := os.ReadDir(font.UserFontDir) if err != nil { return err } - if len(files) == 0 { + if onlyHidden(files) { // Ensure Roboto font for form filling. - fn := "Roboto-Regular" + fontname := "Roboto-Regular" if log.CLIEnabled() { log.CLI.Printf("installing user font:") } - if err := font.InstallFontFromBytes(font.UserFontDir, fn, robotoFontFileBytes); err != nil { - if log.CLIEnabled() { - log.CLI.Printf("%v", err) - } + if err := font.InstallFontFromBytes(font.UserFontDir, fontname, robotoFontFileBytes); err != nil { + return err } } return font.LoadUserFonts() } +func initCertificates() error { + // NOTE + // Load certs managed by The European Union Trusted Lists (EUTL) (https://eidas.ec.europa.eu/efda/trust-services/browse/eidas/tls). + // Additional certificates may be loaded using the corresponding CLI command: pdfcpu certificates import + // Certificates will be loaded by corresponding commands where applicable. 
+ + files, err := os.ReadDir(CertDir) + if err != nil { + return err + } + if !onlyHidden(files) { + return nil + } + + files, err = certFilesEU.ReadDir("resources/certs") + if err != nil { + return err + } + + euDir := filepath.Join(CertDir, "eu") + if err := os.MkdirAll(euDir, os.ModePerm); err != nil { + return err + } + + for _, file := range files { + //fmt.Println("Embedded file:", file.Name()) + + content, err := certFilesEU.ReadFile("resources/certs/" + file.Name()) + if err != nil { + return err + } + + path := filepath.Join(euDir, file.Name()) + //fmt.Printf("writing to %s\n", path) + + destFile, err := os.Create(path) + if err != nil { + return err + } + defer destFile.Close() + + _, err = destFile.Write(content) + if err != nil { + return err + } + } + + return nil +} + +// EnsureDefaultConfigAt tries to load the default configuration from path. +// If path/pdfcpu/config.yaml is not found, it will be created. +func EnsureDefaultConfigAt(path string, override bool) error { + configDir := filepath.Join(path, "pdfcpu") + if err := os.MkdirAll(configDir, os.ModePerm); err != nil { + return err + } + if err := ensureConfigFileAt(filepath.Join(configDir, "config.yml"), override); err != nil { + return err + } + + font.UserFontDir = filepath.Join(configDir, "fonts") + if err := os.MkdirAll(font.UserFontDir, os.ModePerm); err != nil { + return err + } + if err := initUserFonts(); err != nil { + return err + } + + CertDir = filepath.Join(configDir, "certs") + if err := os.MkdirAll(CertDir, os.ModePerm); err != nil { + return err + } + if err := initCertificates(); err != nil { + return err + } + + //fmt.Println(loadedDefaultConfig) + + return nil +} + func newDefaultConfiguration() *Configuration { // NOTE: Needs to stay in sync with config.yml // @@ -316,6 +449,8 @@ func newDefaultConfiguration() *Configuration { // cli: supply -conf disable // api: call api.DisableConfigDir() return &Configuration{ + CreationDate: time.Now().Format("2006-01-02 15:04"), + Version: VersionStr, CheckFileNameExt: true, Reader15: true, DecodeAllStreams: false, @@ -329,10 +464,24 @@ func newDefaultConfiguration() *Configuration { Permissions: PermissionsPrint, TimestampFormat: "2006-01-02 15:04", DateFormat: "2006-01-02", + Optimize: true, + OptimizeBeforeWriting: true, + OptimizeResourceDicts: true, OptimizeDuplicateContentStreams: false, CreateBookmarks: true, NeedAppearances: false, + Offline: false, + Timeout: 5, + PreferredCertRevocationChecker: CRL, + } +} + +func ResetConfig() error { + path, err := os.UserConfigDir() + if err != nil { + path = os.TempDir() } + return EnsureDefaultConfigAt(path, true) } // NewDefaultConfiguration returns the default pdfcpu configuration. 
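To illustrate the new network-related options, a small sketch that adjusts a freshly created default configuration. It only touches fields added in this change and relies on the semantics stated in the struct comments above; CRL is the constant already referenced by newDefaultConfiguration.

func exampleOfflineConfig() *Configuration {
	conf := NewDefaultConfiguration()
	conf.Offline = true  // no internet availability
	conf.Timeout = 10    // HTTP timeout in seconds
	conf.TimeoutCRL = 10 // HTTP timeout for CRL revocation checking
	conf.PreferredCertRevocationChecker = CRL
	return conf
}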
@@ -346,11 +495,11 @@ func NewDefaultConfiguration() *Configuration { if err != nil { path = os.TempDir() } - if err = EnsureDefaultConfigAt(path); err == nil { + if err = EnsureDefaultConfigAt(path, false); err == nil { c := *loadedDefaultConfig return &c } - fmt.Fprintf(os.Stderr, "pdfcpu: config dir problem: %v\n", err) + fmt.Fprintf(os.Stderr, "pdfcpu: config problem: %v\n", err) os.Exit(1) } // Bypass config.yml @@ -377,49 +526,6 @@ func NewRC4Configuration(userPW, ownerPW string, keyLength int) *Configuration { return c } -func (c Configuration) String() string { - path := "default" - if len(c.Path) > 0 { - path = c.Path - } - return fmt.Sprintf("pdfcpu configuration:\n"+ - "Path: %s\n"+ - "CheckFileNameExt: %t\n"+ - "Reader15: %t\n"+ - "DecodeAllStreams: %t\n"+ - "ValidationMode: %s\n"+ - "Eol: %s\n"+ - "WriteObjectStream: %t\n"+ - "WriteXrefStream: %t\n"+ - "EncryptUsingAES: %t\n"+ - "EncryptKeyLength: %d\n"+ - "Permissions: %d\n"+ - "Unit : %s\n"+ - "TimestampFormat: %s\n"+ - "DateFormat: %s\n"+ - "OptimizeDuplicateContentStreams %t\n"+ - "CreateBookmarks %t\n"+ - "NeedAppearances %t\n", - path, - c.CheckFileNameExt, - c.Reader15, - c.DecodeAllStreams, - c.ValidationModeString(), - c.EolString(), - c.WriteObjectStream, - c.WriteXRefStream, - c.EncryptUsingAES, - c.EncryptKeyLength, - c.Permissions, - c.UnitString(), - c.TimestampFormat, - c.DateFormat, - c.OptimizeDuplicateContentStreams, - c.CreateBookmarks, - c.NeedAppearances, - ) -} - // EolString returns a string rep for the eol in effect. func (c *Configuration) EolString() string { var s string @@ -442,6 +548,14 @@ func (c *Configuration) ValidationModeString() string { return "relaxed" } +// PreferredCertRevocationCheckerString returns a string rep for the preferred certificate revocation checker in effect. +func (c *Configuration) PreferredCertRevocationCheckerString() string { + if c.PreferredCertRevocationChecker == CRL { + return "CRL" + } + return "OSCP" +} + // UnitString returns a string rep for the display unit in effect. func (c *Configuration) UnitString() string { var s string diff --git a/pkg/pdfcpu/model/context.go b/pkg/pdfcpu/model/context.go index 98973a02..6d0e4a84 100644 --- a/pkg/pdfcpu/model/context.go +++ b/pkg/pdfcpu/model/context.go @@ -183,17 +183,18 @@ type ReadContext struct { FileSize int64 // Input file size. RS io.ReadSeeker // Input read seeker. EolCount int // 1 or 2 characters used for eol. - BinaryTotalSize int64 // total stream data - BinaryImageSize int64 // total image stream data - BinaryFontSize int64 // total font stream data (fontfiles) - BinaryImageDuplSize int64 // total obsolet image stream data after optimization - BinaryFontDuplSize int64 // total obsolet font stream data after optimization - Linearized bool // File is linearized. - Hybrid bool // File is a hybrid PDF file. - UsingObjectStreams bool // File is using object streams. - ObjectStreams types.IntSet // All object numbers of any object streams found which need to be decoded. - UsingXRefStreams bool // File is using xref streams. - XRefStreams types.IntSet // All object numbers of any xref streams found. + RepairOffset int64 + BinaryTotalSize int64 // total stream data + BinaryImageSize int64 // total image stream data + BinaryFontSize int64 // total font stream data (fontfiles) + BinaryImageDuplSize int64 // total obsolet image stream data after optimization + BinaryFontDuplSize int64 // total obsolet font stream data after optimization + Linearized bool // File is linearized. 
+ Hybrid bool // File is a hybrid PDF file. + UsingObjectStreams bool // File is using object streams. + ObjectStreams types.IntSet // All object numbers of any object streams found which need to be decoded. + UsingXRefStreams bool // File is using xref streams. + XRefStreams types.IntSet // All object numbers of any xref streams found. } func newReadContext(rs io.ReadSeeker) (*ReadContext, error) { @@ -238,30 +239,6 @@ func (rc *ReadContext) ObjectStreamsString() (int, string) { return len(objStreams), strings.Join(objStreams, ",") } -// IsXRefStreamObject returns true if object #i is a an xref stream. -func (rc *ReadContext) IsXRefStreamObject(i int) bool { - return rc.XRefStreams[i] -} - -// XRefStreamsString returns a formatted string and the number of xref stream objects. -func (rc *ReadContext) XRefStreamsString() (int, string) { - - var objs []int - for k := range rc.XRefStreams { - if rc.XRefStreams[k] { - objs = append(objs, k) - } - } - sort.Ints(objs) - - var xrefStreams []string - for _, i := range objs { - xrefStreams = append(xrefStreams, fmt.Sprintf("%d", i)) - } - - return len(xrefStreams), strings.Join(xrefStreams, ",") -} - // LogStats logs stats for read file. func (rc *ReadContext) LogStats(optimized bool) { if !log.StatsEnabled() { @@ -302,22 +279,23 @@ func (rc *ReadContext) ReadFileSize() int { return int(rc.FileSize) } -// OptimizationContext represents the context for the optimiziation of a PDF file. +// OptimizationContext represents the context for the optimization of a PDF file. type OptimizationContext struct { // Font section - PageFonts []types.IntSet // For each page a registry of font object numbers. - FontObjects map[int]*FontObject // FontObject lookup table by font object number. - FormFontObjects map[int]*FontObject // FormFontObject lookup table by font object number. - Fonts map[string][]int // All font object numbers registered for a font name. - DuplicateFonts map[int]types.Dict // Registry of duplicate font dicts. - DuplicateFontObjs types.IntSet // The set of objects that represents the union of the object graphs of all duplicate font dicts. + PageFonts []types.IntSet // For each page a registry of font object numbers. + FontObjects map[int]*FontObject // FontObject lookup table by font object number. + FormFontObjects map[int]*FontObject // FormFontObject lookup table by font object number. + Fonts map[string][]int // All font object numbers registered for a font name. + DuplicateFonts map[int]types.Dict // Registry of duplicate font dicts. + DuplicateFontObjs types.IntSet // The set of objects that represents the union of the object graphs of all duplicate font dicts. + CorruptFontResDicts []types.Dict // Corrupted fontDicts encountered during bypassing xreftable. // Image section - PageImages []types.IntSet // For each page a registry of image object numbers. - ImageObjects map[int]*ImageObject // ImageObject lookup table by image object number. - DuplicateImages map[int]*types.StreamDict // Registry of duplicate image dicts. - DuplicateImageObjs types.IntSet // The set of objects that represents the union of the object graphs of all duplicate image dicts. + PageImages []types.IntSet // For each page a registry of image object numbers. + ImageObjects map[int]*ImageObject // ImageObject lookup table by image object number. + DuplicateImages map[int]*DuplicateImageObject // Registry of duplicate image dicts. + DuplicateImageObjs types.IntSet // The set of objects that represents the union of the object graphs of all duplicate image dicts. 
ContentStreamCache map[int]*types.StreamDict FormStreamCache map[int]*types.StreamDict @@ -331,13 +309,14 @@ type OptimizationContext struct { func newOptimizationContext() *OptimizationContext { return &OptimizationContext{ - FontObjects: map[int]*FontObject{}, - FormFontObjects: map[int]*FontObject{}, - Fonts: map[string][]int{}, - DuplicateFonts: map[int]types.Dict{}, - DuplicateFontObjs: types.IntSet{}, + FontObjects: map[int]*FontObject{}, + FormFontObjects: map[int]*FontObject{}, + Fonts: map[string][]int{}, + DuplicateFonts: map[int]types.Dict{}, + DuplicateFontObjs: types.IntSet{}, + ImageObjects: map[int]*ImageObject{}, - DuplicateImages: map[int]*types.StreamDict{}, + DuplicateImages: map[int]*DuplicateImageObject{}, DuplicateImageObjs: types.IntSet{}, DuplicateInfoObjects: types.IntSet{}, ContentStreamCache: map[int]*types.StreamDict{}, @@ -544,7 +523,10 @@ func (oc *OptimizationContext) collectImageInfo(logStr []string) []string { for _, objectNumber := range objectNumbers { imageObject := oc.ImageObjects[objectNumber] - logStr = append(logStr, fmt.Sprintf("#%-6d %s\n", objectNumber, imageObject.ResourceNamesString())) + resName, ok := imageObject.ResourceNames[i] + if ok { + logStr = append(logStr, fmt.Sprintf("#%-6d %s\n", objectNumber, resName)) + } } } @@ -601,6 +583,8 @@ type WriteContext struct { BinaryFontSize int64 // total font stream data (fontfiles) = copy of Read.BinaryFontSize. Table map[int]int64 // object write offsets Offset int64 // current write offset + OffsetSigByteRange int64 // write offset of signature dict value for "ByteRange" + OffsetSigContents int64 // write offset of signature dict value for "Contents" WriteToObjectStream bool // if true start to embed objects into object streams and obey ObjectStreamMaxObjects. CurrentObjStream *int // if not nil, any new non-stream-object gets added to the object stream with this object number. Eol string // end of line char sequence diff --git a/pkg/pdfcpu/model/cut.go b/pkg/pdfcpu/model/cut.go index 47562195..180b916a 100644 --- a/pkg/pdfcpu/model/cut.go +++ b/pkg/pdfcpu/model/cut.go @@ -49,7 +49,7 @@ func parseHorCut(v string, cut *Cut) (err error) { return errors.Errorf("pdfcpu: cut position must be a float value: %s\n", s) } if f <= 0 || f >= 1 { - return errors.Errorf("pdfcpu: invalid cut poistion %.2f: 0 < i < 1.0\n", f) + return errors.Errorf("pdfcpu: invalid cut position %.2f: 0 < i < 1.0\n", f) } cut.Hor = append(cut.Hor, f) } @@ -65,7 +65,7 @@ func parseVertCut(v string, cut *Cut) (err error) { return errors.Errorf("pdfcpu: cut position must be a float value: %s\n", s) } if f <= 0 || f >= 1 { - return errors.Errorf("pdfcpu: invalid cut poistion %.2f: 0 < i < 1.0\n", f) + return errors.Errorf("pdfcpu: invalid cut position %.2f: 0 < i < 1.0\n", f) } cut.Vert = append(cut.Vert, f) } diff --git a/pkg/pdfcpu/model/dereference.go b/pkg/pdfcpu/model/dereference.go index f4d0c2f5..0a812693 100644 --- a/pkg/pdfcpu/model/dereference.go +++ b/pkg/pdfcpu/model/dereference.go @@ -17,15 +17,53 @@ limitations under the License. 
package model import ( + "context" "strings" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" ) -func (xRefTable *XRefTable) indRefToObject(ir *types.IndirectRef) (types.Object, error) { +func processDictRefCounts(xRefTable *XRefTable, d types.Dict) { + for _, e := range d { + switch o1 := e.(type) { + case types.IndirectRef: + xRefTable.IncrementRefCount(&o1) + case types.Dict: + ProcessRefCounts(xRefTable, o1) + case types.Array: + ProcessRefCounts(xRefTable, o1) + } + } +} + +func processArrayRefCounts(xRefTable *XRefTable, a types.Array) { + for _, e := range a { + switch o1 := e.(type) { + case types.IndirectRef: + xRefTable.IncrementRefCount(&o1) + case types.Dict: + ProcessRefCounts(xRefTable, o1) + case types.Array: + ProcessRefCounts(xRefTable, o1) + } + } +} + +func ProcessRefCounts(xRefTable *XRefTable, o types.Object) { + switch o := o.(type) { + case types.Dict: + processDictRefCounts(xRefTable, o) + case types.StreamDict: + processDictRefCounts(xRefTable, o.Dict) + case types.Array: + processArrayRefCounts(xRefTable, o) + } +} + +func (xRefTable *XRefTable) indRefToObject(ir *types.IndirectRef, decodeLazy bool) (types.Object, int, error) { if ir == nil { - return nil, errors.New("pdfcpu: indRefToObject: input argument is nil") + return nil, 0, errors.New("pdfcpu: indRefToObject: input argument is nil") } // 7.3.10 @@ -33,13 +71,23 @@ func (xRefTable *XRefTable) indRefToObject(ir *types.IndirectRef) (types.Object, // it shall be treated as a reference to the null object. entry, found := xRefTable.FindTableEntryForIndRef(ir) if !found || entry.Free { - return nil, nil + return nil, 0, nil } xRefTable.CurObj = int(ir.ObjectNumber) - // return dereferenced object - return entry.Object, nil + if l, ok := entry.Object.(types.LazyObjectStreamObject); ok && decodeLazy { + ob, err := l.DecodedObject(context.TODO()) + if err != nil { + return nil, 0, err + } + + ProcessRefCounts(xRefTable, ob) + entry.Object = ob + } + + // return dereferenced object and increment nr. + return entry.Object, entry.Incr, nil } // Dereference resolves an indirect object and returns the resulting PDF object. @@ -50,7 +98,32 @@ func (xRefTable *XRefTable) Dereference(o types.Object) (types.Object, error) { return o, nil } - return xRefTable.indRefToObject(&ir) + obj, _, err := xRefTable.indRefToObject(&ir, true) + return obj, err +} + +// Dereference resolves an indirect object and returns the resulting PDF object. +// It also returns the number of the written PDF Increment this object is part of. +// The higher the increment number the older the object. +func (xRefTable *XRefTable) DereferenceWithIncr(o types.Object) (types.Object, int, error) { + ir, ok := o.(types.IndirectRef) + if !ok { + // Nothing do dereference. + return o, 0, nil + } + + return xRefTable.indRefToObject(&ir, true) +} + +func (xRefTable *XRefTable) DereferenceForWrite(o types.Object) (types.Object, error) { + ir, ok := o.(types.IndirectRef) + if !ok { + // Nothing do dereference. + return o, nil + } + + obj, _, err := xRefTable.indRefToObject(&ir, false) + return obj, err } // DereferenceBoolean resolves and validates a boolean object, which may be an indirect reference. @@ -280,6 +353,24 @@ func (xRefTable *XRefTable) DereferenceDict(o types.Object) (types.Dict, error) return d, nil } +// DereferenceDictWithIncr resolves and validates a dictionary object, which may be an indirect reference. +// It also returns the number of the written PDF Increment this object is part of. 
+// The higher the increment number the older the object. +func (xRefTable *XRefTable) DereferenceDictWithIncr(o types.Object) (types.Dict, int, error) { + + o, incr, err := xRefTable.DereferenceWithIncr(o) + if err != nil || o == nil { + return nil, 0, err + } + + d, ok := o.(types.Dict) + if !ok { + return nil, 0, errors.Errorf("pdfcpu: dereferenceDictWithIncr: wrong type %T <%v>", o, o) + } + + return d, incr, nil +} + // DereferenceFontDict returns the font dict referenced by indRef. func (xRefTable *XRefTable) DereferenceFontDict(indRef types.IndirectRef) (types.Dict, error) { d, err := xRefTable.DereferenceDict(indRef) @@ -290,12 +381,14 @@ func (xRefTable *XRefTable) DereferenceFontDict(indRef types.IndirectRef) (types return nil, nil } - if d.Type() == nil { - return nil, errors.Errorf("pdfcpu: DereferenceFontDict: missing dict type %s\n", indRef) - } + if xRefTable.ValidationMode == ValidationStrict { + if d.Type() == nil { + return nil, errors.Errorf("pdfcpu: DereferenceFontDict: missing dict type %s\n", indRef) + } - if *d.Type() != "Font" { - return nil, errors.Errorf("pdfcpu: DereferenceFontDict: expected Type=Font, unexpected Type: %s", *d.Type()) + if *d.Type() != "Font" { + return nil, errors.Errorf("pdfcpu: DereferenceFontDict: expected Type=Font, unexpected Type: %s", *d.Type()) + } } return d, nil @@ -338,21 +431,27 @@ func (xRefTable *XRefTable) dereferenceDestArray(o types.Object) (types.Array, e } arr, ok := o1.(types.Array) if !ok { - errors.Errorf("pdfcpu: corrupted dest array:\n%s\n", o) + errors.Errorf("pdfcpu: invalid dest array:\n%s\n", o) } return arr, nil } - return nil, errors.Errorf("pdfcpu: corrupted dest array:\n%s\n", o) + return nil, errors.Errorf("pdfcpu: invalid dest array:\n%s\n", o) } // DereferenceDestArray resolves the destination for key. func (xRefTable *XRefTable) DereferenceDestArray(key string) (types.Array, error) { - o, ok := xRefTable.Names["Dests"].Value(key) - if !ok { - return nil, errors.Errorf("pdfcpu: corrupted named destination for: %s", key) + if dNames := xRefTable.Names["Dests"]; dNames != nil { + if o, ok := dNames.Value(key); ok { + return xRefTable.dereferenceDestArray(o) + } } - return xRefTable.dereferenceDestArray(o) + + if o, ok := xRefTable.Dests[key]; ok { + return xRefTable.dereferenceDestArray(o) + } + + return nil, errors.Errorf("pdfcpu: invalid named destination for: %s", key) } // DereferenceDictEntry returns a dereferenced dict entry. diff --git a/pkg/pdfcpu/model/font.go b/pkg/pdfcpu/model/font.go new file mode 100644 index 00000000..bcd855d6 --- /dev/null +++ b/pkg/pdfcpu/model/font.go @@ -0,0 +1,25 @@ +/* +Copyright 2024 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package model + +type FontInfo struct { + Prefix string `json:"prefix"` + Name string `json:"name"` + Type string `json:"type"` + Encoding string `json:"encoding"` + Embedded bool `json:"embedded"` +} diff --git a/pkg/pdfcpu/model/image.go b/pkg/pdfcpu/model/image.go index 7c92500f..9dcc1e70 100644 --- a/pkg/pdfcpu/model/image.go +++ b/pkg/pdfcpu/model/image.go @@ -18,17 +18,22 @@ package model import ( "bytes" + "encoding/binary" + "fmt" "image" "image/color" "image/draw" "image/jpeg" _ "image/png" + "io" "math" "os" "path/filepath" "strings" + "github.com/hhrutter/tiff" + "github.com/angel-one/pdfcpu/pkg/filter" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -111,7 +116,8 @@ func createSMaskObject(xRefTable *XRefTable, buf []byte, w, h, bpc int) (*types. return xRefTable.IndRefForNewObject(*sd) } -func createFlateImageObject(xRefTable *XRefTable, buf, sm []byte, w, h, bpc int, cs string) (*types.StreamDict, error) { +// CreateFlateImageStreamDict returns a flate stream dict. +func CreateFlateImageStreamDict(xRefTable *XRefTable, buf, sm []byte, w, h, bpc int, cs string) (*types.StreamDict, error) { var softMaskIndRef *types.IndirectRef if sm != nil { var err error @@ -121,14 +127,22 @@ func createFlateImageObject(xRefTable *XRefTable, buf, sm []byte, w, h, bpc int, } } - // Create Flate stream dict. - sd, _ := xRefTable.NewStreamDictForBuf(buf) - sd.InsertName("Type", "XObject") - sd.InsertName("Subtype", "Image") - sd.InsertInt("Width", w) - sd.InsertInt("Height", h) - sd.InsertInt("BitsPerComponent", bpc) - sd.InsertName("ColorSpace", cs) + sd := &types.StreamDict{ + Dict: types.Dict( + map[string]types.Object{ + "Type": types.Name("XObject"), + "Subtype": types.Name("Image"), + "Width": types.Integer(w), + "Height": types.Integer(h), + "BitsPerComponent": types.Integer(bpc), + "ColorSpace": types.Name(cs), + }, + ), + Content: buf, + FilterPipeline: []types.PDFFilter{{Name: filter.Flate, DecodeParms: nil}}, + } + + sd.InsertName("Filter", filter.Flate) if softMaskIndRef != nil { sd.Insert("SMask", *softMaskIndRef) @@ -145,8 +159,8 @@ func createFlateImageObject(xRefTable *XRefTable, buf, sm []byte, w, h, bpc int, return sd, nil } -// CreateDCTImageObject returns a DCT encoded stream dict. -func CreateDCTImageObject(xRefTable *XRefTable, buf []byte, w, h, bpc int, cs string) (*types.StreamDict, error) { +// CreateDCTImageStreamDict returns a DCT encoded stream dict. 
+func CreateDCTImageStreamDict(xRefTable *XRefTable, buf []byte, w, h, bpc int, cs string) (*types.StreamDict, error) { sd := &types.StreamDict{ Dict: types.Dict( map[string]types.Object{ @@ -184,15 +198,29 @@ func CreateDCTImageObject(xRefTable *XRefTable, buf []byte, w, h, bpc int, cs st return sd, nil } -func writeRGBAImageBuf(img image.Image) []byte { +func writeRGBAImageBuf(img image.Image) ([]byte, []byte) { w := img.Bounds().Dx() h := img.Bounds().Dy() i := 0 + var sm []byte buf := make([]byte, w*h*3) + var softMask bool for y := 0; y < h; y++ { for x := 0; x < w; x++ { c := img.At(x, y).(color.RGBA) + if !softMask { + if c.A != 0xFF { + softMask = true + sm = []byte{} + for j := 0; j < y*w+x; j++ { + sm = append(sm, 0xFF) + } + sm = append(sm, c.A) + } + } else { + sm = append(sm, c.A) + } buf[i] = c.R buf[i+1] = c.G buf[i+2] = c.B @@ -200,7 +228,7 @@ func writeRGBAImageBuf(img image.Image) []byte { } } - return buf + return buf, sm } func writeRGBA64ImageBuf(img image.Image) []byte { @@ -225,25 +253,6 @@ func writeRGBA64ImageBuf(img image.Image) []byte { return buf } -// func writeYCbCrToRGBAImageBuf(img image.Image) []byte { -// w := img.Bounds().Dx() -// h := img.Bounds().Dy() -// i := 0 -// buf := make([]byte, w*h*3) - -// for y := 0; y < h; y++ { -// for x := 0; x < w; x++ { -// c := img.At(x, y).(color.YCbCr) -// r, g, b, _ := c.RGBA() -// buf[i] = uint8(r >> 8 & 0xFF) -// buf[i+1] = uint8(g >> 8 & 0xFF) -// buf[i+2] = uint8(b >> 8 & 0xFF) -// i += 3 -// } -// } -// return buf -// } - func writeNRGBAImageBuf(xRefTable *XRefTable, img image.Image) ([]byte, []byte) { w := img.Bounds().Dx() h := img.Bounds().Dy() @@ -376,11 +385,27 @@ func writeCMYKImageBuf(img image.Image) []byte { func convertToRGBA(img image.Image) *image.RGBA { b := img.Bounds() - m := image.NewRGBA(image.Rect(0, 0, b.Dx(), b.Dy())) + m := image.NewRGBA(b) draw.Draw(m, m.Bounds(), img, b.Min, draw.Src) return m } +func convertNYCbCrAToRGBA(img *image.NYCbCrA) *image.RGBA { + b := img.Bounds() + m := image.NewRGBA(b) + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + ycbr := img.YCbCrAt(x, y) + stride := img.Bounds().Dx() + alphaOffset := (y-b.Min.Y)*stride + (x - b.Min.X) + alpha := img.A[alphaOffset] + r, g, b := color.YCbCrToRGB(ycbr.Y, ycbr.Cb, ycbr.Cr) + m.Set(x, y, color.RGBA{R: r, G: g, B: b, A: alpha}) + } + } + return m +} + func convertToGray(img image.Image) *image.Gray { b := img.Bounds() m := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) @@ -413,18 +438,18 @@ func convertToSepia(img image.Image) *image.RGBA { return m } -func createImageDict(xRefTable *XRefTable, buf, softMask []byte, w, h, bpc int, format, cs string) (*types.StreamDict, int, int, error) { +func createImageStreamDict(xRefTable *XRefTable, buf, softMask []byte, w, h, bpc int, format, cs string) (*types.StreamDict, error) { var ( sd *types.StreamDict err error ) switch format { case "jpeg": - sd, err = CreateDCTImageObject(xRefTable, buf, w, h, bpc, cs) + sd, err = CreateDCTImageStreamDict(xRefTable, buf, w, h, bpc, cs) default: - sd, err = createFlateImageObject(xRefTable, buf, softMask, w, h, bpc, cs) + sd, err = CreateFlateImageStreamDict(xRefTable, buf, softMask, w, h, bpc, cs) } - return sd, w, h, err + return sd, err } func encodeJPEG(img image.Image) ([]byte, string, error) { @@ -457,13 +482,13 @@ func createImageBuf(xRefTable *XRefTable, img image.Image, format string) ([]byt var cs string - switch img.(type) { + switch img := img.(type) { case *image.RGBA: // A 32-bit alpha-premultiplied 
color, having 8 bits for each of red, green, blue and alpha. // An alpha-premultiplied color component C has been scaled by alpha (A), so it has valid values 0 <= C <= A. cs = DeviceRGBCS bpc = 8 - buf = writeRGBAImageBuf(img) + buf, sm = writeRGBAImageBuf(img) case *image.RGBA64: // A 64-bit alpha-premultiplied color, having 16 bits for each of red, green, blue and alpha. @@ -511,16 +536,18 @@ func createImageBuf(xRefTable *XRefTable, img image.Image, format string) ([]byt case *image.YCbCr: cs = DeviceRGBCS bpc = 8 - buf = writeRGBAImageBuf(convertToRGBA(img)) + buf, sm = writeRGBAImageBuf(convertToRGBA(img)) case *image.NYCbCrA: - return buf, sm, bpc, cs, errors.New("pdfcpu: unsupported image type: NYCbCrA") + cs = DeviceRGBCS + bpc = 8 + buf, sm = writeRGBAImageBuf(convertNYCbCrAToRGBA(img)) case *image.Paletted: // In-memory image of uint8 indices into a given palette. cs = DeviceRGBCS bpc = 8 - buf = writeRGBAImageBuf(convertToRGBA(img)) + buf, sm = writeRGBAImageBuf(convertToRGBA(img)) default: return buf, sm, bpc, cs, errors.Errorf("pdfcpu: unsupported image type: %T", img) @@ -541,40 +568,142 @@ func colorSpaceForJPEGColorModel(cm color.Model) string { return "" } -func createDCTImageObjectForJPEG(xRefTable *XRefTable, c image.Config, bb bytes.Buffer) (*types.StreamDict, int, int, error) { +func createDCTImageStreamDictForJPEG(xRefTable *XRefTable, c image.Config, bb bytes.Buffer) (*types.StreamDict, error) { cs := colorSpaceForJPEGColorModel(c.ColorModel) if cs == "" { - return nil, 0, 0, errors.New("pdfcpu: unexpected color model for JPEG") + return nil, errors.New("pdfcpu: unexpected color model for JPEG") } - sd, err := CreateDCTImageObject(xRefTable, bb.Bytes(), c.Width, c.Height, 8, cs) + return CreateDCTImageStreamDict(xRefTable, bb.Bytes(), c.Width, c.Height, 8, cs) +} - return sd, c.Width, c.Height, err +func createImageResourcesForJPEG(xRefTable *XRefTable, c image.Config, bb bytes.Buffer) ([]ImageResource, error) { + sd, err := createDCTImageStreamDictForJPEG(xRefTable, c, bb) + if err != nil { + return nil, err + } + + indRef, err := xRefTable.IndRefForNewObject(*sd) + if err != nil { + return nil, err + } + + res := Resource{ID: "Im0", IndRef: indRef} + ir := ImageResource{Res: res, Width: c.Width, Height: c.Height} + return []ImageResource{ir}, err } -// CreateImageStreamDict returns a stream dict for image data represented by r and applies optional filters. 
-func CreateImageStreamDict(xRefTable *XRefTable, r io.Reader, gray, sepia bool) (*types.StreamDict, int, int, error) { +func decodeImage(xRefTable *XRefTable, buf *bytes.Reader, currentOffset int64, gray, sepia bool, byteOrder binary.ByteOrder, imgResources *[]ImageResource) (int64, error) { + img, err := tiff.DecodeAt(buf, currentOffset) + if err != nil { + return 0, err + } - var bb bytes.Buffer - tee := io.TeeReader(r, &bb) + if gray { + switch img.(type) { + case *image.Gray, *image.Gray16: + default: + img = convertToGray(img) + } + } - var sniff bytes.Buffer - if _, err := io.Copy(&sniff, tee); err != nil { - return nil, 0, 0, err + if sepia { + switch img.(type) { + case *image.Gray, *image.Gray16: + default: + img = convertToSepia(img) + } } - c, format, err := image.DecodeConfig(&sniff) + imgBuf, softMask, bpc, cs, err := createImageBuf(xRefTable, img, "tiff") if err != nil { - return nil, 0, 0, err + return 0, err } - if format == "jpeg" && !gray && !sepia { - return createDCTImageObjectForJPEG(xRefTable, c, bb) + w, h := img.Bounds().Dx(), img.Bounds().Dy() + + sd, err := createImageStreamDict(xRefTable, imgBuf, softMask, w, h, bpc, "tiff", cs) + if err != nil { + return 0, err + } + + indRef, err := xRefTable.IndRefForNewObject(*sd) + if err != nil { + return 0, err + } + + res := Resource{ID: "Im0", IndRef: indRef} + ir := ImageResource{Res: res, Width: w, Height: h} + *imgResources = append(*imgResources, ir) + + if _, err := buf.Seek(currentOffset, io.SeekStart); err != nil { + return 0, err + } + + var numEntries uint16 + if err := binary.Read(buf, byteOrder, &numEntries); err != nil { + return 0, err + } + + if _, err := buf.Seek(int64(numEntries)*12, io.SeekCurrent); err != nil { + return 0, err + } + + var nextIFDOffset uint32 + if err := binary.Read(buf, byteOrder, &nextIFDOffset); err != nil { + return 0, err } + // if nextIFDOffset >= uint32(bb.Len()) { + // fmt.Println("Invalid next IFD offset, stopping.") + // break + // } + + return int64(nextIFDOffset), nil +} + +func createImageResourcesForTIFF(xRefTable *XRefTable, bb bytes.Buffer, gray, sepia bool) ([]ImageResource, error) { + imgResources := []ImageResource{} + + buf := bytes.NewReader(bb.Bytes()) + + var header [8]byte + if _, err := io.ReadFull(buf, header[:]); err != nil { + return nil, err + } + + var byteOrder binary.ByteOrder + if string(header[:2]) == "II" { + byteOrder = binary.LittleEndian + } else if string(header[:2]) == "MM" { + byteOrder = binary.BigEndian + } else { + return nil, fmt.Errorf("invalid TIFF byte order") + } + + firstIFDOffset := byteOrder.Uint32(header[4:]) + if firstIFDOffset < 8 || firstIFDOffset >= uint32(bb.Len()) { + return nil, fmt.Errorf("invalid TIFF file: no valid IFD") + } + + var err error + + off := int64(firstIFDOffset) + + for off != 0 && off < int64(bb.Len()) { + off, err = decodeImage(xRefTable, buf, off, gray, sepia, byteOrder, &imgResources) + if err != nil { + return nil, err + } + } + + return imgResources, nil +} + +func createImageResources(xRefTable *XRefTable, c image.Config, bb bytes.Buffer, gray, sepia bool) ([]ImageResource, error) { img, format, err := image.Decode(&bb) if err != nil { - return nil, 0, 0, err + return nil, err } if gray { @@ -593,19 +722,107 @@ func CreateImageStreamDict(xRefTable *XRefTable, r io.Reader, gray, sepia bool) } } + imgBuf, softMask, bpc, cs, err := createImageBuf(xRefTable, img, format) + if err != nil { + return nil, err + } + + w, h := img.Bounds().Dx(), img.Bounds().Dy() + if w != c.Width || h != c.Height { + return nil, 
errors.New("pdfcpu: unexpected width or height") + } + + sd, err := createImageStreamDict(xRefTable, imgBuf, softMask, w, h, bpc, format, cs) + if err != nil { + return nil, err + } + + indRef, err := xRefTable.IndRefForNewObject(*sd) + if err != nil { + return nil, err + } + + res := Resource{ID: "Im0", IndRef: indRef} + ir := ImageResource{Res: res, Width: w, Height: h} + return []ImageResource{ir}, err +} + +// CreateImageResources creates a new XObject for given image data represented by r and applies optional filters. +func CreateImageResources(xRefTable *XRefTable, r io.Reader, gray, sepia bool) ([]ImageResource, error) { + + var bb bytes.Buffer + tee := io.TeeReader(r, &bb) + + var sniff bytes.Buffer + if _, err := io.Copy(&sniff, tee); err != nil { + return nil, err + } + + c, format, err := image.DecodeConfig(&sniff) + if err != nil { + return nil, err + } + + if format == "tiff" { + return createImageResourcesForTIFF(xRefTable, bb, gray, sepia) + } + + if format == "jpeg" && !gray && !sepia { + return createImageResourcesForJPEG(xRefTable, c, bb) + } + + return createImageResources(xRefTable, c, bb, gray, sepia) +} + +// CreateImageStreamDict returns a stream dict for image data represented by r and applies optional filters. +func CreateImageStreamDict(xRefTable *XRefTable, r io.Reader) (*types.StreamDict, int, int, error) { + + var bb bytes.Buffer + tee := io.TeeReader(r, &bb) + + var sniff bytes.Buffer + if _, err := io.Copy(&sniff, tee); err != nil { + return nil, 0, 0, err + } + + c, format, err := image.DecodeConfig(&sniff) + if err != nil { + return nil, 0, 0, err + } + + if format == "jpeg" { + sd, err := createDCTImageStreamDictForJPEG(xRefTable, c, bb) + if err != nil { + return nil, 0, 0, err + } + return sd, c.Width, c.Height, nil + } + + img, format, err := image.Decode(&bb) + if err != nil { + return nil, 0, 0, err + } + imgBuf, softMask, bpc, cs, err := createImageBuf(xRefTable, img, format) if err != nil { return nil, 0, 0, err } w, h := img.Bounds().Dx(), img.Bounds().Dy() + if w != c.Width || h != c.Height { + return nil, 0, 0, errors.New("pdfcpu: unexpected width or height") + } - return createImageDict(xRefTable, imgBuf, softMask, w, h, bpc, format, cs) + sd, err := createImageStreamDict(xRefTable, imgBuf, softMask, w, h, bpc, format, cs) + if err != nil { + return nil, 0, 0, err + } + return sd, c.Width, c.Height, nil } // CreateImageResource creates a new XObject for given image data represented by r and applies optional filters. -func CreateImageResource(xRefTable *XRefTable, r io.Reader, gray, sepia bool) (*types.IndirectRef, int, int, error) { - sd, w, h, err := CreateImageStreamDict(xRefTable, r, gray, sepia) +func CreateImageResource(xRefTable *XRefTable, r io.Reader) (*types.IndirectRef, int, int, error) { + sd, w, h, err := CreateImageStreamDict(xRefTable, r) if err != nil { return nil, 0, 0, err } diff --git a/pkg/pdfcpu/model/message.go b/pkg/pdfcpu/model/message.go new file mode 100644 index 00000000..b427caf2 --- /dev/null +++ b/pkg/pdfcpu/model/message.go @@ -0,0 +1,61 @@ +/* +Copyright 2024 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package model + +import ( + "fmt" + + "github.com/angel-one/pdfcpu/pkg/log" +) + +func ShowMsg(msg string) { + s := "pdfcpu " + msg + if log.DebugEnabled() { + log.Debug.Println(s) + } + if log.ReadEnabled() { + log.Read.Println(s) + } + if log.ValidateEnabled() { + log.Validate.Println(s) + } + if log.CLIEnabled() { + log.CLI.Println(s) + } +} + +func ShowMsgTopic(topic, msg string) { + msg = topic + ": " + msg + ShowMsg(msg) +} + +func ShowRepaired(msg string) { + ShowMsgTopic("repaired", msg) +} + +func ShowSkipped(msg string) { + ShowMsgTopic("skipped", msg) +} + +func ShowDigestedSpecViolation(msg string) { + ShowMsgTopic("digested", msg) +} + +func ShowDigestedSpecViolationError(xRefTable *XRefTable, err error) { + msg := fmt.Sprintf("spec violation around obj#(%d): %v\n", xRefTable.CurObj, err) + ShowMsgTopic("digested", msg) +} diff --git a/pkg/pdfcpu/model/metadata.go b/pkg/pdfcpu/model/metadata.go new file mode 100644 index 00000000..aae62cb0 --- /dev/null +++ b/pkg/pdfcpu/model/metadata.go @@ -0,0 +1,155 @@ +/* +Copyright 2024 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package model + +import ( + "encoding/xml" + "strings" + "time" +) + +type UserDate time.Time + +const userDateFormatNoTimeZone = "2006-01-02T15:04:05Z" +const userDateFormatNegTimeZone = "2006-01-02T15:04:05-07:00" +const userDateFormatPosTimeZone = "2006-01-02T15:04:05+07:00" + +func (ud *UserDate) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + dateString := "" + err := d.DecodeElement(&dateString, &start) + if err != nil { + return err + } + dat, err := time.Parse(userDateFormatNoTimeZone, dateString) + if err == nil { + *ud = UserDate(dat) + return nil + } + dat, err = time.Parse(userDateFormatPosTimeZone, dateString) + if err == nil { + *ud = UserDate(dat) + return nil + } + dat, err = time.Parse(userDateFormatNegTimeZone, dateString) + if err == nil { + *ud = UserDate(dat) + return nil + } + return err +} + +type Alt struct { + //XMLName xml.Name `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Alt"` + Entries []string `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# li"` +} + +type Seq struct { + //XMLName xml.Name `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Seq"` + Entries []string `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# li"` +} + +type Title struct { + //XMLName xml.Name `xml:"http://purl.org/dc/elements/1.1/ title"` + Alt Alt `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Alt"` +} + +type Desc struct { + //XMLName xml.Name `xml:"http://purl.org/dc/elements/1.1/ description"` + Alt Alt `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Alt"` +} + +type Creator struct { + //XMLName xml.Name `xml:"http://purl.org/dc/elements/1.1/ creator"` + Seq Seq `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Seq"` +} + +type Description struct { + //XMLName xml.Name `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Description"` + Title Title 
`xml:"http://purl.org/dc/elements/1.1/ title"` + Author Creator `xml:"http://purl.org/dc/elements/1.1/ creator"` + Subject Desc `xml:"http://purl.org/dc/elements/1.1/ description"` + Creator string `xml:"http://ns.adobe.com/xap/1.0/ CreatorTool"` + CreationDate UserDate `xml:"http://ns.adobe.com/xap/1.0/ CreateDate"` + ModDate UserDate `xml:"http://ns.adobe.com/xap/1.0/ ModifyDate"` + Producer string `xml:"http://ns.adobe.com/pdf/1.3/ Producer"` + Trapped bool `xml:"http://ns.adobe.com/pdf/1.3/ Trapped"` + Keywords string `xml:"http://ns.adobe.com/pdf/1.3/ Keywords"` +} + +type RDF struct { + XMLName xml.Name `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# RDF"` + Description Description +} + +type XMPMeta struct { + XMLName xml.Name `xml:"adobe:ns:meta/ xmpmeta"` + RDF RDF +} + +func removeTag(s, kw string) string { + kwLen := len(kw) + i := strings.Index(s, kw) + if i < 0 { + return "" + } + + j := i + kwLen + + i = strings.LastIndex(s[:i], "<") + if i < 0 { + return "" + } + + block1 := s[:i] + + s = s[j:] + i = strings.Index(s, kw) + if i < 0 { + return "" + } + + j = i + kwLen + + block2 := s[j:] + + s1 := block1 + block2 + + return s1 +} + +func RemoveKeywords(metadata *[]byte) error { + + // Opt for simple byte removal instead of xml de/encoding. + + s := string(*metadata) + if len(s) == 0 { + return nil + } + + s = removeTag(s, "Keywords>") + if len(s) == 0 { + return nil + } + + // Possible Acrobat bug. + // Acrobat seems to use dc:subject for keywords but ***does not*** show the content in Subject. + s = removeTag(s, "subject>") + + *metadata = []byte(s) + + return nil +} diff --git a/pkg/pdfcpu/model/nameTree.go b/pkg/pdfcpu/model/nameTree.go index 39c599b9..51b3d671 100644 --- a/pkg/pdfcpu/model/nameTree.go +++ b/pkg/pdfcpu/model/nameTree.go @@ -306,10 +306,6 @@ func (n *Node) Add(xRefTable *XRefTable, k string, v types.Object, m NameMap, na return n.HandleLeaf(xRefTable, k, v, m, nameRefDictKeys) } - if k == n.Kmin || k == n.Kmax { - return nil - } - if keyLess(k, n.Kmin) { n.Kmin = k } else if keyLess(n.Kmax, k) { diff --git a/pkg/pdfcpu/model/nup.go b/pkg/pdfcpu/model/nup.go index 1f7edddd..520d4683 100644 --- a/pkg/pdfcpu/model/nup.go +++ b/pkg/pdfcpu/model/nup.go @@ -72,7 +72,8 @@ type NUp struct { PageDim *types.Dim // Page dimensions in display unit. PageSize string // Paper size eg. A4L, A4P, A4(=default=A4P), see paperSize.go UserDim bool // true if one of dimensions or paperSize provided overriding the default. - Orient orientation // One of rd(=default),dr,ld,dl + Orient orientation // One of rd(=default),dr,ld,dl - grid orientation + Enforce bool // enforce best-fit orientation of individual content on grid. Grid *types.Dim // Intra page grid dimensions eg (2,2) PageGrid bool // Create a m x n grid of pages for PDF inputfiles only (think "extra page n-Up"). ImgInputFile bool // Process image or PDF input files. @@ -95,6 +96,7 @@ func DefaultNUpConfig() *NUp { Orient: RightDown, Margin: 3, Border: true, + Enforce: true, } } @@ -197,7 +199,7 @@ func createNUpFormForPDF(xRefTable *XRefTable, resDict *types.IndirectRef, conte } // NUpTilePDFBytesForPDF applies nup tiles to content bytes. -func NUpTilePDFBytes(wr io.Writer, rSrc, rDest *types.Rectangle, formResID string, nup *NUp, rotate, enforceOrient bool) { +func NUpTilePDFBytes(wr io.Writer, rSrc, rDest *types.Rectangle, formResID string, nup *NUp, rotate bool) { // rScr is a rectangular region represented by form formResID in form space. 
@@ -227,7 +229,7 @@ func NUpTilePDFBytes(wr io.Writer, rSrc, rDest *types.Rectangle, formResID strin // Best fit translation of a source rectangle into a destination rectangle. // For nup we enforce the dest orientation, // whereas in cases where the original orientation needs to be preserved eg. for booklets, we don't. - w, h, dx, dy, r := types.BestFitRectIntoRect(rSrc, rDestCr, enforceOrient, false) + w, h, dx, dy, r := types.BestFitRectIntoRect(rSrc, rDestCr, nup.Enforce, false) if nup.BgColor != nil { if nup.ImgInputFile { @@ -319,7 +321,7 @@ func (ctx *Context) NUpTilePDFBytesForPDF( } // Retrieve content stream bytes. - bb, err := ctx.PageContent(d) + bb, err := ctx.PageContent(d, pageNr) if err == ErrNoContent { // TODO render if has annotations. return nil @@ -358,7 +360,7 @@ func (ctx *Context) NUpTilePDFBytesForPDF( formsResDict.Insert(formResID, *formIndRef) // Append to content stream buf of destination page. - NUpTilePDFBytes(buf, cropBox, rDest, formResID, nup, rotate, true) + NUpTilePDFBytes(buf, cropBox, rDest, formResID, nup, rotate) return nil } diff --git a/pkg/pdfcpu/model/parse.go b/pkg/pdfcpu/model/parse.go index b6583ef2..03157291 100644 --- a/pkg/pdfcpu/model/parse.go +++ b/pkg/pdfcpu/model/parse.go @@ -18,6 +18,7 @@ package model import ( "context" + "fmt" "strconv" "strings" "unicode" @@ -33,6 +34,7 @@ var ( errArrayNotTerminated = errors.New("pdfcpu: parse: unterminated array") errDictionaryCorrupt = errors.New("pdfcpu: parse: corrupt dictionary") errDictionaryNotTerminated = errors.New("pdfcpu: parse: unterminated dictionary") + errDictionaryDuplicateKey = errors.New("pdfcpu: parse: duplicate key") errHexLiteralCorrupt = errors.New("pdfcpu: parse: corrupt hex literal") errHexLiteralNotTerminated = errors.New("pdfcpu: parse: hex literal not terminated") errNameObjectCorrupt = errors.New("pdfcpu: parse: corrupt name object") @@ -45,6 +47,8 @@ var ( errXrefStreamCorruptIndex = errors.New("pdfcpu: parse: xref stream dict corrupt entry Index") errObjStreamMissingN = errors.New("pdfcpu: parse: obj stream dict missing entry W") errObjStreamMissingFirst = errors.New("pdfcpu: parse: obj stream dict missing entry First") + + ErrCorruptObjectOffset = errors.New("pdfcpu: corrupt object offset") ) func positionToNextWhitespace(s string) (int, string) { @@ -74,15 +78,15 @@ func positionToNextWhitespaceOrChar(s, chars string) (int, string) { return -1, s } -func positionToNextEOL(s string) string { +func positionToNextEOL(s string) (string, int) { for i, c := range s { for _, m := range "\x0A\x0D" { if c == m { - return s[i:] + return s[i:], i } } } - return "" + return "", 0 } // trimLeftSpace trims leading whitespace and trailing comment. @@ -118,7 +122,7 @@ func trimLeftSpace(s string, relaxed bool) (string, bool) { break } // trim PDF comment (= '%' up to eol) - s = positionToNextEOL(s) + s, _ = positionToNextEOL(s) if log.ParseEnabled() { log.Parse.Printf("2 outstr: <%s>\n", s) } @@ -227,9 +231,38 @@ func delimiter(b byte) bool { return false } -// ParseObjectAttributes parses object number and generation of the next object for given string buffer. 
-func ParseObjectAttributes(line *string) (objectNumber *int, generationNumber *int, err error) { +func detectObj(s string) (string, string, error) { + i := strings.Index(s, "obj") + if i > 0 { + return s[:i], s[i+3:], nil + } + + i = strings.Index(s, "bj") + if i > 0 { + return s[:i], s[i+2:], nil + } + return "", "", errors.New("pdfcpu: ParseObjectAttributes: can't find \"obj\"") +} + +func cleanObjProlog(s string) (string, error) { + s, _ = trimLeftSpace(s, false) + if len(s) == 0 { + return "", errors.New("pdfcpu: ParseObjectAttributes: can't find object number") + } + + var b strings.Builder + for _, r := range s { + if r >= '0' && r <= '9' || r == ' ' { + b.WriteRune(r) + } + } + return b.String(), nil +} + +// ParseObjectAttributes parses object number and generation of the next object for given string buffer. +func ParseObjectAttributes(line *string) (*int, *int, error) { + // TODO always called twice ? if line == nil || len(*line) == 0 { return nil, nil, errors.New("pdfcpu: ParseObjectAttributes: buf not available") } @@ -238,58 +271,55 @@ func ParseObjectAttributes(line *string) (objectNumber *int, generationNumber *i log.Parse.Printf("ParseObjectAttributes: buf=<%s>\n", *line) } - l := *line - var remainder string - - i := strings.Index(l, "obj") - if i < 0 { - return nil, nil, errors.New("pdfcpu: ParseObjectAttributes: can't find \"obj\"") + l, remainder, err := detectObj(*line) + if err != nil { + return nil, nil, err } - remainder = l[i+len("obj"):] - l = l[:i] - // object number - l, _ = trimLeftSpace(l, false) - if len(l) == 0 { - return nil, nil, errors.New("pdfcpu: ParseObjectAttributes: can't find object number") + l, err = cleanObjProlog(l) + if err != nil { + return nil, nil, err } - i, _ = positionToNextWhitespaceOrChar(l, "%") - if i <= 0 { - return nil, nil, errors.New("pdfcpu: ParseObjectAttributes: can't find end of object number") + i, _ := positionToNextWhitespaceOrChar(l, "%") + s := l + if i > 0 { + s = l[:i] } - objNr, err := strconv.Atoi(l[:i]) + objNr, err := strconv.Atoi(strings.TrimSpace(s)) if err != nil { - return nil, nil, err + return nil, nil, ErrCorruptObjectOffset } // generation number - l = l[i:] - l, _ = trimLeftSpace(l, false) - if len(l) == 0 { - return nil, nil, errors.New("pdfcpu: ParseObjectAttributes: can't find generation number") - } + genNr := 0 - i, _ = positionToNextWhitespaceOrChar(l, "%") - if i <= 0 { - return nil, nil, errors.New("pdfcpu: ParseObjectAttributes: can't find end of generation number") - } + if i > 0 { - genNr, err := strconv.Atoi(l[:i]) - if err != nil { - return nil, nil, err - } + l = l[i:] + l, _ = trimLeftSpace(l, false) + if len(l) == 0 { + return nil, nil, errors.New("pdfcpu: ParseObjectAttributes: can't find generation number") + } - objectNumber = &objNr - generationNumber = &genNr + i, _ = positionToNextWhitespaceOrChar(l, "%") + if i <= 0 { + return nil, nil, errors.New("pdfcpu: ParseObjectAttributes: can't find end of generation number") + } + + genNr, err = strconv.Atoi(l[:i]) + if err != nil { + return nil, nil, err + } + } *line = remainder - return objectNumber, generationNumber, nil + return &objNr, &genNr, nil } func parseArray(c context.Context, line *string) (*types.Array, error) { @@ -387,10 +417,6 @@ func parseStringLiteral(line *string) (types.Object, error) { return nil, errBufNotAvailable } - if log.ParseEnabled() { - log.Parse.Printf("ParseObject: value = String Literal: <%s>\n", *line) - } - l := *line if log.ParseEnabled() { @@ -451,7 +477,10 @@ func parseHexLiteral(line *string) 
(types.Object, error) { hexStr, ok := hexString(strings.TrimSpace(l[:eov])) if !ok { - return nil, errHexLiteralCorrupt + // Skip junk + *line = forwardParseBuf(l[eov:], 1) + return nil, nil + //return nil, errHexLiteralCorrupt } // position behind '>' @@ -510,18 +539,19 @@ func parseName(line *string) (*types.Name, error) { return &nameObj, nil } -func insertKey(d types.Dict, key string, val types.Object) error { +func insertKey(d types.Dict, key string, val types.Object, relaxed bool) error { if _, found := d[key]; !found { d[key] = val } else { - // for now we digest duplicate keys. - // TODO - // if !validationRelaxed { + + // was: for now we ignore duplicate keys - config flag ? + + // if !relaxed { // return errDictionaryDuplicateKey // } - // if log.CLIEnabled() { - // log.CLI.Printf("ParseDict: digesting duplicate key\n") - // } + + d[key] = val + ShowDigestedSpecViolation(fmt.Sprintf("duplicate key \"%s\"", key)) } if log.ParseEnabled() { @@ -531,12 +561,16 @@ func insertKey(d types.Dict, key string, val types.Object) error { return nil } +func dictString(l string) bool { + return len(l) > 0 && !strings.HasPrefix(l, ">>") +} + func processDictKeys(c context.Context, line *string, relaxed bool) (types.Dict, error) { l := *line var eol bool d := types.NewDict() - for !strings.HasPrefix(l, ">>") { + for dictString(l) { if err := c.Err(); err != nil { return nil, err @@ -544,7 +578,11 @@ func processDictKeys(c context.Context, line *string, relaxed bool) (types.Dict, keyName, err := parseName(&l) if err != nil { - return nil, err + if !relaxed { + return nil, err + } + // Skip junk. + l = forwardParseBuf(l, 1) } if log.ParseEnabled() { @@ -554,10 +592,12 @@ func processDictKeys(c context.Context, line *string, relaxed bool) (types.Dict, // Position to first non whitespace after key. l, eol = trimLeftSpace(l, relaxed) + if err != nil && relaxed { + // Skip junk. + continue + } + if len(l) == 0 { - if log.ParseEnabled() { - log.Parse.Println("ParseDict: only whitespace after key") - } // Only whitespace after key. return nil, errDictionaryNotTerminated } @@ -576,13 +616,13 @@ func processDictKeys(c context.Context, line *string, relaxed bool) (types.Dict, // Specifying the null object as the value of a dictionary entry (7.3.7, "Dictionary Objects") // shall be equivalent to omitting the entry entirely. if val != nil { - if err := insertKey(d, string(*keyName), val); err != nil { + if err := insertKey(d, string(*keyName), val, relaxed); err != nil { return nil, err } } // We are positioned on the char behind the last parsed dict value. - if len(l) == 0 { + if len(l) < 2 { return nil, errDictionaryNotTerminated } @@ -665,9 +705,10 @@ func startParseNumericOrIndRef(l string) (string, string, int) { 0.000000000 */ if len(str) > 1 && str[0] == '0' { - if str[1] == '+' || str[1] == '-' { + switch str[1] { + case '+', '-': str = str[1:] - } else if str[1] == '.' { + case '.': var i int for i = 2; len(str) > i && str[i] == '0'; i++ { } @@ -688,8 +729,7 @@ func isRangeError(err error) bool { return false } -func parseIndRef(s, l, l1 string, line *string, i, i2 int, rangeErr bool) (types.Object, error) { - +func parseIndRef(s, l, l1 string, line *string, i, i2 int) (types.Object, error) { g, err := strconv.Atoi(s) if err != nil { // 2nd int(generation number) not available. 
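Editor's note on the `parseFloat` change above: numeric parsing is relaxed to accept a comma as the decimal separator and to reduce a stray ".-" prefix before giving up; remaining junk is skipped rather than reported as an error. Since `parseFloat` is unexported, the sketch below mirrors that tolerant behavior in a standalone helper (the name `tolerantParseFloat` is hypothetical and not part of the pdfcpu API).

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// tolerantParseFloat mirrors the relaxed parsing introduced above:
// a single ',' is treated as the decimal separator, and a stray ".-" is reduced to ".".
// ok=false corresponds to the parser treating the token as junk and skipping it.
func tolerantParseFloat(s string) (float64, bool) {
	s = strings.Replace(s, ",", ".", 1)
	if f, err := strconv.ParseFloat(s, 64); err == nil {
		return f, true
	}
	s = strings.Replace(s, ".-", ".", 1)
	if f, err := strconv.ParseFloat(s, 64); err == nil {
		return f, true
	}
	return 0, false
}

func main() {
	for _, s := range []string{"1,5", "-0.25", ".-5", "abc"} {
		f, ok := tolerantParseFloat(s)
		fmt.Printf("%q -> %v %v\n", s, f, ok)
	}
}
```

Note that integers overflowing int (#407) take a separate path in `parseNumericOrIndRef` and are clamped to 0 rather than falling through to float parsing.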
@@ -705,9 +745,6 @@ func parseIndRef(s, l, l1 string, line *string, i, i2 int, rangeErr bool) (types l, _ = trimLeftSpace(l, false) if len(l) == 0 { - if rangeErr { - return nil, err - } // only whitespace *line = l1 return types.Integer(i), nil @@ -715,17 +752,10 @@ func parseIndRef(s, l, l1 string, line *string, i, i2 int, rangeErr bool) (types if l[0] == 'R' { *line = forwardParseBuf(l, 1) - if rangeErr { - return nil, nil - } // We have all 3 components to create an indirect reference. return *types.NewIndirectRef(i, g), nil } - if rangeErr { - return nil, err - } - // 'R' not available. // Can't be an indirect reference. if log.ParseEnabled() { @@ -737,15 +767,27 @@ func parseIndRef(s, l, l1 string, line *string, i, i2 int, rangeErr bool) (types } func parseFloat(s string) (types.Object, error) { - f, err := strconv.ParseFloat(s, 64) - if err != nil { - return nil, err + // Replace ',' with '.' to accept comma as decimal separator + s = strings.Replace(s, ",", ".", 1) + + f, n := strconv.ParseFloat(s, 64) + if n != nil { + // Fallback: handle ".-" case (e.g., ".-5") + s = strings.Replace(s, ".-", ".", 1) + f, err := strconv.ParseFloat(s, 64) + if err != nil { + // Skip junk + return nil, nil + } + if log.ParseEnabled() { + log.Parse.Printf("parseFloat: value is: %f\n", f) + } + return types.Float(f), nil } if log.ParseEnabled() { log.Parse.Printf("parseFloat: value is: %f\n", f) } - return types.Float(f), nil } @@ -762,29 +804,22 @@ func parseNumericOrIndRef(line *string) (types.Object, error) { s, l1, i1 := startParseNumericOrIndRef(l) // Try int - var rangeErr bool i, err := strconv.Atoi(s) if err != nil { - rangeErr = isRangeError(err) - if !rangeErr { - // Try float + if isRangeError(err) { + // #407 + i = 0 *line = l1 - return parseFloat(s) + return types.Integer(i), nil } - - // #407 - i = 0 + *line = l1 + return parseFloat(s) } // We have an Int! // if not followed by whitespace return sole integer value. if i1 <= 0 || delimiter(l[i1]) { - - if rangeErr { - return nil, err - } - if log.ParseEnabled() { log.Parse.Printf("parseNumericOrIndRef: value is numeric int: %d\n", i) } @@ -799,9 +834,6 @@ func parseNumericOrIndRef(line *string) (types.Object, error) { l, _ = trimLeftSpace(l, false) if len(l) == 0 { // only whitespace - if rangeErr { - return nil, err - } *line = l1 return types.Integer(i), nil } @@ -811,9 +843,6 @@ func parseNumericOrIndRef(line *string) (types.Object, error) { // if only 2 token, can't be indirect reference. // if not followed by whitespace return sole integer value. 
if i2 <= 0 || delimiter(l[i2]) { - if rangeErr { - return nil, err - } if log.ParseEnabled() { log.Parse.Printf("parseNumericOrIndRef: 2 objects => value is numeric int: %d\n", i) } @@ -826,7 +855,7 @@ func parseNumericOrIndRef(line *string) (types.Object, error) { s = l[:i2] } - return parseIndRef(s, l, l1, line, i, i2, rangeErr) + return parseIndRef(s, l, l1, line, i, i2) } func parseHexLiteralOrDict(c context.Context, l *string) (val types.Object, err error) { @@ -862,9 +891,16 @@ func parseHexLiteralOrDict(c context.Context, l *string) (val types.Object, err return val, nil } -func parseBooleanOrNull(l string) (val types.Object, s string, ok bool) { +func parseBooleanOrNull(l string) (types.Object, string, bool) { + + if len(l) < 4 { + return nil, "", false + } + + s := strings.ToLower(l[:4]) + // null, absent object - if strings.HasPrefix(l, "null") { + if strings.HasPrefix(s, "null") { if log.ParseEnabled() { log.Parse.Println("parseBoolean: value = null") } @@ -872,15 +908,21 @@ func parseBooleanOrNull(l string) (val types.Object, s string, ok bool) { } // boolean true - if strings.HasPrefix(l, "true") { + if strings.HasPrefix(s, "true") { if log.ParseEnabled() { log.Parse.Println("parseBoolean: value = true") } return types.Boolean(true), "true", true } + if len(l) < 5 { + return nil, "", false + } + + s += strings.ToLower(l[4:5]) + // boolean false - if strings.HasPrefix(l, "false") { + if strings.HasPrefix(s, "false") { if log.ParseEnabled() { log.Parse.Println("parseBoolean: value = false") } @@ -1098,3 +1140,254 @@ func ObjectStreamDict(sd *types.StreamDict) (*types.ObjectStreamDict, error) { return &osd, nil } + +func isMarkerTerminated(r rune) bool { + return r == 0x00 || unicode.IsSpace(r) +} + +func detectMarker(line, marker string) int { + i := strings.Index(line, marker) + if i < 0 { + return i + } + if i+len(marker) >= len(line) { + return -1 + } + off := i + len(marker) + ind := i + for !isMarkerTerminated(rune(line[off])) { + line = line[off:] + if marker == "endobj" { + j := strings.Index(line, "xref") + if j >= 0 { + r := rune(line[j+4]) + if isMarkerTerminated(r) { + return ind + } + } + } + i = strings.Index(line, marker) + if i < 0 { + return -1 + } + if i+len(marker) >= len(line) { + return -1 + } + off = i + len(marker) + ind += off + } + + return ind +} + +func detectMarkers(line string, endInd, streamInd *int) { + //fmt.Printf("buflen=%d\n%s", len(line), hex.Dump([]byte(line))) + if *endInd == 0 { + *endInd = detectMarker(line, "endobj") + + } + if *streamInd == 0 { + *streamInd = detectMarker(line, "stream") + } +} + +func positionAfterStringLiteral(line string) (string, int, error) { + i := balancedParenthesesPrefix(line) + if i < 0 { + return "", 0, errStringLiteralCorrupt + } + + line = forwardParseBuf(line[i:], 1) + + return line, i + 1, nil +} + +func posFloor(pos1, pos2 int) int { + if pos1 < 0 { + return pos2 + } + if pos1 < pos2 { + return pos1 + } + if pos2 < 0 { + return pos1 + } + return pos2 +} + +func detectNonEscaped(line, s string) int { + var ind int + for { + i := strings.Index(line, s) + if i < 0 { + // did not find s + return -1 + } + if i == 0 { + // found s at pos 0 + return ind + } + if line[i-1] != 0x5c { + // found s at pos i + return ind + i + } + // found escaped s + if i == len(line)-1 { + // last is escaped s -> did not find s + return -1 + } + // moving on after escaped s + line = line[i+1:] + ind += i + 1 + } +} + +func applyOffBoth(endInd, streamInd, off int) (int, int, error) { + if endInd >= 0 { + endInd += off + } + if 
streamInd >= 0 { + streamInd += off + } + return endInd, streamInd, nil +} + +func applyOffEndIndFirst(endInd, streamInd, off, floor int) (int, int, error) { + endInd += off + if streamInd > 0 { + if streamInd > floor { + // stream after any ( or % to skip + streamInd = -1 + } else { + streamInd += off + } + } + return endInd, streamInd, nil +} + +func applyOffStreamIndFirst(endInd, streamInd, off, floor int) (int, int, error) { + streamInd += off + if endInd > 0 { + if endInd > floor { + // endobj after any ( or % to skip + endInd = -1 + } else { + endInd += off + } + } + return endInd, streamInd, nil +} + +func isComment(commentPos, strLitPos int) bool { + return commentPos >= 0 && (strLitPos < 0 || commentPos < strLitPos) +} + +func DetectKeywords(line string) (endInd int, streamInd int, err error) { + return DetectKeywordsWithContext(context.Background(), line) +} + +func skipComment(line string, commentPos int, off, endInd, streamInd *int) string { + l, i := positionToNextEOL(line[commentPos:]) + if l == "" { + return l + } + delta := commentPos + i + *off += delta + + // Adjust found positions for changed line. + if *endInd > delta { + *endInd -= delta + } else if *endInd != -1 { + *endInd = 0 + } + if *streamInd > delta { + *streamInd -= delta + } else if *streamInd != -1 { + *streamInd = 0 + } + return l +} + +func skipStringLit(line string, strLitPos int, off, endInd, streamInd *int) (string, error) { + l, i, err := positionAfterStringLiteral(line[strLitPos:]) + if err != nil { + return "", err + } + delta := strLitPos + i + *off += delta + // Adjust found positions for changed line. + if *endInd > delta { + *endInd -= delta + } else if *endInd != -1 { + *endInd = 0 + } + if *streamInd > delta { + *streamInd -= delta + } else if *streamInd != -1 { + *streamInd = 0 + } + return l, nil +} + +func skipCommentOrStringLiteral(line string, commentPos, slPos int, off, endInd, streamInd *int) (string, error) { + if isComment(commentPos, slPos) { + // skip comment if % before any ( + line = skipComment(line, commentPos, off, endInd, streamInd) + if line == "" { + return "", nil + } + return line, nil + } + return skipStringLit(line, slPos, off, endInd, streamInd) +} + +func DetectKeywordsWithContext(c context.Context, line string) (endInd int, streamInd int, err error) { + // return endInd or streamInd which ever first encountered. + off := 0 + strLitPos, commentPos := 0, 0 + for { + if err := c.Err(); err != nil { + return -1, -1, err + } + + detectMarkers(line, &endInd, &streamInd) + + if off == 0 && endInd < 0 && streamInd < 0 { + return -1, -1, nil + } + + // Don't re-search in partial line if known to be not present. 
+ if strLitPos != -1 { + strLitPos = detectNonEscaped(line, "(") + } + if commentPos != -1 { + commentPos = detectNonEscaped(line, "%") + } + + if strLitPos < 0 && commentPos < 0 { + // neither ( nor % to skip + return applyOffBoth(endInd, streamInd, off) + } + + floor := posFloor(strLitPos, commentPos) + + if endInd > 0 { + if endInd < floor { + // endobj before any ( or % to skip + return applyOffEndIndFirst(endInd, streamInd, off, floor) + } + } + + if streamInd > 0 { + if streamInd < floor { + // stream before any ( or % to skip + return applyOffStreamIndFirst(endInd, streamInd, off, floor) + } + } + + line, err = skipCommentOrStringLiteral(line, commentPos, strLitPos, &off, &endInd, &streamInd) + if err != nil { + return -1, -1, err + } + } +} diff --git a/pkg/pdfcpu/model/parseConfig.go b/pkg/pdfcpu/model/parseConfig.go index f0397bf2..44571d46 100644 --- a/pkg/pdfcpu/model/parseConfig.go +++ b/pkg/pdfcpu/model/parseConfig.go @@ -22,6 +22,7 @@ package model import ( "bytes" "io" + "strings" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -29,6 +30,8 @@ import ( ) type configuration struct { + CreationDate string `yaml:"created"` + Version string `yaml:"version"` CheckFileNameExt bool `yaml:"checkFileNameExt"` Reader15 bool `yaml:"reader15"` DecodeAllStreams bool `yaml:"decodeAllStreams"` @@ -41,18 +44,27 @@ type configuration struct { EncryptKeyLength int `yaml:"encryptKeyLength"` Permissions int `yaml:"permissions"` Unit string `yaml:"unit"` - Units string `yaml:"units"` // Be flexible if version < v0.3.8 TimestampFormat string `yaml:"timestampFormat"` DateFormat string `yaml:"dateFormat"` + Optimize bool `yaml:"optimize"` + OptimizeBeforeWriting bool `yaml:"optimizeBeforeWriting"` + OptimizeResourceDicts bool `yaml:"optimizeResourceDicts"` OptimizeDuplicateContentStreams bool `yaml:"optimizeDuplicateContentStreams"` CreateBookmarks bool `yaml:"createBookmarks"` NeedAppearances bool `yaml:"needAppearances"` + Offline bool `yaml:"offline"` + Timeout int `yaml:"timeout"` + TimeoutCRL int `yaml:"timeoutCRL"` + TimeoutOCSP int `yaml:"timeoutOCSP"` + PreferredCertRevocationChecker string `yaml:"preferredCertRevocationChecker"` } func loadedConfig(c configuration, configPath string) *Configuration { var conf Configuration conf.Path = configPath + conf.CreationDate = c.CreationDate + conf.Version = c.Version conf.CheckFileNameExt = c.CheckFileNameExt conf.Reader15 = c.Reader15 conf.DecodeAllStreams = c.DecodeAllStreams @@ -93,9 +105,26 @@ func loadedConfig(c configuration, configPath string) *Configuration { conf.TimestampFormat = c.TimestampFormat conf.DateFormat = c.DateFormat + conf.Optimize = c.Optimize + + // TODO add to config.yml + conf.OptimizeBeforeWriting = true + + conf.OptimizeResourceDicts = c.OptimizeResourceDicts conf.OptimizeDuplicateContentStreams = c.OptimizeDuplicateContentStreams conf.CreateBookmarks = c.CreateBookmarks conf.NeedAppearances = c.NeedAppearances + conf.Offline = c.Offline + conf.Timeout = c.Timeout + conf.TimeoutCRL = c.TimeoutCRL + conf.TimeoutOCSP = c.TimeoutOCSP + + switch strings.ToLower(c.PreferredCertRevocationChecker) { + case "crl": + conf.PreferredCertRevocationChecker = CRL + case "ocsp": + conf.PreferredCertRevocationChecker = OCSP + } return &conf } @@ -118,15 +147,11 @@ func parseConfigFile(r io.Reader, configPath string) error { if !types.MemberOf(c.ValidationMode, []string{"ValidationStrict", "ValidationRelaxed"}) { return errors.Errorf("invalid validationMode: %s", c.ValidationMode) } + if !types.MemberOf(c.Eol, 
[]string{"EolLF", "EolCR", "EolCRLF"}) { return errors.Errorf("invalid eol: %s", c.Eol) } - if c.Unit == "" { - // v0.3.8 modifies "units" to "unit". - if c.Units != "" { - c.Unit = c.Units - } - } + if !types.MemberOf(c.Unit, []string{"points", "inches", "cm", "mm"}) { return errors.Errorf("invalid unit: %s", c.Unit) } @@ -135,6 +160,13 @@ func parseConfigFile(r io.Reader, configPath string) error { return errors.Errorf("encryptKeyLength possible values: 40, 128, 256, got: %s", c.Unit) } + if !types.MemberOf(c.PreferredCertRevocationChecker, []string{"crl", "ocsp"}) { + if c.PreferredCertRevocationChecker != "" { + return errors.Errorf("invalid preferred certificate revocation checker: %s", c.PreferredCertRevocationChecker) + } + c.PreferredCertRevocationChecker = "crl" + } + loadedDefaultConfig = loadedConfig(c, configPath) return nil diff --git a/pkg/pdfcpu/model/parseConfig_js.go b/pkg/pdfcpu/model/parseConfig_js.go index 26a4387d..59184a86 100644 --- a/pkg/pdfcpu/model/parseConfig_js.go +++ b/pkg/pdfcpu/model/parseConfig_js.go @@ -28,6 +28,16 @@ import ( // This gets rid of the gopkg.in/yaml.v2 dependency for wasm builds. +func handleCreationDate(v string, c *Configuration) error { + c.CreationDate = v + return nil +} + +func handleVersion(v string, c *Configuration) error { + c.Version = v + return nil +} + func handleCheckFileNameExt(k, v string, c *Configuration) error { v = strings.ToLower(v) if v != "true" && v != "false" { @@ -131,6 +141,33 @@ func handleConfEncryptKeyLength(v string, c *Configuration) error { return nil } +func handleTimeout(v string, c *Configuration) error { + i, err := strconv.Atoi(v) + if err != nil { + return errors.Errorf("timeout is numeric > 0, got: %s", v) + } + c.Timeout = i + return nil +} + +func handleTimeoutCRL(v string, c *Configuration) error { + i, err := strconv.Atoi(v) + if err != nil { + return errors.Errorf("timeoutCRL is numeric > 0, got: %s", v) + } + c.TimeoutCRL = i + return nil +} + +func handleTimeoutOCSP(v string, c *Configuration) error { + i, err := strconv.Atoi(v) + if err != nil { + return errors.Errorf("timeoutOCSP is numeric > 0, got: %s", v) + } + c.TimeoutOCSP = i + return nil +} + func handleConfPermissions(v string, c *Configuration) error { i, err := strconv.Atoi(v) if err != nil { @@ -157,6 +194,21 @@ func handleConfUnit(v string, c *Configuration) error { return nil } +func handlePreferredCertRevocationChecker(v string, c *Configuration) error { + v1 := strings.ToLower(v) + switch v1 { + case "crl": + c.PreferredCertRevocationChecker = CRL + case "ocsp": + c.PreferredCertRevocationChecker = OCSP + case "": + c.PreferredCertRevocationChecker = CRL + default: + return errors.Errorf("invalid preferredCertRevocationChecker: %s", v) + } + return nil +} + func handleTimestampFormat(v string, c *Configuration) error { c.TimestampFormat = v return nil @@ -167,36 +219,23 @@ func handleDateFormat(v string, c *Configuration) error { return nil } -func handleOptimizeDuplicateContentStreams(k, v string, c *Configuration) error { - v = strings.ToLower(v) - if v != "true" && v != "false" { - return errors.Errorf("config key %s is boolean", k) - } - c.OptimizeDuplicateContentStreams = v == "true" - return nil -} - -func handleCreateBookmarks(k, v string, c *Configuration) error { - v = strings.ToLower(v) - if v != "true" && v != "false" { - return errors.Errorf("config key %s is boolean", k) - } - c.CreateBookmarks = v == "true" - return nil -} - -func handleNeedAppearances(k, v string, c *Configuration) error { +func boolean(k, v 
string) (bool, error) { v = strings.ToLower(v) if v != "true" && v != "false" { - return errors.Errorf("config key %s is boolean", k) + return false, errors.Errorf("config key %s is boolean", k) } - c.NeedAppearances = v == "true" - return nil + return v == "true", nil } func parseKeysPart1(k, v string, c *Configuration) (bool, error) { switch k { + case "created": + return true, handleCreationDate(v, c) + + case "version": + return true, handleVersion(v, c) + case "checkFileNameExt": return true, handleCheckFileNameExt(k, v, c) @@ -225,38 +264,66 @@ func parseKeysPart1(k, v string, c *Configuration) (bool, error) { return false, nil } -func parseKeysPart2(k, v string, c *Configuration) error { +func parseKeysPart2(k, v string, c *Configuration) (bool, error) { switch k { case "encryptUsingAES": - return handleConfEncryptUsingAES(k, v, c) + return true, handleConfEncryptUsingAES(k, v, c) case "encryptKeyLength": - return handleConfEncryptKeyLength(v, c) + return true, handleConfEncryptKeyLength(v, c) case "permissions": - return handleConfPermissions(v, c) + return true, handleConfPermissions(v, c) case "unit", "units": - return handleConfUnit(v, c) + return true, handleConfUnit(v, c) case "timestampFormat": - return handleTimestampFormat(v, c) + return true, handleTimestampFormat(v, c) case "dateFormat": - return handleDateFormat(v, c) + return true, handleDateFormat(v, c) + + case "timeout": + return true, handleTimeout(v, c) + + case "timeoutCRL": + return true, handleTimeoutCRL(v, c) + + case "timeoutOCSP": + return true, handleTimeoutOCSP(v, c) + + case "preferredCertRevocationChecker": + return true, handlePreferredCertRevocationChecker(v, c) + } + + return false, nil +} + +func parseKeysPart3(k, v string, c *Configuration) (err error) { + switch k { + + case "optimize": + c.Optimize, err = boolean(k, v) + + case "optimizeResourceDicts": + c.OptimizeResourceDicts, err = boolean(k, v) case "optimizeDuplicateContentStreams": - return handleOptimizeDuplicateContentStreams(k, v, c) + c.OptimizeDuplicateContentStreams, err = boolean(k, v) case "createBookmarks": - return handleCreateBookmarks(k, v, c) + c.CreateBookmarks, err = boolean(k, v) case "needAppearances": - return handleNeedAppearances(k, v, c) + c.NeedAppearances, err = boolean(k, v) + + case "offline": + c.Offline, err = boolean(k, v) } - return nil + return err } func parseKeyValue(k, v string, c *Configuration) error { @@ -267,7 +334,16 @@ func parseKeyValue(k, v string, c *Configuration) error { if ok { return nil } - return parseKeysPart2(k, v, c) + + ok, err = parseKeysPart2(k, v, c) + if err != nil { + return err + } + if ok { + return nil + } + + return parseKeysPart3(k, v, c) } func parseConfigFile(r io.Reader, configPath string) error { @@ -275,6 +351,9 @@ func parseConfigFile(r io.Reader, configPath string) error { var conf Configuration conf.Path = configPath + // TODO add to config.yml + conf.OptimizeBeforeWriting = true + s := bufio.NewScanner(r) for s.Scan() { t := s.Text() diff --git a/pkg/pdfcpu/model/parseContent.go b/pkg/pdfcpu/model/parseContent.go index 7a5fa33a..f4626894 100644 --- a/pkg/pdfcpu/model/parseContent.go +++ b/pkg/pdfcpu/model/parseContent.go @@ -48,23 +48,31 @@ func skipDict(l *string) error { return errDictionaryCorrupt } if s[i] == '<' { - j++ + if i == len(s)-1 { + return errDictionaryCorrupt + } + if s[i+1] == '<' { + j++ + s = s[i+2:] + continue + } s = s[i+1:] continue } if s[i] == '>' { - if j > 0 { - j-- - s = s[i+1:] - continue - } - // >> ? 
- s = s[i:] - if !strings.HasPrefix(s, ">>") { + if i == len(s)-1 { return errDictionaryCorrupt } - *l = s[2:] - break + if s[i+1] == '>' { + if j > 0 { + j-- + s = s[i+2:] + continue + } + *l = s[i+2:] + break + } + s = s[i+1:] } } return nil @@ -75,9 +83,17 @@ func skipStringLiteral(l *string) error { i := 0 for { i = strings.IndexByte(s, byte(')')) - if i <= 0 || i > 0 && s[i-1] != '\\' || i > 1 && s[i-2] == '\\' { + if i <= 0 || i > 0 && s[i-1] != '\\' { + break + } + k := 0 + for j := i - 1; j >= 0 && s[j] == '\\'; j-- { + k++ + } + if k%2 == 0 { break } + // Skip \) s = s[i+1:] } if i < 0 { @@ -128,21 +144,70 @@ func skipTJ(l *string) error { return nil } +func lookupEI(l *string) (int, error) { + s := *l + //fmt.Printf("\n%s\n", hex.Dump([]byte(s))) + for i := 2; i <= len(s)-2; i++ { + if s[i:i+2] != "EI" { + continue + } + j := i + 2 + ws := 0 + for j < len(s) && unicode.IsSpace(rune(s[j])) && ws < 2 { + j++ + ws++ + } + switch { + case j == len(s) && ws <= 2: + // "EI" at end or followed by 1–2 spaces till end + return i, nil + case ws >= 1 && ws <= 2 && j < len(s) && s[j] == 'Q': + // "EI" followed by 1–2 spaces, then 'Q' + return i, nil + case ws == 0 && j == len(s): + // suffix "EI" + return i, nil + } + } + return 0, errBIExpressionCorrupt +} + func skipBI(l *string, prn PageResourceNames) error { s := *l + //fmt.Printf("skipBI <%s>\n", s) for { s = strings.TrimLeftFunc(s, whitespaceOrEOL) - if strings.HasPrefix(s, "EI") && whitespaceOrEOL(rune(s[2])) { - s = s[2:] + if strings.HasPrefix(s, "ID") && whitespaceOrEOL(rune(s[2])) { + i, err := lookupEI(&s) + if err != nil { + return err + } + s = s[i+2:] break } - // TODO Check len(s) > 0 + if len(s) == 0 { + return errBIExpressionCorrupt + } if s[0] == '/' { s = s[1:] i, _ := positionToNextWhitespaceOrChar(s, "/") if i < 0 { return errBIExpressionCorrupt } + token := s[:i] + if token == "CS" || token == "ColorSpace" { + s = s[i:] + s, _ = trimLeftSpace(s, false) + s = s[1:] + i, _ = positionToNextWhitespaceOrChar(s, "/") + if i < 0 { + return errBIExpressionCorrupt + } + name := s[:i] + if !types.MemberOf(name, []string{"DeviceGray", "DeviceRGB", "DeviceCMYK", "Indexed", "G", "RGB", "CMYK", "I"}) { + prn["ColorSpace"][name] = true + } + } s = s[i:] continue } @@ -164,6 +229,12 @@ func positionToNextContentToken(line *string, prn PageResourceNames) (bool, erro // whitespace or eol only return true, nil } + if l[0] == '%' { + // Skip comment. 
+ l, _ = positionToNextEOL(l) + continue + } + if l[0] == '[' { // Skip TJ expression: // [()...()] TJ @@ -200,12 +271,12 @@ func positionToNextContentToken(line *string, prn PageResourceNames) (bool, erro } } -func nextContentToken(line *string, prn PageResourceNames) (string, error) { +func nextContentToken(pre string, line *string, prn PageResourceNames) (string, error) { // A token is either a name or some chunk terminated by white space or one of /, (, [ if noBuf(line) { return "", nil } - l := *line + l := pre + *line t := "" //log.Parse.Printf("nextContentToken: start buf= <%s>\n", *line) @@ -258,9 +329,8 @@ func nextContentToken(line *string, prn PageResourceNames) (string, error) { return t, nil } -func resourceNameAtPos1(s, name string, prn PageResourceNames) bool { - switch s { - case "cs", "CS": +func colorSpace(s, name string, prn PageResourceNames) bool { + if strings.HasPrefix(s, "cs") || strings.HasPrefix(s, "CS") { if !types.MemberOf(name, []string{"DeviceGray", "DeviceRGB", "DeviceCMYK", "Pattern"}) { prn["ColorSpace"][name] = true if log.ParseEnabled() { @@ -268,72 +338,90 @@ func resourceNameAtPos1(s, name string, prn PageResourceNames) bool { } } return true + } + return false +} + +func resourceNameAtPos1(s, name string, prn PageResourceNames) (string, bool) { + if colorSpace(s, name, prn) { + return s[2:], true + } - case "gs": + if strings.HasPrefix(s, "gs") { prn["ExtGState"][name] = true if log.ParseEnabled() { log.Parse.Printf("ExtGState[%s]\n", name) } - return true + return s[2:], true + } - case "Do": + if strings.HasPrefix(s, "Do") { prn["XObject"][name] = true if log.ParseEnabled() { log.Parse.Printf("XObject[%s]\n", name) } - return true + return s[2:], true + } - case "sh": + if strings.HasPrefix(s, "sh") { prn["Shading"][name] = true if log.ParseEnabled() { log.Parse.Printf("Shading[%s]\n", name) } - return true + return s[2:], true + } - case "scn", "SCN": + if strings.HasPrefix(s, "scn") || strings.HasPrefix(s, "SCN") { prn["Pattern"][name] = true if log.ParseEnabled() { log.Parse.Printf("Pattern[%s]\n", name) } - return true + return s[3:], true + } - case "ri", "BMC", "MP": - return true + if strings.HasPrefix(s, "ri") || strings.HasPrefix(s, "MP") { + return s[2:], true + } + if strings.HasPrefix(s, "BMC") { + return s[3:], true } - return false + return "", false } -func resourceNameAtPos2(s, name string, prn PageResourceNames) bool { +func resourceNameAtPos2(s, name string, prn PageResourceNames) (string, bool) { switch s { case "Tf": prn["Font"][name] = true if log.ParseEnabled() { log.Parse.Printf("Font[%s]\n", name) } - return true + return "", true case "BDC", "DP": prn["Properties"][name] = true if log.ParseEnabled() { log.Parse.Printf("Properties[%s]\n", name) } - return true + return "", true } - return false + return "", false } func parseContent(s string) (PageResourceNames, error) { var ( + pre string name string n bool + ok bool ) prn := NewPageResourceNames() //fmt.Printf("parseContent:\n%s\n", hex.Dump([]byte(s))) for pos := 0; ; { - t, err := nextContentToken(&s, prn) + t, err := nextContentToken(pre, &s, prn) + pre = "" if log.ParseEnabled() { log.Parse.Printf("t = <%s>\n", t) } @@ -367,17 +455,22 @@ func parseContent(s string) (PageResourceNames, error) { pos++ if pos == 1 { - if resourceNameAtPos1(t, name, prn) { + if pre, ok = resourceNameAtPos1(t, name, prn); ok { n = false } continue } if pos == 2 { - if resourceNameAtPos2(t, name, prn) { + if pre, ok = resourceNameAtPos2(t, name, prn); ok { n = false } continue } - return nil, 
errPageContentCorrupt + ShowSkipped("corrupt page content") + n = false + if log.ParseEnabled() { + log.Parse.Printf("skip:%s\n", t) + } + //return nil, errPageContentCorrupt } } diff --git a/pkg/pdfcpu/model/parseContent_test.go b/pkg/pdfcpu/model/parseContent_test.go index 2fd17a5f..2bd38f4c 100644 --- a/pkg/pdfcpu/model/parseContent_test.go +++ b/pkg/pdfcpu/model/parseContent_test.go @@ -26,11 +26,12 @@ func TestParseContent(t *testing.T) { Span<>>, Span<>>, Span<>> BDC /a1 BMC/a2 MP /a3 /MC0 BDC/P0 scn/RelativeColorimetric ri/P1 SCN/GS0 gs[(Q[i,j]/2.)16.6(The/]maxi\)-)]TJ/CS1 CS/a4<>> BDC /a5 <>> BDC (0.5*\(1/8\)*64 or +/4.\))Tj/T1_0 1 Tf <00150015> Tj /Im5 Do/a5 << /A >> BDC/a6/MC1 DP /a7<<>>DP - BI /IM true/W 1/CS/InlineCS/H 1/BPC 1 ID EI Q /Pattern cs/Span<>> BDC/SH1 sh` + BI /IM true/W 1/CS/CS2/H 1/BPC 1 ID EI Q /Pattern cs/Span<>> BDC/SH1 sh` want := NewPageResourceNames() want["ColorSpace"]["CS0"] = true want["ColorSpace"]["CS1"] = true + want["ColorSpace"]["CS2"] = true want["ExtGState"]["GS0"] = true want["Font"]["T1_0"] = true want["Pattern"]["P0"] = true diff --git a/pkg/pdfcpu/model/parse_array_test.go b/pkg/pdfcpu/model/parse_array_test.go index 8a41e1b2..44d6bd5d 100644 --- a/pkg/pdfcpu/model/parse_array_test.go +++ b/pkg/pdfcpu/model/parse_array_test.go @@ -165,9 +165,11 @@ func TestParseArray(t *testing.T) { doTestParseArrayFail("[", t) doTestParseArrayFail("[ ", t) doTestParseArrayFail("[<0ab> ", t) - doTestParseArrayFail("[a]", t) - doTestParseArrayFail("[<0ab> a]", t) - doTestParseArrayFail("[<0ab> a]", t) + + doTestParseArrayOK("[a]", t) + + doTestParseArrayOK("[<0ab> a]", t) + doTestParseArrayOK("[<0ab> a]", t) doTestParseArrayOK("[]", t) doTestParseArrayOK("[< AB >]", t) diff --git a/pkg/pdfcpu/model/parse_dict_test.go b/pkg/pdfcpu/model/parse_dict_test.go index b6004f76..bcad1285 100644 --- a/pkg/pdfcpu/model/parse_dict_test.go +++ b/pkg/pdfcpu/model/parse_dict_test.go @@ -74,9 +74,9 @@ func doTestParseDictHexLiteral(t *testing.T) { doTestParseDictFail("<>", t) doTestParseDictFail("<", t) doTestParseDictFail("<", t) - doTestParseDictFail("<>>", t) - doTestParseDictFail("<>>", t) - doTestParseDictFail("<", t) + doTestParseDictOK("<>>", t) + doTestParseDictOK("<>>", t) + doTestParseDictOK("<", t) // Skip junk doTestParseDictOK("</Key2>>", t) doTestParseDictOK("<< /Key1 /Key2 >>", t) doTestParseDictOK("<>>", t) @@ -116,14 +116,15 @@ func doTestParseDictBool(t *testing.T) { doTestParseDictOK("<>", t) doTestParseDictOK("<>", t) doTestParseDictOK("<>", t) - doTestParseDictFail("<>", t) + doTestParseDictOK("<>", t) + doTestParseDictOK("<>", t) } func doTestParseDictNumerics(t *testing.T) { // Numerics doTestParseDictOK("<>", t) doTestParseDictOK("<>", t) - doTestParseDictFail("<>", t) + doTestParseDictOK("<>", t) } func doTestParseDictIndirectRefs(t *testing.T) { diff --git a/pkg/pdfcpu/model/parse_test.go b/pkg/pdfcpu/model/parse_test.go index ddd191ec..fb9ed70b 100644 --- a/pkg/pdfcpu/model/parse_test.go +++ b/pkg/pdfcpu/model/parse_test.go @@ -66,3 +66,55 @@ func TestDecodeNameHexValid(t *testing.T) { } } } + +func TestDetectNonEscaped(t *testing.T) { + testcases := []struct { + input string + want int + }{ + {"", -1}, + {" ( ", 1}, + {" \\( )", -1}, + {"\\(", -1}, + {" \\( ", -1}, + {"\\()(", 3}, + {" \\(\\((abc)", 5}, + } + for _, tc := range testcases { + got := detectNonEscaped(tc.input, "(") + if tc.want != got { + t.Errorf("%s, want: %d, got: %d", tc.input, tc.want, got) + } + } +} + +func TestDetectKeywords(t *testing.T) { + msg := "detectKeywords" + + // 
process: # gen obj ... obj dict ... {stream ... data ... endstream} endobj + // streamInd endInd + // -1 if absent -1 if absent + + //s := "5 0 obj\n<>\nendobj\n" //78 + + s := "1 0 obj\n<<\n /Lang (en-endobject-stream-UK%) % comment \n>>\nendobj\n\n2 0 obj\n" + // 0....... ..1 .........2.........3.........4.........5..... ... .6 + endInd, _, err := DetectKeywords(s) + if err != nil { + t.Errorf("%s failed: %v", msg, err) + } + if endInd != 59 { + t.Errorf("%s failed: want %d, got %d", msg, 59, endInd) + } + + // negative test + s = "1 0 obj\n<<\n /Lang (en-endobject-stream-UK%) % endobject" + endInd, _, err = DetectKeywords(s) + if err != nil { + t.Errorf("%s failed: %v", msg, err) + } + if endInd > 0 { + t.Errorf("%s failed: want %d, got %d", msg, 0, endInd) + } + +} diff --git a/pkg/pdfcpu/model/resource.go b/pkg/pdfcpu/model/resource.go index 6eb47224..4dd0587e 100644 --- a/pkg/pdfcpu/model/resource.go +++ b/pkg/pdfcpu/model/resource.go @@ -18,6 +18,7 @@ package model import ( "fmt" + "sort" "strings" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" @@ -31,6 +32,7 @@ type FontObject struct { FontDict types.Dict Data []byte Extension string + Embedded bool } // AddResourceName adds a resourceName referring to this font. @@ -79,45 +81,46 @@ func (fo FontObject) Encoding() string { return encoding } -// Embedded returns true if the font is embedded into this PDF file. -func (fo FontObject) Embedded() (embedded bool) { - - _, embedded = fo.FontDict.Find("FontDescriptor") - - if !embedded { - _, embedded = fo.FontDict.Find("DescendantFonts") - } - - return -} - func (fo FontObject) String() string { return fmt.Sprintf("%-10s %-30s %-10s %-20s %-8v %s\n", fo.Prefix, fo.FontName, fo.SubType(), fo.Encoding(), - fo.Embedded(), fo.ResourceNamesString()) + fo.Embedded, fo.ResourceNamesString()) } // ImageObject represents an image used in a PDF file. type ImageObject struct { - ResourceNames []string + ResourceNames map[int]string ImageDict *types.StreamDict } -// AddResourceName adds a resourceName to this imageObject's ResourceNames dict. -func (io *ImageObject) AddResourceName(resourceName string) { - for _, resName := range io.ResourceNames { - if resName == resourceName { - return - } - } - io.ResourceNames = append(io.ResourceNames, resourceName) +// DuplicateImageObject represents a redundant image. +type DuplicateImageObject struct { + ImageDict *types.StreamDict + NewObjNr int +} + +// AddResourceName adds a resourceName to this imageObject's ResourceNames map. +func (io *ImageObject) AddResourceName(pageNr int, resourceName string) { + io.ResourceNames[pageNr] = resourceName } // ResourceNamesString returns a string representation of the ResourceNames for this image. func (io ImageObject) ResourceNamesString() string { + pageNrs := make([]int, 0, len(io.ResourceNames)) + for k := range io.ResourceNames { + pageNrs = append(pageNrs, k) + } + sort.Ints(pageNrs) + var sb strings.Builder + for i, pageNr := range pageNrs { + if i > 0 { + sb.WriteString(", ") + } + sb.WriteString(fmt.Sprintf("%d:%s", pageNr, io.ResourceNames[pageNr])) + } var resNames []string - resNames = append(resNames, io.ResourceNames...) 
+ resNames = append(resNames, sb.String()) return strings.Join(resNames, ",") } diff --git a/pkg/pdfcpu/model/resources/certs/ades-lotl.p7c b/pkg/pdfcpu/model/resources/certs/ades-lotl.p7c new file mode 100644 index 00000000..fd18d8c7 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/ades-lotl.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/at.p7c b/pkg/pdfcpu/model/resources/certs/at.p7c new file mode 100644 index 00000000..687f1d7e Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/at.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/be.p7c b/pkg/pdfcpu/model/resources/certs/be.p7c new file mode 100644 index 00000000..3ca98e9a Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/be.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/bg.p7c b/pkg/pdfcpu/model/resources/certs/bg.p7c new file mode 100644 index 00000000..709d9b0d Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/bg.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/cy.p7c b/pkg/pdfcpu/model/resources/certs/cy.p7c new file mode 100644 index 00000000..2bf8de24 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/cy.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/cz.p7c b/pkg/pdfcpu/model/resources/certs/cz.p7c new file mode 100644 index 00000000..bbf1b89f Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/cz.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/de.p7c b/pkg/pdfcpu/model/resources/certs/de.p7c new file mode 100644 index 00000000..443be20a Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/de.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/dk.p7c b/pkg/pdfcpu/model/resources/certs/dk.p7c new file mode 100644 index 00000000..8affe6e6 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/dk.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/ee.p7c b/pkg/pdfcpu/model/resources/certs/ee.p7c new file mode 100644 index 00000000..6b2229f8 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/ee.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/el.p7c b/pkg/pdfcpu/model/resources/certs/el.p7c new file mode 100644 index 00000000..abdc17b9 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/el.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/es.p7c b/pkg/pdfcpu/model/resources/certs/es.p7c new file mode 100644 index 00000000..8e351029 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/es.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/euiba-tl.p7c b/pkg/pdfcpu/model/resources/certs/euiba-tl.p7c new file mode 100644 index 00000000..65ee2317 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/euiba-tl.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/eutl.p7c b/pkg/pdfcpu/model/resources/certs/eutl.p7c new file mode 100644 index 00000000..3c794b3a Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/eutl.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/fi.p7c b/pkg/pdfcpu/model/resources/certs/fi.p7c new file mode 100644 index 00000000..c0900276 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/fi.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/fr.p7c b/pkg/pdfcpu/model/resources/certs/fr.p7c new file mode 100644 index 00000000..c42bf51a Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/fr.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/hr.p7c b/pkg/pdfcpu/model/resources/certs/hr.p7c new file mode 100644 index 00000000..56e7fadf Binary files /dev/null and 
b/pkg/pdfcpu/model/resources/certs/hr.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/hu.p7c b/pkg/pdfcpu/model/resources/certs/hu.p7c new file mode 100644 index 00000000..06de2bc2 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/hu.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/ie.p7c b/pkg/pdfcpu/model/resources/certs/ie.p7c new file mode 100644 index 00000000..032b2945 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/ie.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/is.p7c b/pkg/pdfcpu/model/resources/certs/is.p7c new file mode 100644 index 00000000..f247b20b Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/is.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/it.p7c b/pkg/pdfcpu/model/resources/certs/it.p7c new file mode 100644 index 00000000..a3316494 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/it.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/li.p7c b/pkg/pdfcpu/model/resources/certs/li.p7c new file mode 100644 index 00000000..6861c16d Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/li.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/lt.p7c b/pkg/pdfcpu/model/resources/certs/lt.p7c new file mode 100644 index 00000000..494e24d4 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/lt.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/lu.p7c b/pkg/pdfcpu/model/resources/certs/lu.p7c new file mode 100644 index 00000000..7695a277 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/lu.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/lv.p7c b/pkg/pdfcpu/model/resources/certs/lv.p7c new file mode 100644 index 00000000..8269039c Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/lv.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/mt.p7c b/pkg/pdfcpu/model/resources/certs/mt.p7c new file mode 100644 index 00000000..bbc733b1 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/mt.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/nl.p7c b/pkg/pdfcpu/model/resources/certs/nl.p7c new file mode 100644 index 00000000..b7dee80f Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/nl.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/no.p7c b/pkg/pdfcpu/model/resources/certs/no.p7c new file mode 100644 index 00000000..ac36f939 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/no.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/pl.p7c b/pkg/pdfcpu/model/resources/certs/pl.p7c new file mode 100644 index 00000000..0ffe3956 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/pl.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/pt.p7c b/pkg/pdfcpu/model/resources/certs/pt.p7c new file mode 100644 index 00000000..de8a62a9 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/pt.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/ro.p7c b/pkg/pdfcpu/model/resources/certs/ro.p7c new file mode 100644 index 00000000..28a18ec3 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/ro.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/se.p7c b/pkg/pdfcpu/model/resources/certs/se.p7c new file mode 100644 index 00000000..38efbc34 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/se.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/si.p7c b/pkg/pdfcpu/model/resources/certs/si.p7c new file mode 100644 index 00000000..f330e3ff Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/si.p7c differ diff --git 
a/pkg/pdfcpu/model/resources/certs/sk.p7c b/pkg/pdfcpu/model/resources/certs/sk.p7c new file mode 100644 index 00000000..226f5a84 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/sk.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/ua.p7c b/pkg/pdfcpu/model/resources/certs/ua.p7c new file mode 100644 index 00000000..709da8d7 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/ua.p7c differ diff --git a/pkg/pdfcpu/model/resources/certs/uk.p7c b/pkg/pdfcpu/model/resources/certs/uk.p7c new file mode 100644 index 00000000..43710cf0 Binary files /dev/null and b/pkg/pdfcpu/model/resources/certs/uk.p7c differ diff --git a/pkg/pdfcpu/model/resources/config.yml b/pkg/pdfcpu/model/resources/config.yml index 3a9aa5f5..5ef39759 100644 --- a/pkg/pdfcpu/model/resources/config.yml +++ b/pkg/pdfcpu/model/resources/config.yml @@ -1,7 +1,3 @@ -############################# -# Default configuration # -############################# - # toggle for inFilename extension check (.pdf) checkFileNameExt: true @@ -14,7 +10,7 @@ decodeAllStreams: false # ValidationRelaxed, validationMode: ValidationRelaxed -# validate cross reference table right before writing +# validate cross reference table right before writing. postProcessValidate: true # eol for writing: @@ -46,17 +42,40 @@ unit: points # timestamp format: yyyy-mm-dd hh:mm # Switch month and year by using: 2006-02-01 15:04 -# See more at https://pkg.go.dev/time@go1.17.1#pkg-constants +# See more at https://pkg.go.dev/time@go1.22#pkg-constants timestampFormat: 2006-01-02 15:04 # date format: yyyy-mm-dd dateFormat: 2006-01-02 -# optimize duplicate content streams across pages +# toggle optimization. +optimize: true + +# optimize page resources via content stream analysis. +optimizeResourceDicts: true + +# optimize duplicate content streams across pages. optimizeDuplicateContentStreams: false -# merge creates bookmarks +# merge creates bookmarks. createBookmarks: true -# Viewer is expected to supply appearance streams for form fields. +# viewer is expected to supply appearance streams for form fields. needAppearances: false + +# internet availability. +offline: false + +# http timeout in seconds. +timeout: 5 + +# http timeout in seconds for CRL revocation checking. +timeoutCRL: 10 + +# http timeout in seconds for OCSP revocation checking. +timeoutOCSP: 10 + +# preferred certificate revocation checking mechanism: +# crl +# ocsp +preferredCertRevocationChecker: crl diff --git a/pkg/pdfcpu/model/sign.go b/pkg/pdfcpu/model/sign.go new file mode 100644 index 00000000..1c81e6f2 --- /dev/null +++ b/pkg/pdfcpu/model/sign.go @@ -0,0 +1,503 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package model + +import ( + "fmt" + "strings" + "time" +) + +const ( + Unknown = iota + False // aka invalid, not ok + True // aka valid, ok +) + +// Preferred cert revocation checking mechanism values +const ( + CRL = iota + OCSP +) + +const ( + CertifiedSigPermNone = iota + CertifiedSigPermNoChangesAllowed + CertifiedSigPermFillingAndSigningOK + CertifiedSigPermFillingAnnotatingAndSigningOK +) + +const ( + SigTypeForm = iota + SigTypePage + SigTypeUR + SigTypeDTS +) + +const SignTSFormat = "2006-01-02 15:04:05 -0700" + +type RevocationDetails struct { + Status int + Reason string +} + +func (rd RevocationDetails) String() string { + ss := []string{} + ss = append(ss, fmt.Sprintf(" Status: %s", validString(rd.Status))) + if len(rd.Reason) > 0 { + ss = append(ss, fmt.Sprintf(" Reason: %s", rd.Reason)) + } + return strings.Join(ss, "\n") +} + +type TrustDetails struct { + Status int + Reason string + SourceObtainedFrom string + AllowSignDocuments bool + AllowCertifyDocuments bool + AllowExecuteDynamicContent bool + AllowExecuteJavaScript bool + AllowExecutePrivilegedSystemOperation bool +} + +func (td TrustDetails) String() string { + ss := []string{} + ss = append(ss, fmt.Sprintf(" Status: %s", validString(td.Status))) + if len(td.Reason) > 0 { + ss = append(ss, fmt.Sprintf(" Reason: %s", td.Reason)) + } + // if td.Status == True { + // ss = append(ss, fmt.Sprintf(" SourceObtainedFrom: %s", td.SourceObtainedFrom)) + // ss = append(ss, fmt.Sprintf(" AllowSignDocuments: %t", td.AllowSignDocuments)) + // ss = append(ss, fmt.Sprintf(" AllowCertifyDocuments: %t", td.AllowCertifyDocuments)) + // ss = append(ss, fmt.Sprintf(" AllowExecuteDynamicContent: %t", td.AllowExecuteDynamicContent)) + // ss = append(ss, fmt.Sprintf(" AllowExecuteJavaScript: %t", td.AllowExecuteJavaScript)) + // ss = append(ss, fmt.Sprintf(" AllowExecutePrivilegedSystemOperation: %t", td.AllowExecutePrivilegedSystemOperation)) + // } + return strings.Join(ss, "\n") +} + +type CertificateDetails struct { + Leaf bool + SelfSigned bool + Subject string + Issuer string + SerialNumber string + ValidFrom time.Time + ValidThru time.Time + Expired bool + Qualified bool + CA bool + Usage string + Version int + SignAlg string + KeySize int + Revocation RevocationDetails + Trust TrustDetails + IssuerCertificate *CertificateDetails +} + +func (cd CertificateDetails) String() string { + ss := []string{} + ss = append(ss, fmt.Sprintf(" Subject: %s", cd.Subject)) + ss = append(ss, fmt.Sprintf(" Issuer: %s", cd.Issuer)) + ss = append(ss, fmt.Sprintf(" SerialNr: %s", cd.SerialNumber)) + ss = append(ss, fmt.Sprintf(" Valid From: %s", cd.ValidFrom.Format(SignTSFormat))) + ss = append(ss, fmt.Sprintf(" Valid Thru: %s", cd.ValidThru.Format(SignTSFormat))) + ss = append(ss, fmt.Sprintf(" Expired: %t", cd.Expired)) + ss = append(ss, fmt.Sprintf(" Qualified: %t", cd.Qualified)) + ss = append(ss, fmt.Sprintf(" CA: %t", cd.CA)) + ss = append(ss, fmt.Sprintf(" Usage: %s", cd.Usage)) + ss = append(ss, fmt.Sprintf(" Version: %d", cd.Version)) + ss = append(ss, fmt.Sprintf(" SignAlg: %s", cd.SignAlg)) + ss = append(ss, fmt.Sprintf(" Key Size: %d bits", cd.KeySize)) + ss = append(ss, fmt.Sprintf(" SelfSigned: %t", cd.SelfSigned)) + ss = append(ss, fmt.Sprintf(" Trust:%s", cd.Trust)) + if cd.Leaf && !cd.SelfSigned { + ss = append(ss, fmt.Sprintf(" Revocation:%s", cd.Revocation)) + } + + if cd.IssuerCertificate != nil { + s := " Intermediate" + if cd.IssuerCertificate.IssuerCertificate == nil { + s = " Root" + } + if cd.IssuerCertificate.CA { + s += 
"CA" + } + ss = append(ss, s+":") + ss = append(ss, cd.IssuerCertificate.String()) + } + return strings.Join(ss, "\n") +} + +// Signature represents a digital signature. +type Signature struct { + Type int + Certified bool + Authoritative bool + Visible bool + Signed bool + ObjNr int + PageNr int +} + +func (sig Signature) String(status SignatureStatus) string { + s := "" + if sig.Type == SigTypeForm { + s = "form signature (" + } else if sig.Type == SigTypePage { + s = "page signature (" + } else if sig.Type == SigTypeUR { + s = "usage rights signature (" + } else { + s = "document timestamp (" + } + + if sig.Type != SigTypeDTS { + if sig.Certified { + s += "certified, " + } else if sig.Authoritative { + s += "authoritative, " + } + } + + if sig.Type == SigTypeDTS { + s1 := "trusted, " + if status != SignatureStatusValid { + s1 = "not " + s1 + } + s += s1 + } + + if sig.Visible { + s += "visible, " + } else { + s += "invisible, " + } + + if sig.Signed { + s += "signed)" + } else { + s += "unsigned)" + } + + if sig.Visible { + s += fmt.Sprintf(" on page %d", sig.PageNr) + } + + //s += fmt.Sprintf(" objNr%d", sig.ObjNr) + + return s +} + +// SignatureStats represents signature stats for a file. +type SignatureStats struct { + FormSigned int + FormSignedVisible int + FormUnsigned int + FormUnsignedVisible int + PageSigned int + PageSignedVisible int + PageUnsigned int + PageUnsignedVisible int + URSigned int + URSignedVisible int + URUnsigned int + URUnsignedVisible int + DTSSigned int + DTSSignedVisible int + DTSUnsigned int + DTSUnsignedVisible int + + Total int +} + +func (sigStats SignatureStats) Counter(svr *SignatureValidationResult) (*int, *int, *int, *int) { + switch svr.Type { + case SigTypeForm: + return &sigStats.FormSigned, &sigStats.FormSignedVisible, &sigStats.FormUnsigned, &sigStats.FormUnsignedVisible + case SigTypePage: + return &sigStats.PageSigned, &sigStats.PageSignedVisible, &sigStats.PageUnsigned, &sigStats.PageUnsignedVisible + case SigTypeUR: + return &sigStats.URSigned, &sigStats.URSignedVisible, &sigStats.URUnsigned, &sigStats.URUnsignedVisible + case SigTypeDTS: + return &sigStats.DTSSigned, &sigStats.DTSSignedVisible, &sigStats.DTSUnsigned, &sigStats.DTSUnsignedVisible + } + return nil, nil, nil, nil +} + +// SignatureStatus represents all possible signature statuses. +type SignatureStatus int + +const ( + SignatureStatusUnknown SignatureStatus = 1 << iota + SignatureStatusValid + SignatureStatusInvalid +) + +// SignatureStatusStrings manages string representations for signature statuses. +var SignatureStatusStrings = map[SignatureStatus]string{ + SignatureStatusUnknown: "validity of the signature is unknown", + SignatureStatusValid: "signature is valid", + SignatureStatusInvalid: "signature is invalid", +} + +func (st SignatureStatus) String() string { + return SignatureStatusStrings[st] +} + +type SignatureReason int + +const ( + SignatureReasonUnknown SignatureReason = 1 << iota + SignatureReasonDocNotModified + SignatureReasonDocModified + SignatureReasonSignatureForged + SignatureReasonSigningTimeInvalid + SignatureReasonTimestampTokenInvalid + SignatureReasonCertInvalid + SignatureReasonCertNotTrusted + SignatureReasonCertExpired + SignatureReasonCertRevoked + SignatureReasonInternal + SignatureReasonSelfSignedCertErr +) + +// SignatureReasonStrings manages string representations for signature reasons. 
+var SignatureReasonStrings = map[SignatureReason]string{ + SignatureReasonUnknown: "no reason", + SignatureReasonDocNotModified: "document has not been modified", + SignatureReasonDocModified: "document has been modified", + SignatureReasonSignatureForged: "signer's signature is not authentic", + SignatureReasonTimestampTokenInvalid: "timestamp token is invalid", + SignatureReasonCertInvalid: "signer's certificate is invalid", + SignatureReasonCertNotTrusted: "signer's certificate chain is not in the trusted list of Root CAs", + SignatureReasonCertExpired: "signer's certificate or one of its parent certificates has expired", + SignatureReasonCertRevoked: "signer's certificate or one of its parent certificates has been revoked", + SignatureReasonInternal: "internal error", + SignatureReasonSelfSignedCertErr: "signer's self signed certificate is not trusted", +} + +func (sr SignatureReason) String() string { + return SignatureReasonStrings[sr] +} + +type Signer struct { + Certificate *CertificateDetails + CertificatePathStatus int + HasTimestamp bool + Timestamp time.Time // signature timestamp attribute (which contains a timestamp token) + LTVEnabled bool // needs timestamp token & revocation info + PAdES string // baseline level: B-B, B-T, B-LT, B-LTA + Certified bool // indicated by DocMDP entry + Authoritative bool // true if certified or first (youngest) signature + Permissions int // see table 257 + Problems []string +} + +func (signer *Signer) AddProblem(s string) { + signer.Problems = append(signer.Problems, s) +} + +func permString(i int) string { + switch i { + case CertifiedSigPermNoChangesAllowed: + return "no changes allowed" + case CertifiedSigPermFillingAndSigningOK: + return "filling forms, signing" + case CertifiedSigPermFillingAnnotatingAndSigningOK: + return "filling forms, annotating, signing" + } + return "" +} + +func (signer Signer) String(dts bool) string { + ss := []string{} + s := "false" + if signer.HasTimestamp { + if signer.Timestamp.IsZero() { + s = "invalid" + } else { + s = signer.Timestamp.Format(SignTSFormat) + } + } + + ss = append(ss, fmt.Sprintf(" Timestamp: %s", s)) + if !dts { + ss = append(ss, fmt.Sprintf(" LTVEnabled: %t", signer.LTVEnabled)) + if signer.PAdES != "" { + ss = append(ss, fmt.Sprintf(" PAdES: %s", signer.PAdES)) + } + ss = append(ss, fmt.Sprintf(" Certified: %t", signer.Certified)) + ss = append(ss, fmt.Sprintf(" Authoritative: %t", signer.Authoritative)) + if signer.Certified && signer.Permissions > 0 { + ss = append(ss, fmt.Sprintf(" Permissions: %s", permString(signer.Permissions))) + } + } + if signer.Certificate != nil { + s := " Certificate" + if signer.Certificate.CA { + s += "(CA)" + } + ss = append(ss, s+":") + ss = append(ss, signer.Certificate.String()) + } + + for i, s := range signer.Problems { + if i == 0 { + ss = append(ss, fmt.Sprintf(" Problems: %s", s)) + continue + } + ss = append(ss, fmt.Sprintf(" %s", s)) + } + + return strings.Join(ss, "\n") +} + +type SignatureDetails struct { + SubFilter string // Signature Dict SubFilter + SignerIdentity string // extracted from signature + SignerName string // Signature Dict Name + ContactInfo string // Signature Dict ContactInfo + Location string // Signature Dict Location + Reason string // Signature Dict + SigningTime time.Time // Signature Dict M + FieldName string // Signature Field T + Signers []*Signer +} + +func (sd *SignatureDetails) AddSigner(s *Signer) { + sd.Signers = append(sd.Signers, s) +} + +func (sd *SignatureDetails) IsETSI_CAdES_detached() bool { + return 
sd.SubFilter == "ETSI.CAdES.detached" +} + +func (sd *SignatureDetails) IsETSI_RFC3161() bool { + return sd.SubFilter == "ETSI.RFC3161" +} + +func (sd *SignatureDetails) Permissions() int { + for _, signer := range sd.Signers { + if signer.Certified { + return signer.Permissions + } + } + return CertifiedSigPermNone +} + +func (sd SignatureDetails) String() string { + ss := []string{} + ss = append(ss, fmt.Sprintf(" SubFilter: %s", sd.SubFilter)) + ss = append(ss, fmt.Sprintf(" SignerIdentity: %s", sd.SignerIdentity)) + ss = append(ss, fmt.Sprintf(" SignerName: %s", sd.SignerName)) + if !sd.IsETSI_RFC3161() { + ss = append(ss, fmt.Sprintf(" ContactInfo: %s", sd.ContactInfo)) + ss = append(ss, fmt.Sprintf(" Location: %s", sd.Location)) + ss = append(ss, fmt.Sprintf(" Reason: %s", sd.Reason)) + } + ss = append(ss, fmt.Sprintf(" SigningTime: %s", sd.SigningTime.Format(SignTSFormat))) + ss = append(ss, fmt.Sprintf(" Field: %s", sd.FieldName)) + + if len(sd.Signers) == 1 { + ss = append(ss, " Signer:") + ss = append(ss, sd.Signers[0].String(sd.IsETSI_RFC3161())) + } else { + for i, signer := range sd.Signers { + ss = append(ss, fmt.Sprintf(" Signer %d:", i+1)) + ss = append(ss, signer.String(sd.IsETSI_RFC3161())) + } + } + + return strings.Join(ss, "\n") +} + +type SignatureValidationResult struct { + Signature + Status SignatureStatus + Reason SignatureReason + Details SignatureDetails + DocModified int + Problems []string +} + +func (svr *SignatureValidationResult) AddProblem(s string) { + svr.Problems = append(svr.Problems, s) +} + +func (svr *SignatureValidationResult) Certified() bool { + return svr.Signature.Certified +} + +func (svr *SignatureValidationResult) Permissions() int { + return svr.Details.Permissions() +} + +func (svr *SignatureValidationResult) SigningTime() string { + if !svr.Details.SigningTime.IsZero() { + return svr.Details.SigningTime.Format(SignTSFormat) + } + return "not available" +} + +func (svr SignatureValidationResult) String() string { + ss := []string{} + + ss = append(ss, fmt.Sprintf(" Type: %s", svr.Signature.String(svr.Status))) + if !svr.Signed { + return strings.Join(ss, "\n") + } + + ss = append(ss, fmt.Sprintf(" Status: %s", svr.Status.String())) + ss = append(ss, fmt.Sprintf(" Reason: %s", svr.Reason.String())) + ss = append(ss, fmt.Sprintf(" Signed: %s", svr.SigningTime())) + ss = append(ss, fmt.Sprintf("DocModified: %s", statusString(svr.DocModified))) + ss = append(ss, fmt.Sprintf(" Details:\n%s", svr.Details)) + + for i, s := range svr.Problems { + if i == 0 { + ss = append(ss, fmt.Sprintf(" Problems: %s", s)) + continue + } + ss = append(ss, fmt.Sprintf(" %s", s)) + } + + return strings.Join(ss, "\n") +} + +func statusString(status int) string { + switch status { + case False: + return "false" + case True: + return "true" + } + return "unknown" +} + +func validString(status int) string { + switch status { + case False: + return "not ok" + case True: + return "ok" + } + return "unknown" +} diff --git a/pkg/pdfcpu/model/text.go b/pkg/pdfcpu/model/text.go index cfd95d16..5cf136b6 100644 --- a/pkg/pdfcpu/model/text.go +++ b/pkg/pdfcpu/model/text.go @@ -173,8 +173,8 @@ func calcBoundingBoxForLines(lines []string, x, y float64, fontName string, font return box, maxLine } -func PrepBytes(xRefTable *XRefTable, s, fontName string, embed, rtl bool) string { - if font.IsUserFont(fontName) { +func PrepBytes(xRefTable *XRefTable, s, fontName string, embed, rtl, fillFont bool) string { + if font.IsUserFont(fontName) && (!fillFont || !embed) { if rtl { s = 
types.Reverse(s) } @@ -213,7 +213,7 @@ func PrepBytes(xRefTable *XRefTable, s, fontName string, embed, rtl bool) string } func writeStringToBuf(xRefTable *XRefTable, w io.Writer, s string, x, y float64, td TextDescriptor) { - s = PrepBytes(xRefTable, s, td.FontName, td.Embed, td.RTL) + s = PrepBytes(xRefTable, s, td.FontName, td.Embed, td.RTL, false) fmt.Fprintf(w, "BT 0 Tw %.2f %.2f %.2f RG %.2f %.2f %.2f rg %.2f %.2f Td %d Tr (%s) Tj ET ", td.StrokeCol.R, td.StrokeCol.G, td.StrokeCol.B, td.FillCol.R, td.FillCol.G, td.FillCol.B, x, y, td.RMode, s) } @@ -251,7 +251,7 @@ func horAdjustBoundingBoxForLines(r, box *types.Rectangle, dx, dy float64, x, y } func prepJustifiedLine(xRefTable *XRefTable, lines *[]string, strbuf []string, strWidth, w float64, fontSize int, fontName string, embed, rtl bool) { - blank := PrepBytes(xRefTable, " ", fontName, embed, false) + blank := PrepBytes(xRefTable, " ", fontName, embed, true, false) var sb strings.Builder sb.WriteString("[") wc := len(strbuf) @@ -261,7 +261,7 @@ func prepJustifiedLine(xRefTable *XRefTable, lines *[]string, strbuf []string, s if rtl { j = wc - 1 - i } - s := PrepBytes(xRefTable, strbuf[j], fontName, embed, rtl) + s := PrepBytes(xRefTable, strbuf[j], fontName, embed, rtl, false) sb.WriteString(fmt.Sprintf(" (%s)", s)) if i < wc-1 { sb.WriteString(fmt.Sprintf(" %d (%s)", -int(dx), blank)) @@ -294,7 +294,7 @@ func newPrepJustifiedString( if len(s) == 0 { if len(strbuf) > 0 { - s1 := PrepBytes(xRefTable, strings.Join(strbuf, " "), fontName, embed, rtl) + s1 := PrepBytes(xRefTable, strings.Join(strbuf, " "), fontName, embed, rtl, false) if rtl { dx := font.GlyphSpaceUnits(w-strWidth, *fontSize) s = fmt.Sprintf("[ %d (%s) ] TJ ", -int(dx), s1) diff --git a/pkg/pdfcpu/model/version.go b/pkg/pdfcpu/model/version.go index af18636a..2b997b46 100644 --- a/pkg/pdfcpu/model/version.go +++ b/pkg/pdfcpu/model/version.go @@ -18,12 +18,14 @@ package model import ( "fmt" + "strings" + "github.com/angel-one/pdfcpu/pkg/log" "github.com/pkg/errors" ) // VersionStr is the current pdfcpu version. -var VersionStr = "v0.7.0 dev" +var VersionStr = "v0.11.0 dev" // Version is a type for the internal representation of PDF versions. type Version int @@ -42,7 +44,6 @@ const ( // PDFVersion returns the PDFVersion for a version string. func PDFVersion(versionStr string) (Version, error) { - switch versionStr { case "1.0": return V10, nil @@ -63,7 +64,14 @@ func PDFVersion(versionStr string) (Version, error) { case "2.0": return V20, nil } + return -1, errors.New(versionStr) +} +func PDFVersionRelaxed(versionStr string) (Version, error) { + switch versionStr { + case "1.7.0": + return V17, nil + } return -1, errors.New(versionStr) } @@ -74,3 +82,34 @@ func (v Version) String() string { } return "1." + fmt.Sprintf("%d", v) } + +func identicalMajorAndMinorVersions(v1, v2 string) bool { + ss1 := strings.Split(v1, ".") + if len(ss1) < 2 { + return false + } + + ss2 := strings.Split(v2, ".") + if len(ss2) < 2 { + return false + } + + return ss1[0] == ss2[0] && ss1[1] == ss2[1] +} + +// CheckConfigVersion prints a warning if the configuration is outdated. +func CheckConfigVersion(v string) { + + if identicalMajorAndMinorVersions(v, VersionStr) { + return + } + + if log.CLIEnabled() { + log.CLI.Println(` +**************************** WARNING **************************** +* Your configuration is not based on the current major version. 
* +* Please backup and then reset your configuration: * +* $ pdfcpu config reset * +*****************************************************************`) + } +} diff --git a/pkg/pdfcpu/model/watermark.go b/pkg/pdfcpu/model/watermark.go index 5fa46291..62100101 100644 --- a/pkg/pdfcpu/model/watermark.go +++ b/pkg/pdfcpu/model/watermark.go @@ -67,7 +67,7 @@ type Watermark struct { InpUnit types.DisplayUnit // input display unit. Pos types.Anchor // position anchor, one of tl,tc,tr,l,c,r,bl,bc,br. Dx, Dy float64 // anchor offset. - HAlign *types.HAlignment // horizonal alignment for text watermarks. + HAlign *types.HAlignment // horizontal alignment for text watermarks. FontName string // supported are Adobe base fonts only. (as of now: Helvetica, Times-Roman, Courier) FontSize int // font scaling factor. ScaledFontSize int // font scaling factor for a specific page diff --git a/pkg/pdfcpu/model/xreftable.go b/pkg/pdfcpu/model/xreftable.go index 70411cf2..a8846b7a 100644 --- a/pkg/pdfcpu/model/xreftable.go +++ b/pkg/pdfcpu/model/xreftable.go @@ -22,6 +22,7 @@ import ( "encoding/hex" "fmt" "io" + "math" "os" "path" "sort" @@ -49,12 +50,14 @@ type XRefTableEntry struct { Free bool Offset *int64 Generation *int + Incr int RefCount int Object types.Object Compressed bool ObjectStream *int ObjectStreamInd *int Valid bool + BeingValidated bool } // NewXRefTableEntryGen0 returns a cross reference table entry for an object with generation 0. @@ -99,11 +102,13 @@ type PgAnnots map[AnnotationType]Annot // XRefTable represents a PDF cross reference table plus stats for a PDF file. type XRefTable struct { Table map[int]*XRefTableEntry - Size *int // Object count from PDF trailer dict. + Size *int // from trailer dict. + MaxObjNr int // after reading in all objects from xRef table. PageCount int // Number of pages. Root *types.IndirectRef // Pointer to catalog (reference to root object). RootDict types.Dict // Catalog Names map[string]*Node // Cache for name trees as found in catalog. + Dests types.Dict // Named destinations NameRefs map[string]NameMap // Name refs for merging only Encrypt *types.IndirectRef // Encrypt dict. E *Enc @@ -117,17 +122,19 @@ type XRefTable struct { RootVersion *Version // Optional PDF version taking precedence over the header version. // Document information section - ID types.Array // from trailer - Info *types.IndirectRef // Infodict (reference to info dict object) - Title string - Subject string - Keywords string - Author string - Creator string - Producer string - CreationDate string - ModDate string - Properties map[string]string + ID types.Array // from trailer + Info *types.IndirectRef // Infodict (reference to info dict object) + Title string + Subject string + Author string + Creator string + Producer string + CreationDate string + ModDate string + Keywords string + KeywordList types.StringSet + Properties map[string]string + CatalogXMPMeta *XMPMeta PageLayout *PageLayout PageMode *PageMode @@ -144,13 +151,20 @@ type XRefTable struct { // Thumbnail images PageThumbs map[int]types.IndirectRef + Signatures map[int]map[int]Signature // form signatures and signatures located via page annotations only keyed by increment #. 
+ URSignature types.Dict // usage rights signature + CertifiedSigObjNr int // authoritative signature + DSS types.Dict // document security store, currently unsupported + DTS time.Time // trusted digital timestamp + // Offspec section AdditionalStreams *types.Array // array of IndirectRef - trailer :e.g., Oasis "Open Doc" // Statistics Stats PDFStats - Tagged bool // File is using tags. This is important for ??? + Tagged bool // File is using tags. + CustomExtensions bool // File is using custom extensions for annotations and/or keywords. // Validation CurPage int // current page during validation @@ -169,24 +183,29 @@ type XRefTable struct { AppendOnly bool // Fonts - UsedGIDs map[string]map[uint16]bool + UsedGIDs map[string]map[uint16]bool + FillFonts map[string]types.IndirectRef } // NewXRefTable creates a new XRefTable. +// TODO Export func newXRefTable(conf *Configuration) (xRefTable *XRefTable) { return &XRefTable{ Table: map[int]*XRefTableEntry{}, Names: map[string]*Node{}, NameRefs: map[string]NameMap{}, + KeywordList: types.StringSet{}, Properties: map[string]string{}, LinearizationObjs: types.IntSet{}, PageAnnots: map[int]PgAnnots{}, PageThumbs: map[int]types.IndirectRef{}, + Signatures: map[int]map[int]Signature{}, Stats: NewPDFStats(), ValidationMode: conf.ValidationMode, ValidateLinks: conf.ValidateLinks, URIs: map[int]map[string]string{}, UsedGIDs: map[string]map[uint16]bool{}, + FillFonts: map[string]types.IndirectRef{}, Conf: conf, } } @@ -223,7 +242,7 @@ func (xRefTable *XRefTable) ParseRootVersion() (v *string, err error) { // ValidateVersion validates against the xRefTable's version. func (xRefTable *XRefTable) ValidateVersion(element string, sinceVersion Version) error { if xRefTable.Version() < sinceVersion { - return errors.Errorf("%s: unsupported in version %s\n", element, xRefTable.VersionString()) + return errors.Errorf("%s: unsupported in version %s", element, xRefTable.VersionString()) } return nil @@ -419,6 +438,12 @@ func (xRefTable *XRefTable) InsertObject(obj types.Object) (objNr int, err error return xRefTable.InsertNew(*xRefTableEntry), nil } +// IndRefForNewObject inserts object at objNr into the xRefTable and returns an indirect reference to it. +func (xRefTable *XRefTable) IndRefForObject(objNr int, obj types.Object) (*types.IndirectRef, error) { + xRefTable.Table[objNr] = NewXRefTableEntryGen0(obj) + return types.NewIndirectRef(objNr, 0), nil +} + // IndRefForNewObject inserts an object into the xRefTable and returns an indirect reference to it. 
func (xRefTable *XRefTable) IndRefForNewObject(obj types.Object) (*types.IndirectRef, error) { xRefTableEntry := NewXRefTableEntryGen0(obj) @@ -652,13 +677,13 @@ func (xRefTable *XRefTable) NewFileSpecDict(f, uf, desc string, indRefStreamDict d := types.NewDict() d.InsertName("Type", "Filespec") - s, err := types.EscapeUTF16String(f) + s, err := types.EscapedUTF16String(f) if err != nil { return nil, err } d.InsertString("F", *s) - if s, err = types.EscapeUTF16String(uf); err != nil { + if s, err = types.EscapedUTF16String(uf); err != nil { return nil, err } d.InsertString("UF", *s) @@ -669,7 +694,7 @@ func (xRefTable *XRefTable) NewFileSpecDict(f, uf, desc string, indRefStreamDict d.Insert("EF", efDict) if desc != "" { - if s, err = types.EscapeUTF16String(desc); err != nil { + if s, err = types.EscapedUTF16String(desc); err != nil { return nil, err } d.InsertString("Desc", *s) @@ -889,21 +914,38 @@ func (xRefTable *XRefTable) UndeleteObject(objectNumber int) error { return nil } -// IsValidObj returns true if the object with objNr and genNr is valid. -func (xRefTable *XRefTable) IsValidObj(objNr, genNr int) (bool, error) { +// IsObjValid returns true if the object with objNr and genNr is valid. +func (xRefTable *XRefTable) IsObjValid(objNr, genNr int) (bool, error) { entry, found := xRefTable.FindTableEntry(objNr, genNr) if !found { - return false, errors.Errorf("pdfcpu: IsValid: no entry for obj#%d\n", objNr) + return false, errors.Errorf("pdfcpu: IsObjValid: no entry for obj#%d\n", objNr) } if entry.Free { - return false, errors.Errorf("pdfcpu: IsValid: unexpected free entry for obj#%d\n", objNr) + return false, errors.Errorf("pdfcpu: IsObjValid: unexpected free entry for obj#%d\n", objNr) } return entry.Valid, nil } // IsValid returns true if the object referenced by ir is valid. func (xRefTable *XRefTable) IsValid(ir types.IndirectRef) (bool, error) { - return xRefTable.IsValidObj(ir.ObjectNumber.Value(), ir.GenerationNumber.Value()) + return xRefTable.IsObjValid(ir.ObjectNumber.Value(), ir.GenerationNumber.Value()) +} + +// IsObjBeingValidated returns true if the object with objNr and genNr is being validated. +func (xRefTable *XRefTable) IsObjBeingValidated(objNr, genNr int) (bool, error) { + entry, found := xRefTable.FindTableEntry(objNr, genNr) + if !found { + return false, errors.Errorf("pdfcpu: IsObjBeingValidated: no entry for obj#%d\n", objNr) + } + if entry.Free { + return false, errors.Errorf("pdfcpu: IsObjBeingValidated: unexpected free entry for obj#%d\n", objNr) + } + return entry.BeingValidated, nil +} + +// IsBeingValidated returns true if the object referenced by ir is being validated. +func (xRefTable *XRefTable) IsBeingValidated(ir types.IndirectRef) (bool, error) { + return xRefTable.IsObjBeingValidated(ir.ObjectNumber.Value(), ir.GenerationNumber.Value()) } // SetValid marks the xreftable entry of the object referenced by ir as valid. @@ -916,6 +958,23 @@ func (xRefTable *XRefTable) SetValid(ir types.IndirectRef) error { return errors.Errorf("pdfcpu: SetValid: unexpected free entry for obj#%d\n", ir.ObjectNumber.Value()) } entry.Valid = true + entry.BeingValidated = false + + return nil +} + +// SetBeingValidated marks the xreftable entry of the object referenced by ir as being validated. 
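// Sketch of the intended lifecycle for the new BeingValidated flag
// (illustrative only, not part of this patch; validateObjectGraph is a
// hypothetical caller): mark an object before descending into it and promote
// it to Valid when done, so a validator can skip objects already on its
// stack, e.g. when the object graph contains reference cycles.
func validateObjectGraph(xRefTable *XRefTable, ir types.IndirectRef) error {
	if ok, err := xRefTable.IsValid(ir); err != nil || ok {
		return err // already validated, or lookup failed
	}
	if ok, err := xRefTable.IsBeingValidated(ir); err != nil || ok {
		return err // currently being validated: don't recurse again
	}
	if err := xRefTable.SetBeingValidated(ir); err != nil {
		return err
	}
	// ... dereference ir and validate the object and its children here ...
	return xRefTable.SetValid(ir)
}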
+func (xRefTable *XRefTable) SetBeingValidated(ir types.IndirectRef) error { + entry, found := xRefTable.FindTableEntry(ir.ObjectNumber.Value(), ir.GenerationNumber.Value()) + if !found { + return errors.Errorf("pdfcpu: SetBeingValidated: no entry for obj#%d\n", ir.ObjectNumber.Value()) + } + if entry.Free { + return errors.Errorf("pdfcpu: SetBeingValidated: unexpected free entry for obj#%d\n", ir.ObjectNumber.Value()) + } + entry.BeingValidated = true + entry.Valid = false + return nil } @@ -961,7 +1020,10 @@ func (xRefTable *XRefTable) DereferenceXObjectDict(indRef types.IndirectRef) (*t } subType := sd.Dict.Subtype() - if subType == nil { + if subType == nil || len(*subType) == 0 { + if xRefTable.ValidationMode == ValidationRelaxed { + return sd, nil + } return nil, errors.Errorf("pdfcpu: DereferenceXObjectDict: missing stream dict Subtype %s\n", indRef) } @@ -982,7 +1044,7 @@ func (xRefTable *XRefTable) Catalog() (types.Dict, error) { return nil, errors.New("pdfcpu: Catalog: missing root dict") } - o, err := xRefTable.indRefToObject(xRefTable.Root) + o, _, err := xRefTable.indRefToObject(xRefTable.Root, true) if err != nil || o == nil { return nil, err } @@ -999,7 +1061,7 @@ func (xRefTable *XRefTable) Catalog() (types.Dict, error) { // EncryptDict returns a pointer to the root object / catalog. func (xRefTable *XRefTable) EncryptDict() (types.Dict, error) { - o, err := xRefTable.indRefToObject(xRefTable.Encrypt) + o, _, err := xRefTable.indRefToObject(xRefTable.Encrypt, true) if err != nil || o == nil { return nil, err } @@ -1635,7 +1697,9 @@ func (xRefTable *XRefTable) consolidateResources(obj types.Object, pAttrs *Inher if err != nil { return err } - pAttrs.Resources[k] = o.Clone() + if o != nil { + pAttrs.Resources[k] = o.Clone() + } } if log.WriteEnabled() { log.Write.Printf("pA:\n%s\n", pAttrs.Resources) @@ -1703,11 +1767,23 @@ func (xRefTable *XRefTable) checkInheritedPageAttrs(pageDict types.Dict, pAttrs } if obj, found = pageDict.Find("Rotate"); found { - i, err := xRefTable.DereferenceInteger(obj) + obj, err := xRefTable.Dereference(obj) if err != nil { return err } - pAttrs.Rotate = i.Value() + + switch obj := obj.(type) { + case types.Integer: + pAttrs.Rotate = obj.Value() + case types.Float: + if xRefTable.ValidationMode == ValidationStrict { + return errors.Errorf("pdfcpu: dereferenceNumber: wrong type <%v>", obj) + } + + pAttrs.Rotate = int(math.Round(obj.Value())) + default: + return errors.Errorf("pdfcpu: dereferenceNumber: wrong type <%v>", obj) + } } if obj, found = pageDict.Find("Resources"); !found { @@ -1728,9 +1804,29 @@ func (xRefTable *XRefTable) checkInheritedPageAttrs(pageDict types.Dict, pAttrs return xRefTable.consolidateResources(obj, pAttrs) } +func (xRefTable *XRefTable) decodeContentStream(sd *types.StreamDict, pageNr int) error { + err := sd.Decode() + if err == filter.ErrUnsupportedFilter { + return errors.New("pdfcpu: unsupported filter: unable to decode content") + } + if err != nil { + if xRefTable.ValidationMode == ValidationStrict { + return errors.Errorf("page %d content decode: %v", pageNr, err) + } + if !strings.HasPrefix(err.Error(), "flate: corrupt input before offset") { + return errors.Errorf("page %d content decode: %v", pageNr, err) + } + ShowSkipped(fmt.Sprintf("page %d: corrupt content stream (flate)", pageNr)) + } + return nil +} + // PageContent returns the content in PDF syntax for page dict d. 
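// Usage sketch (illustrative only, not part of this patch; dumpPageContent is
// hypothetical): PageContent now takes the page number so that relaxed-mode
// warnings and error messages can point at the offending page.
func dumpPageContent(xRefTable *XRefTable, d types.Dict, pageNr int) error {
	bb, err := xRefTable.PageContent(d, pageNr)
	if err != nil {
		if err == ErrNoContent {
			return nil // page dict has no Contents entry
		}
		return err
	}
	fmt.Printf("page %d: %d bytes of decoded content\n", pageNr, len(bb))
	return nil
}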
-func (xRefTable *XRefTable) PageContent(d types.Dict) ([]byte, error) { +func (xRefTable *XRefTable) PageContent(d types.Dict, pageNr int) ([]byte, error) { o, _ := d.Find("Contents") + if o == nil { + return nil, ErrNoContent + } o, err := xRefTable.Dereference(o) if err != nil || o == nil { @@ -1743,14 +1839,9 @@ func (xRefTable *XRefTable) PageContent(d types.Dict) ([]byte, error) { case types.StreamDict: // no further processing. - err := o.Decode() - if err == filter.ErrUnsupportedFilter { - return nil, errors.New("pdfcpu: unsupported filter: unable to decode content") - } - if err != nil { + if err := xRefTable.decodeContentStream(&o, pageNr); err != nil { return nil, err } - bb = append(bb, o.Content...) case types.Array: @@ -1761,16 +1852,12 @@ func (xRefTable *XRefTable) PageContent(d types.Dict) ([]byte, error) { } o, _, err := xRefTable.DereferenceStreamDict(o) if err != nil { - return nil, err + return nil, errors.Errorf("page %d content decode: %v", pageNr, err) } if o == nil { continue } - err = o.Decode() - if err == filter.ErrUnsupportedFilter { - return nil, errors.New("pdfcpu: unsupported filter: unable to decode content") - } - if err != nil { + if err := xRefTable.decodeContentStream(o, pageNr); err != nil { return nil, err } bb = append(bb, o.Content...) @@ -1787,7 +1874,7 @@ func (xRefTable *XRefTable) PageContent(d types.Dict) ([]byte, error) { return bb, nil } -func consolidateResourceSubDict(d types.Dict, key string, prn PageResourceNames, pageNr int) error { +func (xRefTable *XRefTable) consolidateResourceSubDict(d types.Dict, key string, prn PageResourceNames, pageNr int) error { o := d[key] if o == nil { if prn.HasResources(key) { @@ -1814,28 +1901,32 @@ func consolidateResourceSubDict(d types.Dict, key string, prn PageResourceNames, // Check for missing resource sub dict entries. for k := range res { if !set[k] { - return errors.Errorf("pdfcpu: page %d: missing required %s: %s", pageNr, key, k) + s := fmt.Sprintf("page %d: missing required %s: %s", pageNr, key, k) + if xRefTable.ValidationMode == ValidationStrict { + return errors.New("pdfcpu: " + s) + } + ShowSkipped(s) } } d[key] = d1 return nil } -func consolidateResourceDict(d types.Dict, prn PageResourceNames, pageNr int) error { +func (xRefTable *XRefTable) consolidateResourceDict(d types.Dict, prn PageResourceNames, pageNr int) error { for k := range resourceTypes { - if err := consolidateResourceSubDict(d, k, prn, pageNr); err != nil { + if err := xRefTable.consolidateResourceSubDict(d, k, prn, pageNr); err != nil { return err } } return nil } -func (xRefTable *XRefTable) consolidateResourcesWithContent(pageDict, resDict types.Dict, page int, consolidateRes bool) error { +func (xRefTable *XRefTable) consolidateResourcesWithContent(pageDict, resDict types.Dict, pageNr int, consolidateRes bool) error { if !consolidateRes { return nil } - bb, err := xRefTable.PageContent(pageDict) + bb, err := xRefTable.PageContent(pageDict, pageNr) if err != nil { if err == ErrNoContent { return nil @@ -1849,11 +1940,52 @@ func (xRefTable *XRefTable) consolidateResourcesWithContent(pageDict, resDict ty return err } - // Compare required resouces (prn) with available resources (pAttrs.resources). + // Compare required resources (prn) with available resources (pAttrs.resources). // Remove any resource that's not required. // Return an error for any required resource missing. // TODO Calculate and accumulate resources required by content streams of any present form or type 3 fonts. 
- return consolidateResourceDict(resDict, prn, page) + return xRefTable.consolidateResourceDict(resDict, prn, pageNr) +} + +func (xRefTable *XRefTable) pageObjType(indRef types.IndirectRef) (string, error) { + + pageNodeDict, err := xRefTable.DereferenceDict(indRef) + if err != nil { + return "", err + } + + if t := pageNodeDict.Type(); t != nil { + return *t, nil + } + + objType := "" + + if xRefTable.ValidationMode == ValidationRelaxed { + if _, hasCount := pageNodeDict.Find("Count"); hasCount { + if _, hasKids := pageNodeDict.Find("Kids"); hasKids { + ShowRepaired(fmt.Sprintf("page tree node %s", indRef)) + objType = "Pages" + } + } + } + + return objType, nil +} + +func errForUnexpectedPageObjectType(validationMode int, objType string, indRef types.IndirectRef) error { + if validationMode == ValidationRelaxed { + return nil + } + + if objType == "Template" { + return errors.Errorf("Template page tree nodes not supported: %s", indRef) + } + + if objType == "" { + return errors.Errorf("page tree node without type: %s", indRef) + } + + return errors.Errorf("unsupported page tree node: %s", indRef) } func (xRefTable *XRefTable) processPageTreeForPageDict(root *types.IndirectRef, pAttrs *InheritedPageAttrs, p *int, page int, consolidateRes bool) (types.Dict, *types.IndirectRef, error) { @@ -1891,18 +2023,19 @@ func (xRefTable *XRefTable) processPageTreeForPageDict(root *types.IndirectRef, continue } - // Dereference next page node dict. + // Process next page node dict. + indRef, ok := o.(types.IndirectRef) if !ok { return nil, nil, errors.Errorf("pdfcpu: processPageTreeForPageDict: corrupt page node dict") } - pageNodeDict, err := xRefTable.DereferenceDict(indRef) + objType, err := xRefTable.pageObjType(indRef) if err != nil { return nil, nil, err } - switch *pageNodeDict.Type() { + switch objType { case "Pages": // Recurse over sub pagetree. @@ -1920,6 +2053,9 @@ func (xRefTable *XRefTable) processPageTreeForPageDict(root *types.IndirectRef, return xRefTable.processPageTreeForPageDict(&indRef, pAttrs, p, page, consolidateRes) } + default: + return nil, nil, errForUnexpectedPageObjectType(xRefTable.ValidationMode, objType, indRef) + } } @@ -1935,7 +2071,7 @@ func (xRefTable *XRefTable) PageDict(pageNr int, consolidateRes bool) (types.Dic pageCount int ) - if pageNr < 0 || pageNr > xRefTable.PageCount { + if pageNr <= 0 || pageNr > xRefTable.PageCount { return nil, nil, nil, errors.New("pdfcpu: page not found") } @@ -1945,6 +2081,10 @@ func (xRefTable *XRefTable) PageDict(pageNr int, consolidateRes bool) (types.Dic return nil, nil, nil, err } + if consolidateRes { + consolidateRes = xRefTable.Conf.OptimizeResourceDicts + } + // Calculate and return only resources that are really needed by // any content stream of this page and any possible forms or type 3 fonts referenced. 
pageDict, pageDictindRef, err := xRefTable.processPageTreeForPageDict(pageRootDictIndRef, &inhPAttrs, &pageCount, pageNr, consolidateRes) @@ -2221,11 +2361,22 @@ func (xRefTable *XRefTable) collectPageBoundariesForPageTree( } if obj, found := d.Find("Rotate"); found { - i, err := xRefTable.DereferenceInteger(obj) - if err != nil { + if obj, err = xRefTable.Dereference(obj); err != nil { return err } - r = i.Value() + + switch obj := obj.(type) { + case types.Integer: + r = obj.Value() + case types.Float: + if xRefTable.ValidationMode == ValidationStrict { + return errors.Errorf("pdfcpu: dereferenceNumber: wrong type <%v>", obj) + } + + r = int(math.Round(obj.Value())) + default: + return errors.Errorf("pdfcpu: dereferenceNumber: wrong type <%v>", obj) + } } if err := xRefTable.collectMediaBoxAndCropBox(d, inhMediaBox, inhCropBox); err != nil { @@ -2289,13 +2440,18 @@ func (xRefTable *XRefTable) PageDims() ([]types.Dim, error) { return dims, nil } -func (xRefTable *XRefTable) EmptyPage(parentIndRef *types.IndirectRef, mediaBox *types.Rectangle) (*types.IndirectRef, error) { +func (xRefTable *XRefTable) EmptyPage(parentIndRef *types.IndirectRef, mediaBox *types.Rectangle, objNr int) (*types.IndirectRef, error) { sd, _ := xRefTable.NewStreamDictForBuf(nil) if err := sd.Encode(); err != nil { return nil, err } + arr := types.RectForFormat("A4").Array() + if mediaBox != nil { + arr = mediaBox.Array() + } + contentsIndRef, err := xRefTable.IndRefForNewObject(*sd) if err != nil { return nil, err @@ -2306,11 +2462,15 @@ func (xRefTable *XRefTable) EmptyPage(parentIndRef *types.IndirectRef, mediaBox "Type": types.Name("Page"), "Parent": *parentIndRef, "Resources": types.NewDict(), - "MediaBox": mediaBox.Array(), + "MediaBox": arr, "Contents": *contentsIndRef, }, ) + if objNr > 0 { + return xRefTable.IndRefForObject(objNr, pageDict) + } + return xRefTable.IndRefForNewObject(pageDict) } @@ -2328,7 +2488,12 @@ func (xRefTable *XRefTable) pageMediaBox(d types.Dict) (*types.Rectangle, error) return rect(xRefTable, a) } -func (xRefTable *XRefTable) emptyPage(parent *types.IndirectRef, d types.Dict, pAttrs *InheritedPageAttrs) (*types.IndirectRef, error) { +func (xRefTable *XRefTable) emptyPage(parent *types.IndirectRef, d types.Dict, dim *types.Dim, pAttrs *InheritedPageAttrs) (*types.IndirectRef, error) { + // TODO cache empty page + if dim != nil { + return xRefTable.EmptyPage(parent, types.RectForDim(dim.Width, dim.Height), 0) + } + mediaBox, err := pAttrs.MediaBox, error(nil) if mediaBox == nil { mediaBox, err = xRefTable.pageMediaBox(d) @@ -2337,11 +2502,16 @@ func (xRefTable *XRefTable) emptyPage(parent *types.IndirectRef, d types.Dict, p } } - // TODO cache empty page - return xRefTable.EmptyPage(parent, mediaBox) + return xRefTable.EmptyPage(parent, mediaBox, 0) } -func (xRefTable *XRefTable) insertBlankPages(parent *types.IndirectRef, pAttrs *InheritedPageAttrs, p *int, selectedPages types.IntSet, before bool) (int, error) { +func (xRefTable *XRefTable) insertBlankPages( + parent *types.IndirectRef, + pAttrs *InheritedPageAttrs, + p *int, selectedPages types.IntSet, + dim *types.Dim, + before bool) (int, error) { + d, err := xRefTable.DereferenceDict(*parent) if err != nil { return 0, err @@ -2381,7 +2551,7 @@ func (xRefTable *XRefTable) insertBlankPages(parent *types.IndirectRef, pAttrs * case "Pages": // Recurse over sub pagetree. 
- j, err := xRefTable.insertBlankPages(&ir, pAttrs, p, selectedPages, before) + j, err := xRefTable.insertBlankPages(&ir, pAttrs, p, selectedPages, dim, before) if err != nil { return 0, err } @@ -2396,7 +2566,7 @@ func (xRefTable *XRefTable) insertBlankPages(parent *types.IndirectRef, pAttrs * } if selectedPages[*p] { // Insert empty page. - indRef, err := xRefTable.emptyPage(parent, pageNodeDict, pAttrs) + indRef, err := xRefTable.emptyPage(parent, pageNodeDict, dim, pAttrs) if err != nil { return 0, err } @@ -2419,7 +2589,7 @@ func (xRefTable *XRefTable) insertBlankPages(parent *types.IndirectRef, pAttrs * } // InsertBlankPages inserts a blank page before or after each selected page. -func (xRefTable *XRefTable) InsertBlankPages(pages types.IntSet, before bool) error { +func (xRefTable *XRefTable) InsertBlankPages(pages types.IntSet, dim *types.Dim, before bool) error { root, err := xRefTable.Pages() if err != nil { return err @@ -2428,7 +2598,7 @@ func (xRefTable *XRefTable) InsertBlankPages(pages types.IntSet, before bool) er var inhPAttrs InheritedPageAttrs p := 0 - _, err = xRefTable.insertBlankPages(root, &inhPAttrs, &p, pages, before) + _, err = xRefTable.insertBlankPages(root, &inhPAttrs, &p, pages, dim, before) return err } @@ -2639,14 +2809,12 @@ func (xRefTable *XRefTable) AppendContent(pageDict types.Dict, bb []byte) error switch o := obj.(type) { case types.StreamDict: - if err := appendToContentStream(&o, bb); err != nil { return err } entry.Object = o case types.Array: - // Get stream dict for last array element. o1 := o[len(o)-1] indRef, _ = o1.(types.IndirectRef) @@ -2654,15 +2822,13 @@ func (xRefTable *XRefTable) AppendContent(pageDict types.Dict, bb []byte) error genNr := indRef.GenerationNumber.Value() entry, _ = xRefTable.FindTableEntry(objNr, genNr) sd, _ := (entry.Object).(types.StreamDict) - if err := appendToContentStream(&sd, bb); err != nil { return err } - entry.Object = o + entry.Object = sd default: return errors.Errorf("pdfcpu: corrupt page \"Content\"") - } return nil diff --git a/pkg/pdfcpu/nup.go b/pkg/pdfcpu/nup.go index 9711bb43..22cc8df3 100644 --- a/pkg/pdfcpu/nup.go +++ b/pkg/pdfcpu/nup.go @@ -70,6 +70,7 @@ var nupParamMap = nUpParamMap{ "foliosize": parseBookletFolioSize, "btype": parseBookletType, "binding": parseBookletBinding, + "enforce": parseEnforce, } // Handle applies parameter completion and if successful @@ -108,7 +109,7 @@ func parseDimensionsNUp(s string, nup *model.NUp) (err error) { if nup.UserDim { return errors.New("pdfcpu: only one of formsize(papersize) or dimensions allowed") } - nup.PageDim, nup.PageSize, err = parsePageDim(s, nup.InpUnit) + nup.PageDim, nup.PageSize, err = ParsePageDim(s, nup.InpUnit) nup.UserDim = true return err @@ -131,6 +132,19 @@ func parseOrientation(s string, nup *model.NUp) error { return nil } +func parseEnforce(s string, nup *model.NUp) error { + switch strings.ToLower(s) { + case "on", "true", "t": + nup.Enforce = true + case "off", "false", "f": + nup.Enforce = false + default: + return errors.New("pdfcpu: enforce best-fit orientation of content, please provide one of: on/off true/false") + } + + return nil +} + func parseElementBorder(s string, nup *model.NUp) error { switch strings.ToLower(s) { case "on", "true", "t": @@ -408,7 +422,7 @@ func ParseNUpGridDefinition(rows, cols int, nUp *model.NUp) error { func nUpImagePDFBytes(w io.Writer, imgWidth, imgHeight int, nup *model.NUp, formResID string) { for _, r := range nup.RectsForGrid() { // Append to content stream. 
- model.NUpTilePDFBytes(w, types.RectForDim(float64(imgWidth), float64(imgHeight)), r, formResID, nup, false, true) + model.NUpTilePDFBytes(w, types.RectForDim(float64(imgWidth), float64(imgHeight)), r, formResID, nup, false) } } @@ -463,7 +477,7 @@ func NewNUpPageForImage(xRefTable *model.XRefTable, fileName string, parentIndRe defer f.Close() // create image dict. - imgIndRef, w, h, err := model.CreateImageResource(xRefTable, f, false, false) + imgIndRef, w, h, err := model.CreateImageResource(xRefTable, f) if err != nil { return nil, err } @@ -725,7 +739,7 @@ func NUpFromMultipleImages(ctx *model.Context, fileNames []string, nup *model.NU return err } - imgIndRef, w, h, err := model.CreateImageResource(xRefTable, f, false, false) + imgIndRef, w, h, err := model.CreateImageResource(xRefTable, f) if err != nil { return err } @@ -743,7 +757,7 @@ func NUpFromMultipleImages(ctx *model.Context, fileNames []string, nup *model.NU formsResDict.Insert(formResID, *formIndRef) // Append to content stream of page i. - model.NUpTilePDFBytes(&buf, types.RectForDim(float64(w), float64(h)), rr[i%len(rr)], formResID, nup, false, true) + model.NUpTilePDFBytes(&buf, types.RectForDim(float64(w), float64(h)), rr[i%len(rr)], formResID, nup, false) } // Wrap incomplete nUp page. diff --git a/pkg/pdfcpu/optimize.go b/pkg/pdfcpu/optimize.go index bbd5cef8..91f51dd4 100644 --- a/pkg/pdfcpu/optimize.go +++ b/pkg/pdfcpu/optimize.go @@ -18,7 +18,9 @@ package pdfcpu import ( "bytes" + "fmt" "sort" + "strings" "github.com/angel-one/pdfcpu/pkg/log" pdffont "github.com/angel-one/pdfcpu/pkg/pdfcpu/font" @@ -63,22 +65,89 @@ func optimizeContentStreamUsage(ctx *model.Context, sd *types.StreamDict, objNr return nil, nil } +func removeEmptyContentStreams(ctx *model.Context, pageDict types.Dict, obj types.Object, pageObjNumber int) error { + var contentArr types.Array + + if ir, ok := obj.(types.IndirectRef); ok { + + objNr := ir.ObjectNumber.Value() + entry, found := ctx.FindTableEntry(objNr, ir.GenerationNumber.Value()) + if !found { + return errors.Errorf("removeEmptyContentStreams: obj#:%d illegal indRef for Contents\n", pageObjNumber) + } + + contentStreamDict, ok := entry.Object.(types.StreamDict) + if ok { + if err := contentStreamDict.Decode(); err != nil { + return errors.Errorf("invalid content stream obj#%d: %v", pageObjNumber, err) + } + if len(contentStreamDict.Content) == 0 { + pageDict.Delete("Contents") + } + return nil + } + + contentArr, ok = entry.Object.(types.Array) + if !ok { + return errors.Errorf("removeEmptyContentStreams: obj#:%d page content entry neither stream dict nor array.\n", pageObjNumber) + } + + } else if contentArr, ok = obj.(types.Array); !ok { + return errors.Errorf("removeEmptyContentStreams: obj#:%d corrupt page content array", pageObjNumber) + } + + var newContentArr types.Array + + for _, c := range contentArr { + + ir, ok := c.(types.IndirectRef) + if !ok { + return errors.Errorf("removeEmptyContentStreams: obj#:%d corrupt page content array entry\n", pageObjNumber) + } + + objNr := ir.ObjectNumber.Value() + entry, found := ctx.FindTableEntry(objNr, ir.GenerationNumber.Value()) + if !found { + return errors.Errorf("removeEmptyContentStreams: obj#:%d illegal indRef for Contents\n", pageObjNumber) + } + + contentStreamDict, ok := entry.Object.(types.StreamDict) + if !ok { + return errors.Errorf("removeEmptyContentStreams: obj#:%d page content entry is not a stream dict\n", pageObjNumber) + } + + if err := contentStreamDict.Decode(); err != nil { + return err + } + if 
len(contentStreamDict.Content) > 0 { + newContentArr = append(newContentArr, c) + } + } + + pageDict["Contents"] = newContentArr + + return nil +} + func optimizePageContent(ctx *model.Context, pageDict types.Dict, pageObjNumber int) error { - if !ctx.OptimizeDuplicateContentStreams { + o, found := pageDict.Find("Contents") + if !found { return nil } - if log.OptimizeEnabled() { - log.Optimize.Println("identifyPageContent begin") + + if err := removeEmptyContentStreams(ctx, pageDict, o, pageObjNumber); err != nil { + return err } - o, found := pageDict.Find("Contents") + o, found = pageDict.Find("Contents") if !found { - if log.OptimizeEnabled() { - log.Optimize.Println("identifyPageContent end: no \"Contents\"") - } return nil } + if log.OptimizeEnabled() { + log.Optimize.Println("identifyPageContent begin") + } + var contentArr types.Array if ir, ok := o.(types.IndirectRef); ok { @@ -115,7 +184,7 @@ func optimizePageContent(ctx *model.Context, pageDict types.Dict, pageObjNumber return errors.Errorf("identifyPageContent: obj#:%d corrupt page content array\n", pageObjNumber) } - // TODO Activate content array opimization as soon as we have a proper test file. + // TODO Activate content array optimization as soon as we have a proper test file. _ = contentArr @@ -171,7 +240,7 @@ func resourcesDictForPageDict(xRefTable *model.XRefTable, pageDict types.Dict, p } // handleDuplicateFontObject returns nil or the object number of the registered font if it matches this font. -func handleDuplicateFontObject(ctx *model.Context, fontDict types.Dict, fName, rName string, objNr, pageNumber int) (*int, error) { +func handleDuplicateFontObject(ctx *model.Context, fontDict types.Dict, fName, rName string, objNr, pageNr int) (*int, error) { // Get a slice of all font object numbers for font name. fontObjNrs, found := ctx.Optimize.Fonts[fName] if !found { @@ -179,13 +248,17 @@ func handleDuplicateFontObject(ctx *model.Context, fontDict types.Dict, fName, r return nil, nil } - // Get the set of font object numbers for pageNumber. - pageFonts := ctx.Optimize.PageFonts[pageNumber] + // Get the set of font object numbers for pageNr. + pageFonts := ctx.Optimize.PageFonts[pageNr] // Iterate over all registered font object numbers for font name. // Check if this font dict matches the font dict of each font object number. for _, fontObjNr := range fontObjNrs { + if fontObjNr == objNr { + continue + } + // Get the font object from the lookup table. fontObject, ok := ctx.Optimize.FontObjects[fontObjNr] if !ok { @@ -212,7 +285,7 @@ func handleDuplicateFontObject(ctx *model.Context, fontDict types.Dict, fName, r log.Optimize.Printf("handleDuplicateFontObject: redundant fontObj#:%d basefont %s already registered with obj#:%d !\n", objNr, fName, fontObjNr) } - // Register new page font with pageNumber. + // Register new page font with pageNr. // The font for font object number is used instead of objNr. 
pageFonts[fontObjNr] = true @@ -229,21 +302,21 @@ func handleDuplicateFontObject(ctx *model.Context, fontDict types.Dict, fName, r return nil, nil } -func pageImages(ctx *model.Context, pageNumber int) types.IntSet { - pageImages := ctx.Optimize.PageImages[pageNumber] +func pageImages(ctx *model.Context, pageNr int) types.IntSet { + pageImages := ctx.Optimize.PageImages[pageNr] if pageImages == nil { pageImages = types.IntSet{} - ctx.Optimize.PageImages[pageNumber] = pageImages + ctx.Optimize.PageImages[pageNr] = pageImages } return pageImages } -func pageFonts(ctx *model.Context, pageNumber int) types.IntSet { - pageFonts := ctx.Optimize.PageFonts[pageNumber] +func pageFonts(ctx *model.Context, pageNr int) types.IntSet { + pageFonts := ctx.Optimize.PageFonts[pageNr] if pageFonts == nil { pageFonts = types.IntSet{} - ctx.Optimize.PageFonts[pageNumber] = pageFonts + ctx.Optimize.PageFonts[pageNr] = pageFonts } return pageFonts @@ -265,17 +338,36 @@ func registerFontDictObjNr(ctx *model.Context, fName string, objNr int) { } } -// Get rid of redundant fonts for given fontResources dictionary. -func optimizeFontResourcesDict(ctx *model.Context, rDict types.Dict, pageNumber, pageObjNumber int) error { - if log.OptimizeEnabled() { - log.Optimize.Printf("optimizeFontResourcesDict begin: page=%d pageObjNumber=%d %s\nPageFonts=%v\n", pageNumber, pageObjNumber, rDict, ctx.Optimize.PageFonts) +func checkForEmbeddedFont(ctx *model.Context) bool { + return log.StatsEnabled() || ctx.Cmd == model.LISTINFO || ctx.Cmd == model.EXTRACTFONTS +} + +func qualifiedRName(rNamePrefix, rName string) string { + s := rName + if rNamePrefix != "" { + s = rNamePrefix + "." + rName } + return s +} + +// Get rid of redundant fonts for given fontResources dictionary. +func optimizeFontResourcesDict(ctx *model.Context, rDict types.Dict, pageNr int, rNamePrefix string) error { + pageFonts := pageFonts(ctx, pageNr) - pageFonts := pageFonts(ctx, pageNumber) + recordedCorrupt := false // Iterate over font resource dict. for rName, v := range rDict { + if v == nil { + if !recordedCorrupt { + // fontId with missing fontDict indRef. + ctx.Optimize.CorruptFontResDicts = append(ctx.Optimize.CorruptFontResDicts, rDict) + recordedCorrupt = true + } + continue + } + indRef, ok := v.(types.IndirectRef) if !ok { continue @@ -283,13 +375,10 @@ func optimizeFontResourcesDict(ctx *model.Context, rDict types.Dict, pageNumber, objNr := int(indRef.ObjectNumber) - if log.OptimizeEnabled() { - log.Optimize.Printf("optimizeFontResourcesDict: processing font: %s, objj#=%d\n", rName, objNr) - } + qualifiedRName := qualifiedRName(rNamePrefix, rName) if _, found := ctx.Optimize.FontObjects[objNr]; found { // This font has already been registered. - //log.Optimize.Printf("optimizeFontResourcesDict: Fontobject %d already registered\n", objectNumber) pageFonts[objNr] = true continue } @@ -297,28 +386,24 @@ func optimizeFontResourcesDict(ctx *model.Context, rDict types.Dict, pageNumber, // We are dealing with a new font. fontDict, err := ctx.DereferenceFontDict(indRef) if err != nil { - return err + if ctx.XRefTable.ValidationMode == model.ValidationStrict { + return err + } + + fontDict = nil } if fontDict == nil { continue } - if log.OptimizeEnabled() { - log.Optimize.Printf("optimizeFontResourcesDict: fontDict: %s\n", fontDict) - } - // Get the unique font name. 
prefix, fName, err := pdffont.Name(ctx.XRefTable, fontDict, objNr) if err != nil { return err } - if log.OptimizeEnabled() { - log.Optimize.Printf("optimizeFontResourcesDict: baseFont: prefix=%s name=%s\n", prefix, fName) - } - // Check if fontDict is a duplicate and if so return the object number of the original. - originalObjNr, err := handleDuplicateFontObject(ctx, fontDict, fName, rName, objNr, pageNumber) + originalObjNr, err := handleDuplicateFontObject(ctx, fontDict, fName, qualifiedRName, objNr, pageNr) if err != nil { return err } @@ -329,37 +414,69 @@ func optimizeFontResourcesDict(ctx *model.Context, rDict types.Dict, pageNumber, ir := types.NewIndirectRef(*originalObjNr, 0) rDict[rName] = *ir ctx.IncrementRefCount(ir) + if log.OptimizeEnabled() { + log.Optimize.Printf("optimizeFontResourcesDict: redundant fontDict prefix=%s name=%s (objNr#%d -> objNr#%d)\n", prefix, fName, objNr, originalObjNr) + } continue } registerFontDictObjNr(ctx, fName, objNr) - ctx.Optimize.FontObjects[objNr] = - &model.FontObject{ - ResourceNames: []string{rName}, - Prefix: prefix, - FontName: fName, - FontDict: fontDict, + fontObj := model.FontObject{ + ResourceNames: []string{qualifiedRName}, + Prefix: prefix, + FontName: fName, + FontDict: fontDict, + } + + if checkForEmbeddedFont(ctx) { + fontObj.Embedded, err = pdffont.Embedded(ctx.XRefTable, fontDict, objNr) + if err != nil { + return err } + } - pageFonts[objNr] = true - } + ctx.Optimize.FontObjects[objNr] = &fontObj - if log.OptimizeEnabled() { - log.Optimize.Println("optimizeFontResourcesDict end:") + pageFonts[objNr] = true } return nil } // handleDuplicateImageObject returns nil or the object number of the registered image if it matches this image. -func handleDuplicateImageObject(ctx *model.Context, imageDict *types.StreamDict, resourceName string, objNr, pageNumber int) (*int, error) { - // Get the set of image object numbers for pageNumber. - pageImages := ctx.Optimize.PageImages[pageNumber] +func handleDuplicateImageObject(ctx *model.Context, imageDict *types.StreamDict, resourceName string, objNr, pageNr int) (*int, bool, error) { + // Get the set of image object numbers for pageNr. + pageImages := ctx.Optimize.PageImages[pageNr] + + if duplImgObj, ok := ctx.Optimize.DuplicateImages[objNr]; ok { + + newObjNr := duplImgObj.NewObjNr + // We have detected a redundant image dict. + if log.OptimizeEnabled() { + log.Optimize.Printf("handleDuplicateImageObject: redundant imageObj#:%d already registered with obj#:%d !\n", objNr, newObjNr) + } + + // Register new page image for pageNr. + // The image for image object number is used instead of objNr. + pageImages[newObjNr] = true + + // Add the resource name of this duplicate image to the list of registered resource names. + ctx.Optimize.ImageObjects[newObjNr].AddResourceName(pageNr, resourceName) + + // Return the imageObjectNumber that will be used instead of objNr. + return &newObjNr, false, nil + } // Process image dict, check if this is a duplicate. for imageObjNr, imageObject := range ctx.Optimize.ImageObjects { + if imageObjNr == objNr { + // Add the resource name of this duplicate image to the list of registered resource names. 
+ imageObject.AddResourceName(pageNr, resourceName) + return nil, true, nil + } + if log.OptimizeEnabled() { log.Optimize.Printf("handleDuplicateImageObject: comparing with imagedict Obj %d\n", imageObjNr) } @@ -367,7 +484,7 @@ func handleDuplicateImageObject(ctx *model.Context, imageDict *types.StreamDict, // Check if the input imageDict matches the imageDict of this imageObject. ok, err := model.EqualStreamDicts(imageObject.ImageDict, imageDict, ctx.XRefTable) if err != nil { - return nil, err + return nil, false, err } if !ok { @@ -380,37 +497,34 @@ func handleDuplicateImageObject(ctx *model.Context, imageDict *types.StreamDict, log.Optimize.Printf("handleDuplicateImageObject: redundant imageObj#:%d already registered with obj#:%d !\n", objNr, imageObjNr) } - // Register new page image for pageNumber. + // Register new page image for pageNr. // The image for image object number is used instead of objNr. pageImages[imageObjNr] = true // Add the resource name of this duplicate image to the list of registered resource names. - imageObject.AddResourceName(resourceName) + imageObject.AddResourceName(pageNr, resourceName) // Register imageDict as duplicate. - ctx.Optimize.DuplicateImages[objNr] = imageDict + ctx.Optimize.DuplicateImages[objNr] = &model.DuplicateImageObject{ImageDict: imageDict, NewObjNr: imageObjNr} // Return the imageObjectNumber that will be used instead of objNr. - return &imageObjNr, nil + return &imageObjNr, false, nil } - return nil, nil + return nil, false, nil } -func optimizeXObjectImage(ctx *model.Context, osd *types.StreamDict, rName string, objNr, pageNumber int, pageImages types.IntSet) (*types.IndirectRef, error) { +func optimizeXObjectImage(ctx *model.Context, osd *types.StreamDict, rNamePrefix, rName string, rDict types.Dict, objNr, pageNr, pageObjNumber int, pageImages types.IntSet) error { - // Already registered image object that appears in different resources dicts. - if _, found := ctx.Optimize.ImageObjects[objNr]; found { - // This image has already been registered. - //log.Optimize.Printf("optimizeXObjectResourcesDict: Imageobject %d already registered\n", objNr) - pageImages[objNr] = true - return nil, nil + qualifiedRName := rName + if rNamePrefix != "" { + qualifiedRName = rNamePrefix + "." + rName } // Check if image is a duplicate and if so return the object number of the original. - originalObjNr, err := handleDuplicateImageObject(ctx, osd, rName, objNr, pageNumber) + originalObjNr, alreadyDupl, err := handleDuplicateImageObject(ctx, osd, qualifiedRName, objNr, pageNr) if err != nil { - return nil, err + return err } if originalObjNr != nil { @@ -418,22 +532,24 @@ func optimizeXObjectImage(ctx *model.Context, osd *types.StreamDict, rName strin // Update xobject resource dict so that rName points to the original. ir := types.NewIndirectRef(*originalObjNr, 0) ctx.IncrementRefCount(ir) - return ir, nil + rDict[rName] = *ir + if log.OptimizeEnabled() { + log.Optimize.Printf("optimizeXObjectImage: redundant xobject name=%s (objNr#%d -> objNr#%d)\n", qualifiedRName, objNr, originalObjNr) + } + return nil } - // Register new image dict. - if log.OptimizeEnabled() { - log.Optimize.Printf("optimizeXObjectResourcesDict: adding new image obj#%d\n", objNr) + if !alreadyDupl { + // Register new image dict. 
+ ctx.Optimize.ImageObjects[objNr] = + &model.ImageObject{ + ResourceNames: map[int]string{pageNr: qualifiedRName}, + ImageDict: osd, + } } - ctx.Optimize.ImageObjects[objNr] = - &model.ImageObject{ - ResourceNames: []string{rName}, - ImageDict: osd, - } - pageImages[objNr] = true - return nil, nil + return nil } func optimizeXObjectForm(ctx *model.Context, sd *types.StreamDict, objNr int) (*types.IndirectRef, error) { @@ -459,14 +575,14 @@ func optimizeXObjectForm(ctx *model.Context, sd *types.StreamDict, objNr int) (* return nil, nil } - for _, objNr := range cachedObjNrs { - sd1 := f[objNr] + for _, objNr1 := range cachedObjNrs { + sd1 := f[objNr1] ok, err := model.EqualStreamDicts(sd, sd1, ctx.XRefTable) if err != nil { return nil, err } if ok { - ir := types.NewIndirectRef(objNr, 0) + ir := types.NewIndirectRef(objNr1, 0) ctx.IncrementRefCount(ir) return ir, nil } @@ -476,14 +592,14 @@ func optimizeXObjectForm(ctx *model.Context, sd *types.StreamDict, objNr int) (* return nil, nil } -func optimizeFormResources(ctx *model.Context, o types.Object, pageNumber, pageObjNumber int, visitedRes []types.Object) error { +func optimizeFormResources(ctx *model.Context, o types.Object, pageNr, pageObjNumber int, rName string, visitedRes []types.Object) error { d, err := ctx.DereferenceDict(o) if err != nil { return err } if d != nil { // Optimize image and font resources. - if err = optimizeResources(ctx, d, pageNumber, pageObjNumber, visitedRes); err != nil { + if err = optimizeResources(ctx, d, pageNr, pageObjNumber, rName, visitedRes); err != nil { return err } } @@ -499,7 +615,7 @@ func visited(o types.Object, visited []types.Object) bool { return false } -func optimizeForm(ctx *model.Context, osd *types.StreamDict, rName string, rDict types.Dict, objNr, pageNumber, pageObjNumber int, vis []types.Object) error { +func optimizeForm(ctx *model.Context, osd *types.StreamDict, rNamePrefix, rName string, rDict types.Dict, objNr, pageNr, pageObjNumber int, vis []types.Object) error { ir, err := optimizeXObjectForm(ctx, osd, objNr) if err != nil { @@ -524,15 +640,83 @@ func optimizeForm(ctx *model.Context, osd *types.StreamDict, rName string, rDict vis = append(vis, indRef) } - return optimizeFormResources(ctx, o, pageNumber, pageObjNumber, vis) + qualifiedRName := rName + if rNamePrefix != "" { + qualifiedRName = rNamePrefix + "." 
+ rName + } + + return optimizeFormResources(ctx, o, pageNr, pageObjNumber, qualifiedRName, vis) } -func optimizeXObjectResourcesDict(ctx *model.Context, rDict types.Dict, pageNumber, pageObjNumber int, vis []types.Object) error { +func optimizeExtGStateResources(ctx *model.Context, rDict types.Dict, pageNr, pageObjNumber int, rNamePrefix string, vis []types.Object) error { if log.OptimizeEnabled() { - log.Optimize.Printf("optimizeXObjectResourcesDict page#%dbegin: %s\n", pageObjNumber, rDict) + log.Optimize.Printf("optimizeExtGStateResources page#%dbegin: %s\n", pageObjNumber, rDict) + } + + pageImages := pageImages(ctx, pageNr) + + s, found := rDict.Find("SMask") + if found { + dict, ok := s.(types.Dict) + if ok { + if err := optimizeSMaskResources(dict, vis, rNamePrefix, ctx, rDict, pageNr, pageImages, pageObjNumber); err != nil { + return err + } + } } - pageImages := pageImages(ctx, pageNumber) + if log.OptimizeEnabled() { + log.Optimize.Println("optimizeExtGStateResources end") + } + + return nil +} + +func optimizeSMaskResources(dict types.Dict, vis []types.Object, rNamePrefix string, ctx *model.Context, rDict types.Dict, pageNr int, pageImages types.IntSet, pageObjNumber int) error { + indRef := dict.IndirectRefEntry("G") + if indRef == nil { + return nil + } + + if visited(*indRef, vis) { + return nil + } + + vis = append(vis, indRef) + + objNr := int(indRef.ObjectNumber) + + if log.OptimizeEnabled() { + log.Optimize.Printf("optimizeSMaskResources: processing \"G\", obj#=%d\n", objNr) + } + + sd, err := ctx.DereferenceXObjectDict(*indRef) + if err != nil { + return err + } + if sd == nil { + return nil + } + + if *sd.Subtype() == "Image" { + if err := optimizeXObjectImage(ctx, sd, rNamePrefix, "G", rDict, objNr, pageNr, pageObjNumber, pageImages); err != nil { + return err + } + } + + if *sd.Subtype() == "Form" { + if err := optimizeForm(ctx, sd, rNamePrefix, "G", rDict, objNr, pageNr, pageObjNumber, vis); err != nil { + return err + } + } + + return nil +} + +func optimizeExtGStateResourcesDict(ctx *model.Context, rDict types.Dict, pageNr, pageObjNumber int, rNamePrefix string, vis []types.Object) error { + if log.OptimizeEnabled() { + log.Optimize.Printf("optimizeExtGStateResourcesDict page#%dbegin: %s\n", pageObjNumber, rDict) + } for rName, v := range rDict { @@ -544,42 +728,94 @@ func optimizeXObjectResourcesDict(ctx *model.Context, rDict types.Dict, pageNumb if visited(indRef, vis) { continue } + vis = append(vis, indRef) objNr := int(indRef.ObjectNumber) + qualifiedRName := rName + if rNamePrefix != "" { + qualifiedRName = rNamePrefix + "." 
+ rName + } + if log.OptimizeEnabled() { - log.Optimize.Printf("optimizeXObjectResourcesDict: processing XObject: %s, obj#=%d\n", rName, objNr) + log.Optimize.Printf("optimizeExtGStateResourcesDict: processing XObject: %s, obj#=%d\n", qualifiedRName, objNr) } - sd, err := ctx.DereferenceXObjectDict(indRef) + rDict, err := ctx.DereferenceDict(indRef) if err != nil { + continue + } + if rDict == nil { + continue + } + + if err := optimizeExtGStateResources(ctx, rDict, pageNr, pageObjNumber, qualifiedRName, vis); err != nil { return err } - if sd == nil { + + } + + if log.OptimizeEnabled() { + log.Optimize.Println("optimizeXObjectResourcesDict end") + } + + return nil +} + +func optimizeXObjectResourcesDict(ctx *model.Context, rDict types.Dict, pageNr, pageObjNumber int, rNamePrefix string, vis []types.Object) error { + if log.OptimizeEnabled() { + log.Optimize.Printf("optimizeXObjectResourcesDict page#%dbegin: %s\n", pageObjNumber, rDict) + } + + pageImages := pageImages(ctx, pageNr) + + for rName, v := range rDict { + + indRef, ok := v.(types.IndirectRef) + if !ok { + continue + } + + if visited(indRef, vis) { continue } + vis = append(vis, indRef) + + objNr := int(indRef.ObjectNumber) + + qualifiedRName := rName + if rNamePrefix != "" { + qualifiedRName = rNamePrefix + "." + rName + } + if log.OptimizeEnabled() { - log.Optimize.Printf("optimizeXObjectResourcesDict: dereferenced obj:%d\n%s", objNr, sd) + log.Optimize.Printf("optimizeXObjectResourcesDict: processing XObject: %s, obj#=%d\n", qualifiedRName, objNr) } - if *sd.Dict.Subtype() == "Image" { - ir, err := optimizeXObjectImage(ctx, sd, rName, objNr, pageNumber, pageImages) - if err != nil { + sd, err := ctx.DereferenceXObjectDict(indRef) + if err != nil { + return err + } + if sd == nil { + continue + } + + if *sd.Subtype() == "Image" { + if err := optimizeXObjectImage(ctx, sd, rNamePrefix, rName, rDict, objNr, pageNr, pageObjNumber, pageImages); err != nil { return err } - if ir != nil { - rDict[rName] = *ir - } - continue } if *sd.Subtype() == "Form" { - if err := optimizeForm(ctx, sd, rName, rDict, objNr, pageNumber, pageObjNumber, vis); err != nil { + // Get rid of PieceInfo dict from form XObjects. 
+ if err := ctx.DeleteDictEntry(sd.Dict, "PieceInfo"); err != nil { + return err + } + if err := optimizeForm(ctx, sd, rNamePrefix, rName, rDict, objNr, pageNr, pageObjNumber, vis); err != nil { return err } - continue } } @@ -591,10 +827,49 @@ func optimizeXObjectResourcesDict(ctx *model.Context, rDict types.Dict, pageNumb return nil } +func processFontResources(ctx *model.Context, obj types.Object, pageNr, pageObjNumber int, rNamePrefix string) error { + d, err := ctx.DereferenceDict(obj) + if err != nil { + return err + } + + if d == nil { + return errors.Errorf("pdfcpu: processFontResources: font resource dict is null for page %d pageObj %d\n", pageNr, pageObjNumber) + } + + return optimizeFontResourcesDict(ctx, d, pageNr, rNamePrefix) +} + +func processXObjectResources(ctx *model.Context, obj types.Object, pageNr, pageObjNumber int, rNamePrefix string, visitedRes []types.Object) error { + d, err := ctx.DereferenceDict(obj) + if err != nil { + return err + } + + if d == nil { + return errors.Errorf("pdfcpu: processXObjectResources: xObject resource dict is null for page %d pageObj %d\n", pageNr, pageObjNumber) + } + + return optimizeXObjectResourcesDict(ctx, d, pageNr, pageObjNumber, rNamePrefix, visitedRes) +} + +func processExtGStateResources(ctx *model.Context, obj types.Object, pageNr, pageObjNumber int, rNamePrefix string, visitedRes []types.Object) error { + d, err := ctx.DereferenceDict(obj) + if err != nil { + return err + } + + if d == nil { + return errors.Errorf("pdfcpu: processExtGStateResources: extGState resource dict is null for page %d pageObj %d\n", pageNr, pageObjNumber) + } + + return optimizeExtGStateResourcesDict(ctx, d, pageNr, pageObjNumber, rNamePrefix, visitedRes) +} + // Optimize given resource dictionary by removing redundant fonts and images. -func optimizeResources(ctx *model.Context, resourcesDict types.Dict, pageNumber, pageObjNumber int, visitedRes []types.Object) error { +func optimizeResources(ctx *model.Context, resourcesDict types.Dict, pageNr, pageObjNumber int, rNamePrefix string, visitedRes []types.Object) error { if log.OptimizeEnabled() { - log.Optimize.Printf("optimizeResources begin: pageNumber=%d pageObjNumber=%d\n", pageNumber, pageObjNumber) + log.Optimize.Printf("optimizeResources begin: pageNr=%d pageObjNumber=%d\n", pageNr, pageObjNumber) } if resourcesDict == nil { @@ -604,44 +879,28 @@ func optimizeResources(ctx *model.Context, resourcesDict types.Dict, pageNumber, return nil } - // Process Font resource dict, get rid of redundant fonts. - o, found := resourcesDict.Find("Font") + obj, found := resourcesDict.Find("Font") if found { - - d, err := ctx.DereferenceDict(o) - if err != nil { - return err - } - - if d == nil { - return errors.Errorf("pdfcpu: optimizeResources: font resource dict is null for page %d pageObj %d\n", pageNumber, pageObjNumber) - } - - if err = optimizeFontResourcesDict(ctx, d, pageNumber, pageObjNumber); err != nil { + // Process Font resource dict, get rid of redundant fonts. + if err := processFontResources(ctx, obj, pageNr, pageObjNumber, rNamePrefix); err != nil { return err } - } - // Note: An optional ExtGState resource dict may contain binary content in the following entries: "SMask", "HT". - - // Process XObject resource dict, get rid of redundant images. - o, found = resourcesDict.Find("XObject") + obj, found = resourcesDict.Find("XObject") if found { - - d, err := ctx.DereferenceDict(o) - if err != nil { + // Process XObject resource dict, get rid of redundant images. 
+ if err := processXObjectResources(ctx, obj, pageNr, pageObjNumber, rNamePrefix, visitedRes); err != nil { return err } + } - if d == nil { - return errors.Errorf("pdfcpu: optimizeResources: xobject resource dict is null for page %d pageObj %d\n", pageNumber, pageObjNumber) - } - - if err = optimizeXObjectResourcesDict(ctx, d, pageNumber, pageObjNumber, visitedRes); err != nil { + obj, found = resourcesDict.Find("ExtGState") + if found { + // An ExtGState resource dict may contain binary content in the following entries: "SMask", "HT". + if err := processExtGStateResources(ctx, obj, pageNr, pageObjNumber, rNamePrefix, visitedRes); err != nil { return err } - } if log.OptimizeEnabled() { @@ -652,15 +911,15 @@ func optimizeResources(ctx *model.Context, resourcesDict types.Dict, pageNumber, } // Process the resources dictionary for given page number and optimize by removing redundant resources. -func parseResourcesDict(ctx *model.Context, pageDict types.Dict, pageNumber, pageObjNumber int) error { +func parseResourcesDict(ctx *model.Context, pageDict types.Dict, pageNr, pageObjNumber int) error { if ctx.Optimize.Cache[pageObjNumber] { return nil } ctx.Optimize.Cache[pageObjNumber] = true - // The logical pageNumber is pageNumber+1. + // The logical pageNr is pageNr+1. if log.OptimizeEnabled() { - log.Optimize.Printf("parseResourcesDict begin page: %d, object:%d\n", pageNumber+1, pageObjNumber) + log.Optimize.Printf("parseResourcesDict begin page: %d, object:%d\n", pageNr+1, pageObjNumber) } // Get resources dict for this page. @@ -673,30 +932,26 @@ func parseResourcesDict(ctx *model.Context, pageDict types.Dict, pageNumber, pag if d != nil { // Optimize image and font resources. - if err = optimizeResources(ctx, d, pageNumber, pageObjNumber, []types.Object{}); err != nil { + if err = optimizeResources(ctx, d, pageNr, pageObjNumber, "", []types.Object{}); err != nil { return err } } if log.OptimizeEnabled() { - log.Optimize.Printf("parseResourcesDict end page: %d, object:%d\n", pageNumber+1, pageObjNumber) + log.Optimize.Printf("parseResourcesDict end page: %d, object:%d\n", pageNr+1, pageObjNumber) } return nil } -// Iterate over all pages and optimize resources. -func parsePagesDict(ctx *model.Context, pagesDict types.Dict, pageNumber int) (int, error) { +// Iterate over all pages and optimize content & resources. +func parsePagesDict(ctx *model.Context, pagesDict types.Dict, pageNr int) (int, error) { // TODO Integrate resource consolidation based on content stream requirements. - count, found := pagesDict.Find("Count") + _, found := pagesDict.Find("Count") if !found { - return pageNumber, errors.New("pdfcpu: parsePagesDict: missing Count") - } - - if log.OptimizeEnabled() { - log.Optimize.Printf("parsePagesDict begin (next page=%d has %s pages): %s\n", pageNumber+1, count.(types.Integer), pagesDict) + return pageNr, errors.New("pdfcpu: parsePagesDict: missing Count") } ctx.Optimize.Cache = map[int]bool{} @@ -704,12 +959,12 @@ func parsePagesDict(ctx *model.Context, pagesDict types.Dict, pageNumber int) (i // Iterate over page tree. 
o, found := pagesDict.Find("Kids") if !found { - return pageNumber, errors.New("pdfcpu: corrupt \"Kids\" entry") + return pageNr, errors.Errorf("pdfcpu: corrupt \"Kids\" entry %s", pagesDict) } kids, err := ctx.DereferenceArray(o) if err != nil || kids == nil { - return pageNumber, errors.New("pdfcpu: corrupt \"Kids\" entry") + return pageNr, errors.Errorf("pdfcpu: corrupt \"Kids\" entry: %s", pagesDict) } for _, v := range kids { @@ -733,7 +988,7 @@ func parsePagesDict(ctx *model.Context, pagesDict types.Dict, pageNumber int) (i if *dictType == "Pages" { // Recurse over pagetree and optimize resources. - pageNumber, err = parsePagesDict(ctx, d, pageNumber) + pageNr, err = parsePagesDict(ctx, d, pageNr) if err != nil { return 0, err } @@ -743,27 +998,30 @@ func parsePagesDict(ctx *model.Context, pagesDict types.Dict, pageNumber int) (i // Process page dict. - if err = optimizePageContent(ctx, d, int(ir.ObjectNumber)); err != nil { - return 0, err + if ctx.OptimizeDuplicateContentStreams { + if err = optimizePageContent(ctx, d, int(ir.ObjectNumber)); err != nil { + return 0, err + } } + // Get rid of PieceInfo dict from page dict. if err := ctx.DeleteDictEntry(d, "PieceInfo"); err != nil { return 0, err } // Parse and optimize resource dict for one page. - if err = parseResourcesDict(ctx, d, pageNumber, int(ir.ObjectNumber)); err != nil { + if err = parseResourcesDict(ctx, d, pageNr, int(ir.ObjectNumber)); err != nil { return 0, err } - pageNumber++ + pageNr++ } if log.OptimizeEnabled() { log.Optimize.Printf("parsePagesDict end: %s\n", pagesDict) } - return pageNumber, nil + return pageNr, nil } func traverse(xRefTable *model.XRefTable, value types.Object, duplObjs types.IntSet) error { @@ -848,10 +1106,10 @@ func calcRedundantObjects(ctx *model.Context) error { } } - for i, sd := range ctx.Optimize.DuplicateImages { + for i, obj := range ctx.Optimize.DuplicateImages { ctx.Optimize.DuplicateImageObjs[i] = true // Identify and mark all involved potential duplicate objects for a redundant image. - if err := traverseObjectGraphAndMarkDuplicates(ctx.XRefTable, *sd, ctx.Optimize.DuplicateImageObjs); err != nil { + if err := traverseObjectGraphAndMarkDuplicates(ctx.XRefTable, *obj.ImageDict, ctx.Optimize.DuplicateImageObjs); err != nil { return err } } @@ -863,6 +1121,28 @@ func calcRedundantObjects(ctx *model.Context) error { return nil } +func fixCorruptFontResDicts(ctx *model.Context) error { + // TODO: hacky, also because we don't really take the fontDict type into account. + for _, d := range ctx.Optimize.CorruptFontResDicts { + for k, v := range d { + if v == nil { + for fn, objNrs := range ctx.Optimize.Fonts { + + if strings.HasPrefix(fn, "Arial") && (len(fn) == 5 || fn[5] != '-') { + model.ShowRepaired(fmt.Sprintf("font %s mapped to objNr %d", k, objNrs[0])) + d[k] = *types.NewIndirectRef(objNrs[0], 0) + break + } + } + } + // if d[k] == nil { + // d[k] = *types.NewIndirectRef(objNrs[0], 0) + // } + } + } + return nil +} + // Iterate over all pages and optimize resources. // Get rid of duplicate embedded fonts and images. func optimizeFontAndImages(ctx *model.Context) error { @@ -882,22 +1162,6 @@ func optimizeFontAndImages(ctx *model.Context) error { return err } - // Detect the number of pages of this PDF file. - pageCount := pageTreeRootDict.IntEntry("Count") - if pageCount == nil { - return errors.New("pdfcpu: optimizeFontAndImagess: missing \"Count\" in page root dict") - } - - // If PageCount already set by validation doublecheck. 
- if ctx.PageCount > 0 && ctx.PageCount != *pageCount { - return errors.New("pdfcpu: optimizeFontAndImagess: unexpected page root dict pageCount discrepancy") - } - - // If we optimize w/o prior validation, set PageCount. - if ctx.PageCount == 0 { - ctx.PageCount = *pageCount - } - // Prepare optimization environment. ctx.Optimize.PageFonts = make([]types.IntSet, ctx.PageCount) ctx.Optimize.PageImages = make([]types.IntSet, ctx.PageCount) @@ -908,6 +1172,10 @@ func optimizeFontAndImages(ctx *model.Context) error { return err } + if err := fixCorruptFontResDicts(ctx); err != nil { + return err + } + ctx.Optimize.ContentStreamCache = map[int]*types.StreamDict{} ctx.Optimize.FormStreamCache = map[int]*types.StreamDict{} @@ -955,25 +1223,29 @@ func calcEmbeddedFontsMemoryUsage(ctx *model.Context) error { fontFileIndRefs := map[types.IndirectRef]bool{} - var objectNumbers []int + var objNrs []int // Sorting unnecessary. for k := range ctx.Optimize.FontObjects { - objectNumbers = append(objectNumbers, k) + objNrs = append(objNrs, k) } - sort.Ints(objectNumbers) + sort.Ints(objNrs) // Iterate over all embedded font objects and record font file references. - for _, objectNumber := range objectNumbers { + for _, objNr := range objNrs { - fontObject := ctx.Optimize.FontObjects[objectNumber] + fontObject := ctx.Optimize.FontObjects[objNr] // Only embedded fonts have binary data. - if !fontObject.Embedded() { + ok, err := pdffont.Embedded(ctx.XRefTable, fontObject.FontDict, objNr) + if err != nil { + return err + } + if !ok { continue } - if err := processFontFilesForFontDict(ctx.XRefTable, fontObject.FontDict, objectNumber, fontFileIndRefs); err != nil { + if err := processFontFilesForFontDict(ctx.XRefTable, fontObject.FontDict, objNr, fontFileIndRefs); err != nil { return err } } @@ -1017,93 +1289,6 @@ func fontDescriptorFontFileIndirectObjectRef(fontDescriptorDict types.Dict) *typ return ir } -func trivialFontDescriptor(xRefTable *model.XRefTable, fontDict types.Dict, objNr int) (types.Dict, error) { - o, ok := fontDict.Find("FontDescriptor") - if !ok { - return nil, nil - } - - // fontDescriptor directly available. - - d, err := xRefTable.DereferenceDict(o) - if err != nil { - return nil, err - } - - if d == nil { - return nil, errors.Errorf("pdfcpu: trivialFontDescriptor: FontDescriptor is null for font object %d\n", objNr) - } - - if d.Type() != nil && *d.Type() != "FontDescriptor" { - return nil, errors.Errorf("pdfcpu: trivialFontDescriptor: FontDescriptor dict incorrect dict type for font object %d\n", objNr) - } - - return d, nil -} - -// FontDescriptor gets the font descriptor for this font. -func fontDescriptor(xRefTable *model.XRefTable, fontDict types.Dict, objNr int) (types.Dict, error) { - if log.OptimizeEnabled() { - log.Optimize.Println("fontDescriptor begin") - } - - d, err := trivialFontDescriptor(xRefTable, fontDict, objNr) - if err != nil { - return nil, err - } - if d != nil { - return d, nil - } - - // Try to access a fontDescriptor in a Descendent font for Type0 fonts. - - o, ok := fontDict.Find("DescendantFonts") - if !ok { - //logErrorOptimize.Printf("FontDescriptor: Neither FontDescriptor nor DescendantFonts for font object %d\n", objectNumber) - return nil, nil - } - - // A descendant font is contained in an array of size 1. 
- - a, err := xRefTable.DereferenceArray(o) - if err != nil || a == nil { - return nil, errors.Errorf("pdfcpu: fontDescriptor: DescendantFonts: IndirectRef or Array wth length 1 expected for font object %d\n", objNr) - } - if len(a) > 1 { - return nil, errors.Errorf("pdfcpu: fontDescriptor: DescendantFonts Array length > 1 %v\n", a) - } - - // dict is the fontDict of the descendant font. - d, err = xRefTable.DereferenceDict(a[0]) - if err != nil { - return nil, errors.Errorf("pdfcpu: fontDescriptor: No descendant font dict for %v\n", a) - } - if d == nil { - return nil, errors.Errorf("pdfcpu: fontDescriptor: descendant font dict is null for %v\n", a) - } - - if *d.Type() != "Font" { - return nil, errors.Errorf("pdfcpu: fontDescriptor: font dict with incorrect dict type for %v\n", d) - } - - o, ok = d.Find("FontDescriptor") - if !ok { - log.Optimize.Printf("fontDescriptor: descendant font not embedded %s\n", d) - return nil, nil - } - - d, err = xRefTable.DereferenceDict(o) - if err != nil { - return nil, errors.Errorf("pdfcpu: fontDescriptor: No FontDescriptor dict for font object %d\n", objNr) - } - - if log.OptimizeEnabled() { - log.Optimize.Println("fontDescriptor end") - } - - return d, nil -} - // Record font file objects referenced by this fonts font descriptor for stats and size calculation. func processFontFilesForFontDict(xRefTable *model.XRefTable, fontDict types.Dict, objectNumber int, indRefsMap map[types.IndirectRef]bool) error { if log.OptimizeEnabled() { @@ -1113,7 +1298,7 @@ func processFontFilesForFontDict(xRefTable *model.XRefTable, fontDict types.Dict // Note: // "ToUnicode" is also an entry containing binary content that could be inspected for duplicate content. - d, err := fontDescriptor(xRefTable, fontDict, objectNumber) + d, err := pdffont.FontDescriptor(xRefTable, fontDict, objectNumber) if err != nil { return err } @@ -1200,8 +1385,8 @@ func calcImageBinarySizes(ctx *model.Context) { } // Calc memory usage for duplicate images. - for _, imageDict := range ctx.Optimize.DuplicateImages { - ctx.Read.BinaryImageDuplSize += *imageDict.StreamLength + for _, obj := range ctx.Optimize.DuplicateImages { + ctx.Read.BinaryImageDuplSize += *obj.ImageDict.StreamLength } if log.OptimizeEnabled() { @@ -1350,7 +1535,7 @@ func fixReferencesToFreeObjects(ctx *model.Context) error { return fixDirectObject(ctx, ctx.RootDict) } -func cacheFormFonts(ctx *model.Context) error { +func CacheFormFonts(ctx *model.Context) error { d, err := primitives.FormFontResDict(ctx.XRefTable) if err != nil { @@ -1428,13 +1613,35 @@ func optimizeResourceDicts(ctx *model.Context) error { return nil } +func resolveWidth(ctx *model.Context, sd *types.StreamDict) error { + if obj, ok := sd.Find("Width"); ok { + w, err := ctx.DereferenceNumber(obj) + if err != nil { + return err + } + sd.Dict["Width"] = types.Integer(w) + } + return nil +} + +func ensureDirectWidthForXObjs(ctx *model.Context) error { + for _, imgObjs := range ctx.Optimize.PageImages { + for objNr, v := range imgObjs { + if v { + imageObj := ctx.Optimize.ImageObjects[objNr] + if err := resolveWidth(ctx, imageObj.ImageDict); err != nil { + return err + } + } + } + } + return nil +} + // OptimizeXRefTable optimizes an xRefTable by locating and getting rid of redundant embedded fonts and images. 
func OptimizeXRefTable(ctx *model.Context) error { - if log.InfoEnabled() { - log.Info.Println("optimizing fonts & images") - } - if log.OptimizeEnabled() { - log.Optimize.Println("optimizeXRefTable begin") + if ctx.PageCount == 0 { + return nil } // Sometimes free objects are used although they are part of the free object list. @@ -1443,14 +1650,12 @@ func OptimizeXRefTable(ctx *model.Context) error { return err } - // Cache form fonts. - // TODO optimize form fonts. - if err := cacheFormFonts(ctx); err != nil { - return err - } - - if ctx.Cmd == model.OPTIMIZE { - // Consolidate resource dicts. + if (ctx.Cmd == model.VALIDATE || + ctx.Cmd == model.OPTIMIZE || + ctx.Cmd == model.LISTIMAGES || + ctx.Cmd == model.EXTRACTIMAGES || + ctx.Cmd == model.UPDATEIMAGES) && + ctx.Conf.OptimizeResourceDicts { // Extra step with potential for performance hit when processing large files. if err := optimizeResourceDicts(ctx); err != nil { return err @@ -1462,21 +1667,23 @@ func OptimizeXRefTable(ctx *model.Context) error { return err } + if err := ensureDirectWidthForXObjs(ctx); err != nil { + return err + } + // Get rid of PieceInfo dict from root. if err := ctx.DeleteDictEntry(ctx.RootDict, "PieceInfo"); err != nil { return err } // Calculate memory usage of binary content for stats. - if err := calcBinarySizes(ctx); err != nil { - return err + if log.StatsEnabled() { + if err := calcBinarySizes(ctx); err != nil { + return err + } } ctx.Optimized = true - if log.OptimizeEnabled() { - log.Optimize.Println("optimizeXRefTable end") - } - return nil } diff --git a/pkg/pdfcpu/page.go b/pkg/pdfcpu/page.go index f9eb9afe..06dc8bc4 100644 --- a/pkg/pdfcpu/page.go +++ b/pkg/pdfcpu/page.go @@ -17,11 +17,115 @@ limitations under the License. package pdfcpu import ( + "fmt" + "strings" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" ) +type pagesParamMap map[string]func(string, *PageConfiguration) error + +// Handle applies parameter completion and if successful +// parses the parameter values into pages. +func (m pagesParamMap) Handle(paramPrefix, paramValueStr string, pageConf *PageConfiguration) error { + + var param string + + // Completion support + for k := range m { + if !strings.HasPrefix(k, strings.ToLower(paramPrefix)) { + continue + } + if len(param) > 0 { + return errors.Errorf("pdfcpu: ambiguous parameter prefix \"%s\"", paramPrefix) + } + param = k + } + + if param == "" { + return errors.Errorf("pdfcpu: unknown parameter prefix \"%s\"", paramPrefix) + } + + return m[param](paramValueStr, pageConf) +} + +var pParamMap = pagesParamMap{ + "dimensions": parseDimensions, + "formsize": parsePageFormat, + "papersize": parsePageFormat, +} + +// PageConfiguration represents the page config for the "pages insert" command. +type PageConfiguration struct { + PageDim *types.Dim // page dimensions in display unit. + PageSize string // one of A0,A1,A2,A3,A4(=default),A5,A6,A7,A8,Letter,Legal,Ledger,Tabloid,Executive,ANSIC,ANSID,ANSIE. + UserDim bool // true if one of dimensions or paperSize provided overriding the default. + InpUnit types.DisplayUnit // input display unit. +} + +// DefaultPageConfiguration returns the default configuration. 
+func DefaultPageConfiguration() *PageConfiguration { + return &PageConfiguration{ + PageDim: types.PaperSize["A4"], + PageSize: "A4", + InpUnit: types.POINTS, + } +} + +func (p PageConfiguration) String() string { + return fmt.Sprintf("Page config: %s %s\n", p.PageSize, p.PageDim) +} + +func parsePageFormat(s string, p *PageConfiguration) (err error) { + if p.UserDim { + return errors.New("pdfcpu: only one of formsize(papersize) or dimensions allowed") + } + p.PageDim, p.PageSize, err = types.ParsePageFormat(s) + p.UserDim = true + return err +} + +func parseDimensions(s string, p *PageConfiguration) (err error) { + if p.UserDim { + return errors.New("pdfcpu: only one of formsize(papersize) or dimensions allowed") + } + p.PageDim, p.PageSize, err = ParsePageDim(s, p.InpUnit) + p.UserDim = true + return err +} + +// ParsePageConfiguration parses a page configuration string into an internal structure. +func ParsePageConfiguration(s string, u types.DisplayUnit) (*PageConfiguration, error) { + + if s == "" { + return nil, nil + } + + pageConf := DefaultPageConfiguration() + pageConf.InpUnit = u + + ss := strings.Split(s, ",") + + for _, s := range ss { + + ss1 := strings.Split(s, ":") + if len(ss1) != 2 { + return nil, errors.New("pdfcpu: Invalid page configuration string. Please consult pdfcpu help pages") + } + + paramPrefix := strings.TrimSpace(ss1[0]) + paramValueStr := strings.TrimSpace(ss1[1]) + + if err := pParamMap.Handle(paramPrefix, paramValueStr, pageConf); err != nil { + return nil, err + } + } + + return pageConf, nil +} + func addPages( ctxSrc, ctxDest *model.Context, pageNrs []int, @@ -46,7 +150,7 @@ func addPages( } } - d, _, inhPAttrs, err := ctxSrc.PageDict(i, true) + d, pageIndRef, inhPAttrs, err := ctxSrc.PageDict(i, true) if err != nil { return err } @@ -54,7 +158,12 @@ func addPages( return errors.Errorf("pdfcpu: unknown page number: %d\n", i) } - d = d.Clone().(types.Dict) + obj, err := migrateIndRef(pageIndRef, ctxSrc, ctxDest, migrated) + if err != nil { + return err + } + + d = obj.(types.Dict) d["Resources"] = inhPAttrs.Resources.Clone() d["Parent"] = pagesIndRef d["MediaBox"] = inhPAttrs.MediaBox.Array() @@ -62,11 +171,6 @@ func addPages( d["Rotate"] = types.Integer(inhPAttrs.Rotate) } - pageIndRef, err := ctxDest.IndRefForNewObject(d) - if err != nil { - return err - } - if err := migratePageDict(d, *pageIndRef, ctxSrc, ctxDest, migrated); err != nil { return err } @@ -89,6 +193,31 @@ func addPages( return nil } +func migrateNamedDests(ctxSrc *model.Context, n *model.Node, migrated map[int]int) error { + patchValues := func(xRefTable *model.XRefTable, k string, v *types.Object) error { + if *v == nil { + // Skip corrupt node. + return nil + } + arr, err := xRefTable.DereferenceArray(*v) + if err == nil { + arr[0] = patchObject(arr[0], migrated) + *v = arr + return nil + } + d, err := xRefTable.DereferenceDict(*v) + if err != nil { + return err + } + arr = d.ArrayEntry("D") + arr[0] = patchObject(arr[0], migrated) + *v = d + return nil + } + + return n.Process(ctxSrc.XRefTable, patchValues) +} + // AddPages adds pages and corresponding resources from ctxSrc to ctxDest. func AddPages(ctxSrc, ctxDest *model.Context, pageNrs []int, usePgCache bool) error { @@ -126,5 +255,13 @@ func AddPages(ctxSrc, ctxDest *model.Context, pageNrs []int, usePgCache bool) er ctxDest.RootDict["AcroForm"] = d } + if n, ok := ctxSrc.Names["Dests"]; ok { + // Carry over used named destinations. 
+ if err := migrateNamedDests(ctxSrc, n, migrated); err != nil { + return err + } + ctxDest.Names = map[string]*model.Node{"Dests": n} + } + return nil } diff --git a/pkg/pdfcpu/primitives/checkBox.go b/pkg/pdfcpu/primitives/checkBox.go index 84e7ab7c..74c451d4 100644 --- a/pkg/pdfcpu/primitives/checkBox.go +++ b/pkg/pdfcpu/primitives/checkBox.go @@ -533,7 +533,7 @@ func (cb *CheckBox) appearanceIndRefs(fonts model.FontMap, bgCol *color.SimpleCo func (cb *CheckBox) prepareDict(fonts model.FontMap) (types.Dict, error) { - id, err := types.EscapeUTF16String(cb.ID) + id, err := types.EscapedUTF16String(cb.ID) if err != nil { return nil, err } @@ -595,7 +595,7 @@ func (cb *CheckBox) prepareDict(fonts model.FontMap) (types.Dict, error) { ) if cb.Tip != "" { - tu, err := types.EscapeUTF16String(cb.Tip) + tu, err := types.EscapedUTF16String(cb.Tip) if err != nil { return nil, err } @@ -761,17 +761,31 @@ func (cb *CheckBox) render(p *model.Page, pageNr int, fonts model.FontMap) error return cb.doRender(p, fonts) } -func CalcCheckBoxASNames(d types.Dict) (types.Name, types.Name) { - apDict := d.DictEntry("AP") - d1 := apDict.DictEntry("D") - if d1 == nil { - d1 = apDict.DictEntry("N") +func CalcCheckBoxASNames(ctx *model.Context, d types.Dict) (types.Name, types.Name, error) { + obj, found := d.Find("AP") + if !found { + return "", "", errors.New("pdfcpu: corrupt form field: missing entry \"AP\"") + } + d, err := ctx.DereferenceDict(obj) + if err != nil { + return "", "", err + } + obj, found = d.Find("D") + if !found { + obj, found = d.Find("N") + } + if !found { + return "", "", errors.New("pdfcpu: corrupt form field: missing entries \"D\" and \"N\"") + } + d, err = ctx.DereferenceDict(obj) + if err != nil { + return "", "", err } offName, yesName := "Off", "Yes" - for k := range d1 { + for k := range d { if k != "Off" { yesName = k } } - return types.Name(offName), types.Name(yesName) + return types.Name(offName), types.Name(yesName), nil } diff --git a/pkg/pdfcpu/primitives/comboBox.go b/pkg/pdfcpu/primitives/comboBox.go index aa6b96ea..a557e4e9 100644 --- a/pkg/pdfcpu/primitives/comboBox.go +++ b/pkg/pdfcpu/primitives/comboBox.go @@ -23,7 +23,6 @@ import ( "github.com/angel-one/pdfcpu/pkg/font" "github.com/angel-one/pdfcpu/pkg/pdfcpu/color" - pdffont "github.com/angel-one/pdfcpu/pkg/pdfcpu/font" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -232,35 +231,15 @@ func (cb *ComboBox) validate() error { return cb.validateTab() } -func (cb *ComboBox) calcFontFromDA(ctx *model.Context, d types.Dict, fonts map[string]types.IndirectRef) (*types.IndirectRef, error) { - - s := d.StringEntry("DA") - if s == nil { - s = ctx.Form.StringEntry("DA") - if s == nil { - return nil, errors.New("pdfcpu: combobox missing \"DA\"") - } - } - - fontID, f, err := fontFromDA(*s) - if err != nil { - return nil, err - } - - cb.Font, cb.fontID = &f, fontID - - id, name, lang, fontIndRef, err := extractFormFontDetails(ctx, cb.fontID, fonts) +func (cb *ComboBox) calcFontFromDA(ctx *model.Context, d types.Dict, da *string, fonts map[string]types.IndirectRef) (*types.IndirectRef, error) { + id, font, rtl, fontIndRef, err := calcFontDetailsFromDA(ctx, d, da, false, fonts) if err != nil { return nil, err } - if fontIndRef == nil { - return nil, errors.New("pdfcpu: unable to detect indirect reference for font") - } cb.fontID = id - cb.Font.Name = name - cb.Font.Lang = lang - cb.RTL = pdffont.RTL(lang) + cb.Font = font + cb.RTL = rtl return fontIndRef, nil }
@@ -386,7 +365,7 @@ func (cb *ComboBox) renderN(xRefTable *model.XRefTable) ([]byte, error) { v = model.DecodeUTF8ToByte(v) } lineBB := model.CalcBoundingBox(v, 0, 0, f.Name, f.Size) - s := model.PrepBytes(xRefTable, v, f.Name, true, cb.RTL) + s := model.PrepBytes(xRefTable, v, f.Name, true, cb.RTL, f.FillFont) x := 2 * boWidth if x == 0 { x = 2 @@ -467,14 +446,14 @@ func (cb *ComboBox) handleBorderAndMK(d types.Dict) { func (cb *ComboBox) prepareDict(fonts model.FontMap) (types.Dict, error) { pdf := cb.pdf - id, err := types.EscapeUTF16String(cb.ID) + id, err := types.EscapedUTF16String(cb.ID) if err != nil { return nil, err } opt := types.Array{} for _, s := range cb.Options { - s, err := types.EscapeUTF16String(s) + s, err := types.EscapedUTF16String(s) if err != nil { return nil, err } @@ -498,7 +477,7 @@ func (cb *ComboBox) prepareDict(fonts model.FontMap) (types.Dict, error) { ) if cb.Tip != "" { - tu, err := types.EscapeUTF16String(cb.Tip) + tu, err := types.EscapedUTF16String(cb.Tip) if err != nil { return nil, err } @@ -509,7 +488,7 @@ func (cb *ComboBox) prepareDict(fonts model.FontMap) (types.Dict, error) { v := cb.Value if cb.Default != "" { - s, err := types.EscapeUTF16String(cb.Default) + s, err := types.EscapedUTF16String(cb.Default) if err != nil { return nil, err } @@ -526,7 +505,7 @@ func (cb *ComboBox) prepareDict(fonts model.FontMap) (types.Dict, error) { break } } - s, err := types.EscapeUTF16String(v) + s, err := types.EscapedUTF16String(v) if err != nil { return nil, err } @@ -720,18 +699,21 @@ func NewComboBox( ctx *model.Context, d types.Dict, v string, + da *string, fonts map[string]types.IndirectRef) (*ComboBox, *types.IndirectRef, error) { cb := &ComboBox{Value: v} - bb, err := ctx.RectForArray(d.ArrayEntry("Rect")) + obj, _ := d.Find("Rect") + arr, _ := ctx.DereferenceArray(obj) + bb, err := ctx.RectForArray(arr) if err != nil { return nil, nil, err } cb.BoundingBox = types.RectForDim(bb.Width(), bb.Height()) - fontIndRef, err := cb.calcFontFromDA(ctx, d, fonts) + fontIndRef, err := cb.calcFontFromDA(ctx, d, da, fonts) if err != nil { return nil, nil, err } @@ -758,9 +740,9 @@ func NewComboBox( return cb, fontIndRef, nil } -func renderComboBoxAP(ctx *model.Context, d types.Dict, v string, fonts map[string]types.IndirectRef) error { +func renderComboBoxAP(ctx *model.Context, d types.Dict, v string, da *string, fonts map[string]types.IndirectRef) error { - cb, fontIndRef, err := NewComboBox(ctx, d, v, fonts) + cb, fontIndRef, err := NewComboBox(ctx, d, v, da, fonts) if err != nil { return err } @@ -780,9 +762,9 @@ func renderComboBoxAP(ctx *model.Context, d types.Dict, v string, fonts map[stri return nil } -func refreshComboBoxAP(ctx *model.Context, d types.Dict, v string, fonts map[string]types.IndirectRef, irN *types.IndirectRef) error { +func refreshComboBoxAP(ctx *model.Context, d types.Dict, v string, da *string, fonts map[string]types.IndirectRef, irN *types.IndirectRef) error { - cb, _, err := NewComboBox(ctx, d, v, fonts) + cb, _, err := NewComboBox(ctx, d, v, da, fonts) if err != nil { return err } @@ -795,11 +777,11 @@ func refreshComboBoxAP(ctx *model.Context, d types.Dict, v string, fonts map[str return updateForm(ctx.XRefTable, bb, irN) } -func EnsureComboBoxAP(ctx *model.Context, d types.Dict, v string, fonts map[string]types.IndirectRef) error { +func EnsureComboBoxAP(ctx *model.Context, d types.Dict, v string, da *string, fonts map[string]types.IndirectRef) error { apd := d.DictEntry("AP") if apd == nil { - return renderComboBoxAP(ctx, d, v, 
fonts) + return renderComboBoxAP(ctx, d, v, da, fonts) } irN := apd.IndirectRefEntry("N") @@ -807,5 +789,5 @@ func EnsureComboBoxAP(ctx *model.Context, d types.Dict, v string, fonts map[stri return nil } - return refreshComboBoxAP(ctx, d, v, fonts, irN) + return refreshComboBoxAP(ctx, d, v, da, fonts, irN) } diff --git a/pkg/pdfcpu/primitives/dateField.go b/pkg/pdfcpu/primitives/dateField.go index 0dc8a4b8..d90a9f7d 100644 --- a/pkg/pdfcpu/primitives/dateField.go +++ b/pkg/pdfcpu/primitives/dateField.go @@ -281,35 +281,14 @@ func (df *DateField) validate() error { return df.validateTab() } -func (df *DateField) calcFontFromDA(ctx *model.Context, d types.Dict, fonts map[string]types.IndirectRef) (*types.IndirectRef, error) { - - s := d.StringEntry("DA") - if s == nil { - s = ctx.Form.StringEntry("DA") - if s == nil { - return nil, errors.New("pdfcpu: datefield missing \"DA\"") - } - } - - fontID, f, err := fontFromDA(*s) +func (df *DateField) calcFontFromDA(ctx *model.Context, d types.Dict, da *string, needUTF8 bool, fonts map[string]types.IndirectRef) (*types.IndirectRef, error) { + id, font, _, fontIndRef, err := calcFontDetailsFromDA(ctx, d, da, needUTF8, fonts) if err != nil { return nil, err } - df.Font, df.fontID = &f, fontID - - id, name, lang, fontIndRef, err := extractFormFontDetails(ctx, df.fontID, fonts) - if err != nil { - return nil, err - } - if fontIndRef == nil { - return nil, errors.New("pdfcpu: unable to detect indirect reference for font") - } - df.fontID = id - df.Font.Name = name - df.Font.Lang = lang - //df.RTL = pdffont.RTL(lang) + df.Font = font return fontIndRef, nil } @@ -454,7 +433,7 @@ func (df *DateField) renderN(xRefTable *model.XRefTable) ([]byte, error) { } lineBB := model.CalcBoundingBox(v, 0, 0, f.Name, f.Size) - s := model.PrepBytes(xRefTable, v, f.Name, true, false) + s := model.PrepBytes(xRefTable, v, f.Name, true, false, f.FillFont) x := 2 * boWidth if x == 0 { x = 2 @@ -484,15 +463,14 @@ func (df *DateField) renderN(xRefTable *model.XRefTable) ([]byte, error) { } // RefreshN updates the normal appearance referred to by indRef according to df. +// Unused. 
func (df *DateField) RefreshN(xRefTable *model.XRefTable, indRef *types.IndirectRef) error { - - entry, _ := xRefTable.FindTableEntryForIndRef(indRef) - bb, err := df.renderN(xRefTable) if err != nil { return err } + entry, _ := xRefTable.FindTableEntryForIndRef(indRef) sd, _ := entry.Object.(types.StreamDict) sd.Content = bb @@ -601,7 +579,7 @@ func (df *DateField) handleBorderAndMK(d types.Dict) { func (df *DateField) prepareDict(fonts model.FontMap) (types.Dict, error) { pdf := df.pdf - id, err := types.EscapeUTF16String(df.ID) + id, err := types.EscapedUTF16String(df.ID) if err != nil { return nil, err } @@ -658,7 +636,7 @@ func (df *DateField) prepareDict(fonts model.FontMap) (types.Dict, error) { df.handleBorderAndMK(d) if df.Value != "" { - s, err := types.EscapeUTF16String(df.Value) + s, err := types.EscapedUTF16String(df.Value) if err != nil { return nil, err } @@ -666,7 +644,7 @@ func (df *DateField) prepareDict(fonts model.FontMap) (types.Dict, error) { } if df.Default != "" { - s, err := types.EscapeUTF16String(df.Default) + s, err := types.EscapedUTF16String(df.Default) if err != nil { return nil, err } @@ -860,20 +838,25 @@ func NewDateField( ctx *model.Context, d types.Dict, v string, + da *string, + fontIndRef *types.IndirectRef, fonts map[string]types.IndirectRef) (*DateField, *types.IndirectRef, error) { df := &DateField{Value: v} - bb, err := ctx.RectForArray(d.ArrayEntry("Rect")) + obj, _ := d.Find("Rect") + arr, _ := ctx.DereferenceArray(obj) + bb, err := ctx.RectForArray(arr) if err != nil { return nil, nil, err } df.BoundingBox = types.RectForDim(bb.Width(), bb.Height()) - fontIndRef, err := df.calcFontFromDA(ctx, d, fonts) - if err != nil { - return nil, nil, err + if fontIndRef == nil { + if fontIndRef, err = df.calcFontFromDA(ctx, d, da, hasUTF(v), fonts); err != nil { + return nil, nil, err + } } df.HorAlign = types.AlignLeft @@ -898,9 +881,9 @@ func NewDateField( return df, fontIndRef, nil } -func renderDateFieldAP(ctx *model.Context, d types.Dict, v string, fonts map[string]types.IndirectRef) error { +func renderDateFieldAP(ctx *model.Context, d types.Dict, v string, da *string, fonts map[string]types.IndirectRef) error { - df, fontIndRef, err := NewDateField(ctx, d, v, fonts) + df, fontIndRef, err := NewDateField(ctx, d, v, da, nil, fonts) if err != nil { return err } @@ -920,9 +903,8 @@ func renderDateFieldAP(ctx *model.Context, d types.Dict, v string, fonts map[str return nil } -func refreshDateFieldAP(ctx *model.Context, d types.Dict, v string, fonts map[string]types.IndirectRef, irN *types.IndirectRef) error { - - df, _, err := NewDateField(ctx, d, v, fonts) +func refreshDateFieldAP(ctx *model.Context, d types.Dict, v string, da *string, fonts map[string]types.IndirectRef, irN *types.IndirectRef) error { + df, _, err := NewDateField(ctx, d, v, da, nil, fonts) if err != nil { return err } @@ -935,11 +917,10 @@ func refreshDateFieldAP(ctx *model.Context, d types.Dict, v string, fonts map[st return updateForm(ctx.XRefTable, bb, irN) } -func EnsureDateFieldAP(ctx *model.Context, d types.Dict, v string, fonts map[string]types.IndirectRef) error { - +func EnsureDateFieldAP(ctx *model.Context, d types.Dict, v string, da *string, fonts map[string]types.IndirectRef) error { apd := d.DictEntry("AP") if apd == nil { - return renderDateFieldAP(ctx, d, v, fonts) + return renderDateFieldAP(ctx, d, v, da, fonts) } irN := apd.IndirectRefEntry("N") @@ -947,5 +928,5 @@ func EnsureDateFieldAP(ctx *model.Context, d types.Dict, v string, fonts map[str return nil } - return 
refreshDateFieldAP(ctx, d, v, fonts, irN) + return refreshDateFieldAP(ctx, d, v, da, fonts, irN) } diff --git a/pkg/pdfcpu/primitives/font.go b/pkg/pdfcpu/primitives/font.go index 49cb16a0..0f5b43c1 100644 --- a/pkg/pdfcpu/primitives/font.go +++ b/pkg/pdfcpu/primitives/font.go @@ -29,13 +29,14 @@ import ( ) type FormFont struct { - pdf *PDF - Name string - Lang string // ISO-639 - Script string // ISO-15924 - Size int - Color string `json:"col"` - col *color.SimpleColor + pdf *PDF + Name string + Lang string // ISO-639 + Script string // ISO-15924 + Size int + Color string `json:"col"` + col *color.SimpleColor + FillFont bool } // ISO-639 country codes @@ -137,28 +138,33 @@ func (f FormFont) RTL() bool { return types.MemberOf(f.Script, []string{"Arab", "Hebr"}) || types.MemberOf(f.Lang, []string{"ar", "fa", "he"}) } -func FormFontNameAndLangForID(xRefTable *model.XRefTable, indRef types.IndirectRef) (string, string, error) { +func FormFontDetails(xRefTable *model.XRefTable, indRef types.IndirectRef) (string, string, string, error) { objNr := int(indRef.ObjectNumber) fontDict, err := xRefTable.DereferenceDict(indRef) if err != nil || fontDict == nil { - return "", "", err + return "", "", "", err } _, fName, err := pdffont.Name(xRefTable, fontDict, objNr) if err != nil { - return "", "", err + return "", "", "", err } var fLang string if font.IsUserFont(fName) { fLang, err = pdffont.Lang(xRefTable, fontDict) if err != nil { - return "", "", err + return "", "", "", err } } - return fName, fLang, nil + fScript := "" + if enc := fontDict.NameEntry("Encoding"); enc != nil { + fScript = pdffont.ScriptForEncoding(*enc) + } + + return fName, fLang, fScript, nil } // FormFontResDict returns form dict's font resource dict. @@ -187,21 +193,20 @@ func FormFontResDict(xRefTable *model.XRefTable) (types.Dict, error) { return xRefTable.DereferenceDict(o) } -func formFontIndRef(xRefTable *model.XRefTable, fontID string) (*types.IndirectRef, error) { - d, err := FormFontResDict(xRefTable) - if err != nil { - return nil, err +func formFontIndRef(xRefTable *model.XRefTable, fontID string) *types.IndirectRef { + + indRef, ok := xRefTable.FillFonts[fontID] + if ok { + return &indRef } - for k, v := range d { - //fmt.Printf("%s %s\n", k, v) + for k, v := range xRefTable.FillFonts { if strings.HasPrefix(k, fontID) || strings.HasPrefix(fontID, k) { - indRef, _ := v.(types.IndirectRef) - return &indRef, nil + return &v } } - return nil, nil + return nil } func FontIndRef(fName string, ctx *model.Context, fonts map[string]types.IndirectRef) (*types.IndirectRef, error) { @@ -234,142 +239,57 @@ func FontIndRef(fName string, ctx *model.Context, fonts map[string]types.Indirec return nil, nil } -func ensureCorrectFontIndRef( - ctx *model.Context, - fontIndRef **types.IndirectRef, - fName string, - fonts map[string]types.IndirectRef) error { - - d, err := ctx.DereferenceDict(**fontIndRef) - if err != nil { - return err - } - - if enc := d.NameEntry("Encoding"); enc != nil && *enc == "Identity-H" { - indRef, ok := fonts[fName] - if !ok { - fonts[fName] = **fontIndRef - return nil - } - if indRef != **fontIndRef { - return errors.Errorf("pdfcpu: %s: duplicate fontDicts", fName) - } - return nil - } - - indRef, err := FontIndRef(fName, ctx, fonts) - if err != nil { - return err - } - if indRef != nil { - *fontIndRef = indRef - } - - return nil -} - -func fontFromAcroDict(xRefTable *model.XRefTable, fontIndRef *types.IndirectRef, fName, fLang *string, fontID string) error { - - // Use DA fontId from Acrodict - - s := 
xRefTable.Form.StringEntry("DA") - if s == nil { - if fName != nil { - return errors.Errorf("pdfcpu: unsupported font: %s", *fName) - } - return errors.Errorf("pdfcpu: unsupported fontID: %s", fontID) - } - - da := strings.Fields(*s) - rootFontID := "" - - for i := 0; i < len(da); i++ { - if da[i] == "Tf" { - if i >= 2 { - rootFontID = da[i-2][1:] - } - break - } - } - - if rootFontID == "" { - if fName != nil { - return errors.Errorf("pdfcpu: unsupported font: %s", *fName) - } - return errors.Errorf("pdfcpu: unsupported fontID: %s", fontID) - } - - fontID = rootFontID - indRef, err := formFontIndRef(xRefTable, fontID) - if err != nil { - return err - } - - *fontIndRef = *indRef - - *fName, *fLang, err = FormFontNameAndLangForID(xRefTable, *indRef) - if err != nil { - return err - } - - return nil -} - -func ensureUTF8FormFont(ctx *model.Context, fonts map[string]types.IndirectRef) (string, string, string, *types.IndirectRef, error) { +func ensureUTF8FormFont(ctx *model.Context, fonts map[string]types.IndirectRef) (string, string, string, string, *types.IndirectRef, error) { // TODO Make name of UTF-8 userfont part of pdfcpu configs. fontID, fontName := "F0", "Roboto-Regular" if indRef, ok := fonts[fontName]; ok { - return fontID, fontName, "", &indRef, nil + return fontID, fontName, "", "", &indRef, nil } for objNr, fo := range ctx.Optimize.FontObjects { if fo.FontName == fontName && fo.Prefix != "" { indRef := types.NewIndirectRef(objNr, 0) fonts[fontName] = *indRef - return fontID, fontName, "", indRef, nil + return fontID, fontName, "", "", indRef, nil } } indRef, err := pdffont.EnsureFontDict(ctx.XRefTable, fontName, "", "", false, nil) if err != nil { - return "", "", "", nil, err + return "", "", "", "", nil, err } fonts[fontName] = *indRef - return fontID, fontName, "", indRef, nil + return fontID, fontName, "", "", indRef, nil } func extractFormFontDetails( ctx *model.Context, fontID string, - fonts map[string]types.IndirectRef) (string, string, string, *types.IndirectRef, error) { + fonts map[string]types.IndirectRef) (string, string, string, string, *types.IndirectRef, error) { xRefTable := ctx.XRefTable var ( - fName, fLang string - fontIndRef *types.IndirectRef - err error + fName, fLang, fScript string + fontIndRef *types.IndirectRef + err error ) if len(fontID) > 0 { - fontIndRef, err = formFontIndRef(xRefTable, fontID) - if err != nil { - return "", "", "", nil, err - } - + fontIndRef = formFontIndRef(xRefTable, fontID) if fontIndRef != nil { - fName, fLang, err = FormFontNameAndLangForID(xRefTable, *fontIndRef) + fName, fLang, fScript, err = FormFontDetails(xRefTable, *fontIndRef) if err != nil { - return "", "", "", nil, err + return "", "", "", "", nil, err } if fName == "" { - return "", "", "", nil, errors.Errorf("pdfcpu: Unable to detect fontName for: %s", fontID) + return "", "", "", "", nil, errors.Errorf("pdfcpu: Unable to detect fontName for: %s", fontID) } } @@ -379,7 +299,7 @@ func extractFormFontDetails( return ensureUTF8FormFont(ctx, fonts) } - return fontID, fName, fLang, fontIndRef, err + return fontID, fName, fLang, fScript, fontIndRef, err } func fontFromDA(s string) (string, FormFont, error) { @@ -423,3 +343,42 @@ func fontFromDA(s string) (string, FormFont, error) { return fontID, f, nil } + +func calcFontDetailsFromDA(ctx *model.Context, d types.Dict, da *string, needUTF8 bool, fonts map[string]types.IndirectRef) (string, *FormFont, bool, *types.IndirectRef, error) { + s := locateDA(ctx, d, da) + if s == nil { + return "", nil, false, nil, 
errors.New("pdfcpu: missing \"DA\"") + } + + fontID, f, err := fontFromDA(*s) + if err != nil { + return "", nil, false, nil, err + } + + id, name, lang, script, fontIndRef, err := extractFormFontDetails(ctx, fontID, fonts) + if err != nil { + return "", nil, false, nil, err + } + if fontIndRef == nil { + return "", nil, false, nil, errors.New("pdfcpu: unable to detect indirect reference for font") + } + + fillFont := formFontIndRef(ctx.XRefTable, fontID) != nil + + if needUTF8 && font.IsCoreFont(name) { + id, name, lang, script, fontIndRef, err = ensureUTF8FormFont(ctx, fonts) + if err != nil { + return "", nil, false, nil, err + } + fillFont = false + } + + f.Name = name + f.Lang = lang + f.Script = script + f.FillFont = fillFont + + rtl := pdffont.RTL(lang) + + return id, &f, rtl, fontIndRef, nil +} diff --git a/pkg/pdfcpu/primitives/imageBox.go b/pkg/pdfcpu/primitives/imageBox.go index 9974050e..9f8ca4ab 100644 --- a/pkg/pdfcpu/primitives/imageBox.go +++ b/pkg/pdfcpu/primitives/imageBox.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "math" + "net" "net/http" "os" "strconv" @@ -264,21 +265,33 @@ func (ib *ImageBox) resource() (io.ReadCloser, error) { pdf := ib.pdf var f io.ReadCloser if strings.HasPrefix(ib.Src, "http") { + if pdf.Offline { + if log.CLIEnabled() { + log.CLI.Printf("pdfcpu is offline, can't get %s\n", ib.Src) + } + return nil, nil + } client := pdf.httpClient if client == nil { pdf.httpClient = &http.Client{ - Timeout: 10 * time.Second, + Timeout: time.Duration(pdf.Timeout) * time.Second, } client = pdf.httpClient } resp, err := client.Get(ib.Src) if err != nil { - if log.CLIEnabled() { - log.CLI.Printf("%v: %s\n", err, ib.Src) + if e, ok := err.(net.Error); ok && e.Timeout() { + if log.CLIEnabled() { + log.CLI.Printf("timeout: %s\n", ib.Src) + } + } else { + if log.CLIEnabled() { + log.CLI.Printf("%v: %s\n", err, ib.Src) + } } return nil, err } - if resp.StatusCode != 200 { + if resp.StatusCode != http.StatusOK { if log.CLIEnabled() { log.CLI.Printf("http status %d: %s\n", resp.StatusCode, ib.Src) } @@ -316,7 +329,7 @@ func (ib *ImageBox) imageResource(pageImages, images model.ImageMap, pageNr int) if ib.pdf.Update() { - sd, w, h, err = model.CreateImageStreamDict(pdf.XRefTable, f, false, false) + sd, w, h, err = model.CreateImageStreamDict(pdf.XRefTable, f) if err != nil { return nil, err } @@ -347,7 +360,7 @@ func (ib *ImageBox) imageResource(pageImages, images model.ImageMap, pageNr int) } id = imgResIDs.NewIDForPrefix("Im", len(pageImages)) } else { - indRef, w, h, err = model.CreateImageResource(pdf.XRefTable, f, false, false) + indRef, w, h, err = model.CreateImageResource(pdf.XRefTable, f) if err != nil { return nil, err } @@ -394,16 +407,20 @@ func (ib *ImageBox) createLink(p *model.Page, pageNr int, r *types.Rectangle, m id := fmt.Sprintf("l%d%d", pageNr, len(p.LinkAnnots)) ann := model.NewLinkAnnotation( - *ql.EnclosingRectangle(5.0), - types.QuadPoints{ql}, - nil, - ib.Url, - id, - 0, - 0, - model.BSSolid, - nil, - false) + *ql.EnclosingRectangle(5.0), // rect + 0, // apObjNr + "", // contents + id, // id + "", // modDate + 0, // f + &color.Red, // borderCol + nil, // dest + ib.Url, // uri + types.QuadPoints{ql}, // quad + false, // border + 0, // borderWidth + model.BSSolid, // borderStyle + ) p.LinkAnnots = append(p.LinkAnnots, ann) } diff --git a/pkg/pdfcpu/primitives/listBox.go b/pkg/pdfcpu/primitives/listBox.go index 69a85b7d..b8f13cb9 100644 --- a/pkg/pdfcpu/primitives/listBox.go +++ b/pkg/pdfcpu/primitives/listBox.go @@ -24,7 +24,6 @@ import ( 
"github.com/angel-one/pdfcpu/pkg/font" "github.com/angel-one/pdfcpu/pkg/pdfcpu/color" - pdffont "github.com/angel-one/pdfcpu/pkg/pdfcpu/font" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -302,35 +301,15 @@ func (lb *ListBox) validate() error { return lb.validateTab() } -func (lb *ListBox) calcFontFromDA(ctx *model.Context, d types.Dict, fonts map[string]types.IndirectRef) (*types.IndirectRef, error) { - - s := d.StringEntry("DA") - if s == nil { - s = ctx.Form.StringEntry("DA") - if s == nil { - return nil, errors.New("pdfcpu: listbox missing \"DA\"") - } - } - - fontID, f, err := fontFromDA(*s) - if err != nil { - return nil, err - } - - lb.Font, lb.fontID = &f, fontID - - id, name, lang, fontIndRef, err := extractFormFontDetails(ctx, lb.fontID, fonts) +func (lb *ListBox) calcFontFromDA(ctx *model.Context, d types.Dict, da *string, fonts map[string]types.IndirectRef) (*types.IndirectRef, error) { + id, font, rtl, fontIndRef, err := calcFontDetailsFromDA(ctx, d, da, false, fonts) if err != nil { return nil, err } - if fontIndRef == nil { - return nil, errors.New("pdfcpu: unable to detect indirect reference for font") - } lb.fontID = id - lb.Font.Name = name - lb.Font.Lang = lang - lb.RTL = pdffont.RTL(lang) + lb.Font = font + lb.RTL = rtl return fontIndRef, nil } @@ -481,7 +460,7 @@ func (lb *ListBox) renderN(xRefTable *model.XRefTable) ([]byte, error) { s = model.DecodeUTF8ToByte(s) } lineBB := model.CalcBoundingBox(s, 0, 0, f.Name, f.Size) - s = model.PrepBytes(xRefTable, s, f.Name, true, lb.RTL) + s = model.PrepBytes(xRefTable, s, f.Name, true, lb.RTL, f.FillFont) x := 2 * boWidth if x == 0 { x = 2 @@ -615,7 +594,7 @@ func (lb *ListBox) handleVAndDV(d types.Dict) error { ind = append(ind, types.Integer(i)) } } - s, err := types.EscapeUTF16String(v) + s, err := types.EscapedUTF16String(v) if err != nil { return err } @@ -634,7 +613,7 @@ func (lb *ListBox) handleVAndDV(d types.Dict) error { arr = types.Array{} for _, v := range lb.Defaults { - s, err := types.EscapeUTF16String(v) + s, err := types.EscapedUTF16String(v) if err != nil { return err } @@ -653,14 +632,14 @@ func (lb *ListBox) handleVAndDV(d types.Dict) error { func (lb *ListBox) prepareDict(fonts model.FontMap) (types.Dict, error) { pdf := lb.pdf - id, err := types.EscapeUTF16String(lb.ID) + id, err := types.EscapedUTF16String(lb.ID) if err != nil { return nil, err } opt := types.Array{} for _, s := range lb.Options { - s1, err := types.EscapeUTF16String(s) + s1, err := types.EscapedUTF16String(s) if err != nil { return nil, err } @@ -684,7 +663,7 @@ func (lb *ListBox) prepareDict(fonts model.FontMap) (types.Dict, error) { ) if lb.Tip != "" { - tu, err := types.EscapeUTF16String(lb.Tip) + tu, err := types.EscapedUTF16String(lb.Tip) if err != nil { return nil, err } @@ -871,18 +850,21 @@ func NewListBox( d types.Dict, opts []string, ind types.Array, + da *string, fonts map[string]types.IndirectRef) (*ListBox, *types.IndirectRef, error) { lb := &ListBox{Options: opts, Ind: ind} - bb, err := ctx.RectForArray(d.ArrayEntry("Rect")) + obj, _ := d.Find("Rect") + arr, _ := ctx.DereferenceArray(obj) + bb, err := ctx.RectForArray(arr) if err != nil { return nil, nil, err } lb.BoundingBox = types.RectForDim(bb.Width(), bb.Height()) - fontIndRef, err := lb.calcFontFromDA(ctx, d, fonts) + fontIndRef, err := lb.calcFontFromDA(ctx, d, da, fonts) if err != nil { return nil, nil, err } @@ -962,9 +944,9 @@ func updateForm(xRefTable *model.XRefTable, bb []byte, indRef 
*types.IndirectRef return nil } -func renderListBoxAP(ctx *model.Context, d types.Dict, opts []string, ind types.Array, fonts map[string]types.IndirectRef) error { +func renderListBoxAP(ctx *model.Context, d types.Dict, opts []string, ind types.Array, da *string, fonts map[string]types.IndirectRef) error { - lb, fontIndRef, err := NewListBox(ctx, d, opts, ind, fonts) + lb, fontIndRef, err := NewListBox(ctx, d, opts, ind, da, fonts) if err != nil { return err } @@ -984,9 +966,9 @@ func renderListBoxAP(ctx *model.Context, d types.Dict, opts []string, ind types. return nil } -func refreshListBoxAP(ctx *model.Context, d types.Dict, opts []string, ind types.Array, fonts map[string]types.IndirectRef, irN *types.IndirectRef) error { +func refreshListBoxAP(ctx *model.Context, d types.Dict, opts []string, ind types.Array, da *string, fonts map[string]types.IndirectRef, irN *types.IndirectRef) error { - lb, _, err := NewListBox(ctx, d, opts, ind, fonts) + lb, _, err := NewListBox(ctx, d, opts, ind, da, fonts) if err != nil { return err } @@ -999,11 +981,11 @@ func refreshListBoxAP(ctx *model.Context, d types.Dict, opts []string, ind types return updateForm(ctx.XRefTable, bb, irN) } -func EnsureListBoxAP(ctx *model.Context, d types.Dict, opts []string, ind types.Array, fonts map[string]types.IndirectRef) error { +func EnsureListBoxAP(ctx *model.Context, d types.Dict, opts []string, ind types.Array, da *string, fonts map[string]types.IndirectRef) error { apd := d.DictEntry("AP") if apd == nil { - return renderListBoxAP(ctx, d, opts, ind, fonts) + return renderListBoxAP(ctx, d, opts, ind, da, fonts) } irN := apd.IndirectRefEntry("N") @@ -1011,5 +993,5 @@ func EnsureListBoxAP(ctx *model.Context, d types.Dict, opts []string, ind types. return nil } - return refreshListBoxAP(ctx, d, opts, ind, fonts, irN) + return refreshListBoxAP(ctx, d, opts, ind, da, fonts, irN) } diff --git a/pkg/pdfcpu/primitives/pdf.go b/pkg/pdfcpu/primitives/pdf.go index 0df750b3..212cfffa 100644 --- a/pkg/pdfcpu/primitives/pdf.go +++ b/pkg/pdfcpu/primitives/pdf.go @@ -115,6 +115,8 @@ type PDF struct { RadioBtnAPs map[float64]*AP `json:"-"` HasForm bool `json:"-"` OldFieldIDs types.StringSet `json:"-"` + Offline bool `json:"-"` + Timeout int `json:"-"` httpClient *http.Client } @@ -492,7 +494,7 @@ func (pdf *PDF) DuplicateField(ID string) bool { if pdf.FieldIDs[ID] || pdf.OldFieldIDs[ID] { return true } - oldID, err := types.EscapeUTF16String(ID) + oldID, err := types.EscapedUTF16String(ID) if err != nil { return true } diff --git a/pkg/pdfcpu/primitives/radioButtonGroup.go b/pkg/pdfcpu/primitives/radioButtonGroup.go index 3b9c9406..d944d911 100644 --- a/pkg/pdfcpu/primitives/radioButtonGroup.go +++ b/pkg/pdfcpu/primitives/radioButtonGroup.go @@ -995,7 +995,7 @@ func (rbg *RadioButtonGroup) prepareDict(p *model.Page, pageNr int, fonts model. rbg.renderButtonLabels(p, pageNr, fonts) - id, err := types.EscapeUTF16String(rbg.ID) + id, err := types.EscapedUTF16String(rbg.ID) if err != nil { return nil, nil, err } @@ -1031,7 +1031,7 @@ func (rbg *RadioButtonGroup) prepareDict(p *model.Page, pageNr int, fonts model. 
d["V"] = v if rbg.Tip != "" { - tu, err := types.EscapeUTF16String(rbg.Tip) + tu, err := types.EscapedUTF16String(rbg.Tip) if err != nil { return nil, nil, err } diff --git a/pkg/pdfcpu/primitives/textBox.go b/pkg/pdfcpu/primitives/textBox.go index b9b1ace1..9f8c1a7d 100644 --- a/pkg/pdfcpu/primitives/textBox.go +++ b/pkg/pdfcpu/primitives/textBox.go @@ -411,6 +411,10 @@ func (tb *TextBox) render(p *model.Page, pageNr int, fonts model.FontMap) error return err } + if len(td.Text) == 0 { + return nil + } + mTop, mRight, mBottom, mLeft, err := tb.calcMargin() if err != nil { return err diff --git a/pkg/pdfcpu/primitives/textField.go b/pkg/pdfcpu/primitives/textField.go index 7e792b7e..ad508350 100644 --- a/pkg/pdfcpu/primitives/textField.go +++ b/pkg/pdfcpu/primitives/textField.go @@ -55,6 +55,8 @@ type TextField struct { BgCol *color.SimpleColor `json:"-"` Alignment string `json:"align"` // "Left", "Center", "Right" HorAlign types.HAlignment `json:"-"` + MaxLen int `json:"maxlen"` + Comb bool `json:"comb"` RTL bool Tab int Locked bool @@ -224,41 +226,26 @@ func (tf *TextField) validate() error { return tf.validateTab() } -func (tf *TextField) calcFontFromDA(ctx *model.Context, d types.Dict, needUTF8 bool, fonts map[string]types.IndirectRef) (*types.IndirectRef, error) { +func locateDA(ctx *model.Context, d types.Dict, inhDA *string) *string { s := d.StringEntry("DA") - if s == nil { - s = ctx.Form.StringEntry("DA") - if s == nil { - return nil, errors.New("pdfcpu: textfield missing \"DA\"") - } + if s != nil { + return s } - - fontID, f, err := fontFromDA(*s) - if err != nil { - return nil, err + if inhDA != nil { + return inhDA } + return ctx.Form.StringEntry("DA") +} - tf.Font, tf.fontID = &f, fontID - - id, name, lang, fontIndRef, err := extractFormFontDetails(ctx, tf.fontID, fonts) +func (tf *TextField) calcFontFromDA(ctx *model.Context, d types.Dict, da *string, needUTF8 bool, fonts map[string]types.IndirectRef) (*types.IndirectRef, error) { + id, font, rtl, fontIndRef, err := calcFontDetailsFromDA(ctx, d, da, needUTF8, fonts) if err != nil { return nil, err } - if fontIndRef == nil { - return nil, errors.New("pdfcpu: unable to detect indirect reference for font") - } - - if needUTF8 && font.IsCoreFont(name) { - id, name, lang, fontIndRef, err = ensureUTF8FormFont(ctx, fonts) - if err != nil { - return nil, err - } - } tf.fontID = id - tf.Font.Name = name - tf.Font.Lang = lang - tf.RTL = pdffont.RTL(lang) + tf.Font = font + tf.RTL = rtl return fontIndRef, nil } @@ -381,6 +368,49 @@ func (tf *TextField) renderBackground(w io.Writer, bgCol, boCol *color.SimpleCol } } +func (tf *TextField) renderLines(xRefTable *model.XRefTable, boWidth, lh, w, y float64, lines []string, buf io.Writer) { + f := tf.Font + cjk := pdffont.CJK(f.Script, f.Lang) + for i := 0; i < len(lines); i++ { + s := lines[i] + lineBB := model.CalcBoundingBox(s, 0, 0, f.Name, f.Size) + s = model.PrepBytes(xRefTable, s, f.Name, !cjk, f.RTL(), f.FillFont) + x := 2 * boWidth + if x == 0 { + x = 2 + } + switch tf.HorAlign { + case types.AlignCenter: + x = w/2 - lineBB.Width()/2 + case types.AlignRight: + x = w - lineBB.Width() - 2 + } + fmt.Fprint(buf, "BT ") + if i == 0 { + fmt.Fprintf(buf, "/%s %d Tf %.2f %.2f %.2f RG %.2f %.2f %.2f rg ", + tf.fontID, f.Size, + f.col.R, f.col.G, f.col.B, + f.col.R, f.col.G, f.col.B) + } + + if tf.Comb && tf.MaxLen > 0 && tf.HorAlign == types.AlignLeft { + x = 0.5 + dx := w / float64(tf.MaxLen) + y0 := y + for j := 0; j < len(s) && j < tf.MaxLen; j++ { + fmt.Fprintf(buf, "%.2f %.2f Td (%c) Tj 
", x, y0, s[j]) + y0 = 0 + x = dx + } + fmt.Fprint(buf, "ET ") + } else { + fmt.Fprintf(buf, "%.2f %.2f Td (%s) Tj ET ", x, y, s) + } + + y -= lh + } +} + func (tf *TextField) renderN(xRefTable *model.XRefTable) ([]byte, error) { w, h := tf.BoundingBox.Width(), tf.BoundingBox.Height() bgCol := tf.BgCol @@ -417,32 +447,7 @@ func (tf *TextField) renderN(xRefTable *model.XRefTable) ([]byte, error) { fmt.Fprintf(buf, "q 1 1 %.1f %.1f re W n ", w-2, h-2) } - cjk := pdffont.CJK(f.Script, f.Lang) - - for i := 0; i < len(lines); i++ { - s := lines[i] - lineBB := model.CalcBoundingBox(s, 0, 0, f.Name, f.Size) - s = model.PrepBytes(xRefTable, s, f.Name, !cjk, f.RTL()) - x := 2 * boWidth - if x == 0 { - x = 2 - } - switch tf.HorAlign { - case types.AlignCenter: - x = w/2 - lineBB.Width()/2 - case types.AlignRight: - x = w - lineBB.Width() - 2 - } - fmt.Fprint(buf, "BT ") - if i == 0 { - fmt.Fprintf(buf, "/%s %d Tf %.2f %.2f %.2f RG %.2f %.2f %.2f rg ", - tf.fontID, f.Size, - f.col.R, f.col.G, f.col.B, - f.col.R, f.col.G, f.col.B) - } - fmt.Fprintf(buf, "%.2f %.2f Td (%s) Tj ET ", x, y, s) - y -= lh - } + tf.renderLines(xRefTable, boWidth, lh, w, y, lines, buf) if len(lines) > 0 { fmt.Fprint(buf, "Q ") @@ -458,6 +463,7 @@ func (tf *TextField) renderN(xRefTable *model.XRefTable) ([]byte, error) { return buf.Bytes(), nil } +// unused func (tf *TextField) RefreshN(xRefTable *model.XRefTable, indRef *types.IndirectRef) error { bb, err := tf.renderN(xRefTable) if err != nil { @@ -553,6 +559,10 @@ func (tf *TextField) prepareFF() FieldFlags { ff += FieldReadOnly } + if tf.Comb { + ff += FieldComb + } + return ff } @@ -587,7 +597,7 @@ func (tf *TextField) handleBorderAndMK(d types.Dict) { func (tf *TextField) prepareDict(fonts model.FontMap) (types.Dict, error) { pdf := tf.pdf - id, err := types.EscapeUTF16String(tf.ID) + id, err := types.EscapedUTF16String(tf.ID) if err != nil { return nil, err } @@ -608,17 +618,24 @@ func (tf *TextField) prepareDict(fonts model.FontMap) (types.Dict, error) { ) if tf.Tip != "" { - tu, err := types.EscapeUTF16String(tf.Tip) + tu, err := types.EscapedUTF16String(tf.Tip) if err != nil { return nil, err } d["TU"] = types.StringLiteral(*tu) } + if tf.MaxLen > 0 { + d["MaxLen"] = types.Integer(tf.MaxLen) + } + tf.handleBorderAndMK(d) if tf.Value != "" { - s, err := types.EscapeUTF16String(tf.Value) + if tf.MaxLen > 0 && len(tf.Value) > tf.MaxLen { + return nil, errors.Errorf("pdfcpu: field overflow at %s, maxLen = %d", tf.ID, tf.MaxLen) + } + s, err := types.EscapedUTF16String(tf.Value) if err != nil { return nil, err } @@ -626,7 +643,7 @@ func (tf *TextField) prepareDict(fonts model.FontMap) (types.Dict, error) { } if tf.Default != "" { - s, err := types.EscapeUTF16String(tf.Default) + s, err := types.EscapedUTF16String(tf.Default) if err != nil { return nil, err } @@ -882,12 +899,23 @@ func NewTextField( d types.Dict, v string, multiLine bool, + comb bool, + maxLen int, + da *string, fontIndRef *types.IndirectRef, fonts map[string]types.IndirectRef) (*TextField, *types.IndirectRef, error) { - tf := &TextField{Value: v, Multiline: multiLine} + tf := &TextField{Value: v, Multiline: multiLine, Comb: comb} + + i := d.IntEntry("MaxLen") // Inheritable! 
+ if i != nil { + maxLen = *i + } + tf.MaxLen = maxLen - bb, err := ctx.RectForArray(d.ArrayEntry("Rect")) + obj, _ := d.Find("Rect") + arr, _ := ctx.DereferenceArray(obj) + bb, err := ctx.RectForArray(arr) if err != nil { return nil, nil, err } @@ -895,7 +923,7 @@ func NewTextField( tf.BoundingBox = types.RectForDim(bb.Width(), bb.Height()) if fontIndRef == nil { - if fontIndRef, err = tf.calcFontFromDA(ctx, d, hasUTF(v), fonts); err != nil { + if fontIndRef, err = tf.calcFontFromDA(ctx, d, da, hasUTF(v), fonts); err != nil { return nil, nil, err } } @@ -922,14 +950,14 @@ func NewTextField( return tf, fontIndRef, nil } -func renderTextFieldAP(ctx *model.Context, d types.Dict, v string, multiLine bool, fonts map[string]types.IndirectRef) error { +func renderTextFieldAP(ctx *model.Context, d types.Dict, v string, multiLine, comb bool, maxLen int, da *string, fonts map[string]types.IndirectRef) error { if ap := d.DictEntry("AP"); ap != nil { if err := ctx.DeleteObject(ap); err != nil { return err } } - tf, fontIndRef, err := NewTextField(ctx, d, v, multiLine, nil, fonts) + tf, fontIndRef, err := NewTextField(ctx, d, v, multiLine, comb, maxLen, da, nil, fonts) if err != nil { return err } @@ -949,78 +977,41 @@ func renderTextFieldAP(ctx *model.Context, d types.Dict, v string, multiLine boo return nil } -func EnsureTextFieldAP(ctx *model.Context, d types.Dict, v string, multiLine bool, fonts map[string]types.IndirectRef) error { - ap := d.DictEntry("AP") - if ap == nil { - return renderTextFieldAP(ctx, d, v, multiLine, fonts) - } - - irN := ap.IndirectRefEntry("N") - if irN == nil { - return renderTextFieldAP(ctx, d, v, multiLine, fonts) - } - - sd, _, err := ctx.DereferenceStreamDict(*irN) - if err != nil { - return err - } - - d1 := sd.DictEntry("Resources") - if d1 == nil { - return renderTextFieldAP(ctx, d, v, multiLine, fonts) - } - - fd := d1.DictEntry("Font") - if fd == nil { - return renderTextFieldAP(ctx, d, v, multiLine, fonts) - } - - s := d.StringEntry("DA") - if s == nil { - s = ctx.Form.StringEntry("DA") - if s == nil { - return errors.New("pdfcpu: textfield missing \"DA\"") - } - } - - fontID, f, err := fontFromDA(*s) - if err != nil { - return err - } - - var prefix, name, lang string +func fontAttrs(ctx *model.Context, fd types.Dict, fontID, text string, fonts map[string]types.IndirectRef) (string, string, string, string, *types.IndirectRef, error) { + var prefix, name, lang, script string + var err error fontIndRef := fd.IndirectRefEntry(fontID) if fontIndRef == nil { // create utf8 font * save as indRef - fontID, name, lang, fontIndRef, err = ensureUTF8FormFont(ctx, fonts) + fontID, name, lang, script, fontIndRef, err = ensureUTF8FormFont(ctx, fonts) if err != nil { - return err + return "", "", "", "", nil, err } fd[fontID] = *fontIndRef } else { objNr := int(fontIndRef.ObjectNumber) fontDict, err := ctx.DereferenceDict(*fontIndRef) if err != nil { - return err + return "", "", "", "", nil, err } if fontDict == nil { // create utf8 font * save as indRef - fontID, name, lang, fontIndRef, err = ensureUTF8FormFont(ctx, fonts) + fontID, name, lang, script, fontIndRef, err = ensureUTF8FormFont(ctx, fonts) if err != nil { - return err + return "", "", "", "", nil, err } fd[fontID] = *fontIndRef } else { prefix, name, err = pdffont.Name(ctx.XRefTable, fontDict, objNr) if err != nil { - return err + return "", "", "", "", nil, err } - if len(prefix) == 0 && hasUTF(v) { + if !font.SupportedFont(name) || (len(prefix) == 0 && hasUTF(text)) { // create utf8 font * save as indRef - 
fontID, name, lang, fontIndRef, err = ensureUTF8FormFont(ctx, fonts) + fontID, name, lang, script, fontIndRef, err = ensureUTF8FormFont(ctx, fonts) if err != nil { - return err + return "", "", "", "", nil, err } fd[fontID] = *fontIndRef } else { @@ -1029,17 +1020,78 @@ func EnsureTextFieldAP(ctx *model.Context, d types.Dict, v string, multiLine boo } } - tf, _, err := NewTextField(ctx, d, v, multiLine, fontIndRef, fonts) + return fontID, name, lang, script, fontIndRef, nil +} + +func EnsureTextFieldAP(ctx *model.Context, d types.Dict, text string, multiLine, comb bool, maxLen int, da *string, fonts map[string]types.IndirectRef) error { + ap := d.DictEntry("AP") + if ap == nil { + return renderTextFieldAP(ctx, d, text, multiLine, comb, maxLen, da, fonts) + } + + irN := ap.IndirectRefEntry("N") + if irN == nil { + return renderTextFieldAP(ctx, d, text, multiLine, comb, maxLen, da, fonts) + } + + sd, _, err := ctx.DereferenceStreamDict(*irN) if err != nil { return err } - tf.Font = &f + obj, ok := sd.Find("Resources") + if !ok { + return renderTextFieldAP(ctx, d, text, multiLine, comb, maxLen, da, fonts) + } + + d1, err := ctx.DereferenceDict(obj) + if err != nil { + return err + } + if d1 == nil { + return renderTextFieldAP(ctx, d, text, multiLine, comb, maxLen, da, fonts) + } + + fd := d1.DictEntry("Font") + if fd == nil { + return renderTextFieldAP(ctx, d, text, multiLine, comb, maxLen, da, fonts) + } + + s := locateDA(ctx, d, da) + if s == nil { + return errors.New("pdfcpu: textfield missing \"DA\"") + } + + fontID, f, err := fontFromDA(*s) + if err != nil { + return err + } + + fontID, name, lang, script, fontIndRef, err := fontAttrs(ctx, fd, fontID, text, fonts) + if err != nil { + return err + } + + fillFont := formFontIndRef(ctx.XRefTable, fontID) != nil + + tf, _, err := NewTextField(ctx, d, text, multiLine, comb, maxLen, da, fontIndRef, fonts) + if err != nil { + return err + } + + f.Name = name + f.Lang = lang + f.Script = script + f.FillFont = fillFont + tf.fontID = fontID - tf.Font.Name = name - tf.Font.Lang = lang + tf.Font = &f tf.RTL = pdffont.RTL(lang) + if !font.SupportedFont(name) { + return errors.Errorf("pdfcpu: font unavailable: %s", name) + } + bb, err := tf.renderN(ctx.XRefTable) if err != nil { return err diff --git a/pkg/pdfcpu/property.go b/pkg/pdfcpu/property.go index 1b9434c6..6ff25231 100644 --- a/pkg/pdfcpu/property.go +++ b/pkg/pdfcpu/property.go @@ -51,9 +51,12 @@ func PropertiesAdd(ctx *model.Context, properties map[string]string) error { d, _ := ctx.DereferenceDict(*ctx.Info) for k, v := range properties { - k1 := types.UTF8ToCP1252(k) - d[k1] = types.StringLiteral(v) - ctx.Properties[k1] = v + s, err := types.EscapedUTF16String(v) + if err != nil { + return err + } + d[k] = types.StringLiteral(*s) + ctx.Properties[k] = *s } return nil @@ -65,6 +68,7 @@ func PropertiesRemove(ctx *model.Context, properties []string) (bool, error) { if ctx.Info == nil { return false, nil } + d, err := ctx.DereferenceDict(*ctx.Info) if err != nil || d == nil { return false, err @@ -73,8 +77,7 @@ func PropertiesRemove(ctx *model.Context, properties []string) (bool, error) { if len(properties) == 0 { // Remove all properties. 
for k := range ctx.Properties { - k1 := types.UTF8ToCP1252(k) - delete(d, k1) + delete(d, types.EncodeName(k)) } ctx.Properties = map[string]string{} return true, nil @@ -82,11 +85,10 @@ func PropertiesRemove(ctx *model.Context, properties []string) (bool, error) { var removed bool for _, k := range properties { - k1 := types.UTF8ToCP1252(k) - _, ok := d[k1] + _, ok := d[k] if ok && !removed { - delete(d, k1) - delete(ctx.Properties, k1) + delete(d, k) + delete(ctx.Properties, k) removed = true } } diff --git a/pkg/pdfcpu/read.go b/pkg/pdfcpu/read.go index 469dc333..390d7a54 100644 --- a/pkg/pdfcpu/read.go +++ b/pkg/pdfcpu/read.go @@ -20,11 +20,13 @@ import ( "bufio" "bytes" "context" + "fmt" "io" "os" "sort" "strconv" "strings" + "unicode" "github.com/angel-one/pdfcpu/pkg/filter" "github.com/angel-one/pdfcpu/pkg/log" @@ -36,12 +38,16 @@ import ( const ( defaultBufSize = 1024 + maximumBufSize = 1024 * 1024 ) var ( - ErrWrongPassword = errors.New("pdfcpu: please provide the correct password") - ErrCorruptHeader = errors.New("pdfcpu: no header version available") - zero int64 = 0 + ErrCorruptHeader = errors.New("pdfcpu: no header version available") + ErrMissingXRefSection = errors.New("pdfcpu: can't detect last xref section") + ErrReferenceDoesNotExist = errors.New("pdfcpu: referenced object does not exist") + ErrWrongPassword = errors.New("pdfcpu: please provide the correct password") + + zero int64 = 0 ) // ReadFile reads in a PDF file and builds an internal structure holding its cross reference table aka the PDF model context. @@ -87,6 +93,10 @@ func ReadWithContext(c context.Context, rs io.ReadSeeker, conf *model.Configurat return nil, err } + if ctx.Read.FileSize == 0 { + return nil, errors.New("The file could not be opened because it is empty.") + } + if log.InfoEnabled() { if ctx.Reader15 { log.Info.Println("PDF Version 1.5 conforming reader") @@ -102,13 +112,15 @@ func ReadWithContext(c context.Context, rs io.ReadSeeker, conf *model.Configurat // Make all objects explicitly available (load into memory) in corresponding xRefTable entries. // Also decode any involved object streams. - if err = dereferenceXRefTable(c, ctx, conf); err != nil { + if err = dereferenceXRefTable(c, ctx); err != nil { return nil, err } // Some PDFWriters write an incorrect Size into trailer. - if *ctx.XRefTable.Size < len(ctx.XRefTable.Table) { - *ctx.XRefTable.Size = len(ctx.XRefTable.Table) + if ctx.XRefTable.Size == nil || *ctx.XRefTable.Size != ctx.MaxObjNr+1 { + maxObjNr := ctx.MaxObjNr + 1 + ctx.XRefTable.Size = &maxObjNr + model.ShowRepaired("trailer size") } if log.ReadEnabled() { @@ -151,6 +163,16 @@ func newPositionedReader(rs io.ReadSeeker, offset *int64) (*bufio.Reader, error) return bufio.NewReader(rs), nil } +func incrEpilogIndex(s string) int { + suffixes := []string{"%%EO", "%%E", "%%", "%"} + for _, suf := range suffixes { + if strings.HasSuffix(s, suf) { + return len(s) - len(suf) + } + } + return -1 +} + // Get the file offset of the last XRefSection. 
// Go to end of file and search backwards for the first occurrence of startxref {offset} %%EOF func offsetLastXRefSection(ctx *model.Context, skip int64) (*int64, error) { @@ -162,11 +184,15 @@ func offsetLastXRefSection(ctx *model.Context, skip int64) (*int64, error) { offset int64 ) + if ctx.Read.FileSize < bufSize { + bufSize = ctx.Read.FileSize + } + for i := 1; offset == 0; i++ { off, err := rs.Seek(-int64(i)*bufSize-skip, io.SeekEnd) if err != nil { - return nil, errors.New("pdfcpu: can't find last xref section") + return nil, ErrMissingXRefSection } if log.ReadEnabled() { @@ -190,16 +216,22 @@ func offsetLastXRefSection(ctx *model.Context, skip int64) (*int64, error) { continue } - p := workBuf[j+len("startxref"):] + p := workBuf[j+len("startxref")+1:] posEOF := strings.Index(string(p), "%%EOF") - if posEOF == -1 { - return nil, errors.New("pdfcpu: no matching %%EOF for startxref") + if posEOF < 0 { + posEOF = incrEpilogIndex(string(p)) + if posEOF < 0 { + return nil, errors.New("pdfcpu: no matching %%EOF for startxref") + } } p = p[:posEOF] offset, err = strconv.ParseInt(strings.TrimSpace(string(p)), 10, 64) - if err != nil || offset >= ctx.Read.FileSize { - return nil, errors.New("pdfcpu: corrupted last xref section") + if err != nil { + return nil, errors.New("pdfcpu: invalid last xref section") + } + if offset >= ctx.Read.FileSize { + offset = 0 } } @@ -210,8 +242,8 @@ func offsetLastXRefSection(ctx *model.Context, skip int64) (*int64, error) { return &offset, nil } -func createXRefTableEntry(entryType string, objNr int, offset, offExtra int64, generation int) (model.XRefTableEntry, bool) { - entry := model.XRefTableEntry{Offset: &offset, Generation: &generation} +func createXRefTableEntry(entryType string, objNr int, offset, offExtra int64, generation, incr int) (model.XRefTableEntry, bool) { + entry := model.XRefTableEntry{Offset: &offset, Generation: &generation, Incr: incr} if entryType == "n" { @@ -222,12 +254,21 @@ func createXRefTableEntry(entryType string, objNr int, offset, offExtra int64, g } if offset == 0 { + if objNr == 0 { + entry.Free = true + model.ShowRepaired("obj#0") + return entry, true + } if log.InfoEnabled() { log.Info.Printf("createXRefTableEntry: Skip entry for in use object #%d with offset 0\n", objNr) } return entry, false } + if offset < 9 { + return entry, false + } + *entry.Offset += offExtra return entry, true @@ -244,14 +285,23 @@ func createXRefTableEntry(entryType string, objNr int, offset, offExtra int64, g return entry, true } -func decodeSubsection(fields []string, repairOff int) (int64, int, string, error) { - offset, err := strconv.ParseInt(fields[0], 10, 64) +func decodeSubsection(fields []string) (int64, int, string, error) { + s := fields[0] + for len(s) > 0 && !unicode.IsDigit(rune(s[0])) { + s = s[1:] + } + + offset, err := strconv.ParseInt(s, 10, 64) if err != nil { return 0, 0, "", err } - offset += int64(repairOff) - generation, err := strconv.Atoi(fields[1]) + s = "00000" + // model.ShowRepaired + if len(fields[1]) <= 5 { + s = fields[1] + } + generation, err := strconv.Atoi(s) if err != nil { return 0, 0, "", err } @@ -265,35 +315,17 @@ func decodeSubsection(fields []string, repairOff int) (int64, int, string, error } // Read next subsection entry and generate corresponding xref table entry. 
-func parseXRefTableEntry(xRefTable *model.XRefTable, s *bufio.Scanner, objNr int, offExtra int64, repairOff int) error { +func parseXRefTableEntry(xRefTable *model.XRefTable, fields []string, objNr int, offExtra int64, incr int) error { if log.ReadEnabled() { log.Read.Println("parseXRefTableEntry: begin") } - line, err := scanLine(s) - if err != nil { - return err - } - - if xRefTable.Exists(objNr) { - if log.ReadEnabled() { - log.Read.Printf("parseXRefTableEntry: end - Skip entry %d - already assigned\n", objNr) - } - return nil - } - - fields := strings.Fields(line) - if len(fields) != 3 || - len(fields[0]) != 10 || len(fields[1]) != 5 || len(fields[2]) != 1 { - return errors.New("pdfcpu: parseXRefTableEntry: corrupt xref subsection header") - } - - offset, generation, entryType, err := decodeSubsection(fields, repairOff) + offset, generation, entryType, err := decodeSubsection(fields) if err != nil { return err } - entry, ok := createXRefTableEntry(entryType, objNr, offset, offExtra, generation) + entry, ok := createXRefTableEntry(entryType, objNr, offset, offExtra, generation, incr) if !ok { return nil } @@ -311,38 +343,67 @@ func parseXRefTableEntry(xRefTable *model.XRefTable, s *bufio.Scanner, objNr int return nil } -// Process xRef table subsection and create corrresponding xRef table entries. -func parseXRefTableSubSection(xRefTable *model.XRefTable, s *bufio.Scanner, fields []string, offExtra int64, repairOff int) error { - if log.ReadEnabled() { - log.Read.Println("parseXRefTableSubSection: begin") - } +// Process xRef table subsection and create corresponding xRef table entries. +func parseXRefTableSubSection(xRefTable *model.XRefTable, s *bufio.Scanner, fields []string, offExtra int64, incr int) (string, error) { + var line string + trailer := false - startObjNumber, err := strconv.Atoi(fields[0]) - if err != nil { - return err - } + for !trailer && len(fields) == 2 { - objCount, err := strconv.Atoi(fields[1]) - if err != nil { - return err - } + startObjNumber, err := strconv.Atoi(fields[0]) + if err != nil { + return "", err + } - if log.ReadEnabled() { - log.Read.Printf("detected xref subsection, startObj=%d length=%d\n", startObjNumber, objCount) - } + objCount, err := strconv.Atoi(fields[1]) + if err != nil { + return "", err + } - // Process all entries of this subsection into xRefTable entries. - for i := 0; i < objCount; i++ { - if err = parseXRefTableEntry(xRefTable, s, startObjNumber+i, offExtra, repairOff); err != nil { - return err + if log.ReadEnabled() { + log.Read.Printf("detected xref subsection, startObj=%d length=%d\n", startObjNumber, objCount) } - } - if log.ReadEnabled() { - log.Read.Println("parseXRefTableSubSection: end") + // Process all entries of this subsection into xRefTable entries. 
+ for i := 0; ; i++ { + + objNr := startObjNumber + i + + line, err = scanLine(s) + if err != nil { + return "", err + } + + fields = strings.Fields(line) + if len(fields) != 3 { + if i < objCount { + return "", errors.New("pdfcpu: incomplete xRefTable subsection detected") + } + trailer = strings.Contains(line, "trailer") + break + } + + if xRefTable.Exists(objNr) { + if log.ReadEnabled() { + log.Read.Printf("parseXRefTableEntry: end - Skip entry %d - already assigned\n", objNr) + } + continue + } + + if len(fields[0]) == 1 { // should be 10 + continue + } + + if i >= objCount { + model.ShowMsg(fmt.Sprintf("digesting extra XrefTable entry for obj#%d", objNr)) + } + if err = parseXRefTableEntry(xRefTable, fields, objNr, offExtra, incr); err != nil { + return "", err + } + } } - return nil + return line, nil } // Parse compressed object. @@ -384,8 +445,21 @@ func parseObjectStream(c context.Context, osd *types.ObjectStreamDict) error { } decodedContent := osd.Content + if decodedContent == nil { + // The actual content will be decoded lazily, only decode the prolog here. + var err error + decodedContent, err = osd.DecodeLength(int64(osd.FirstObjOffset)) + if err != nil { + return err + } + } prolog := decodedContent[:osd.FirstObjOffset] + // Remove inline comment. + if i := bytes.Index(prolog, []byte("%")); i != -1 { + prolog = prolog[:i] + } + // The separator used in the prolog shall be white space // but some PDF writers use 0x00. prolog = bytes.ReplaceAll(prolog, []byte{0x00}, []byte{0x20}) @@ -415,34 +489,12 @@ func parseObjectStream(c context.Context, osd *types.ObjectStreamDict) error { offset += osd.FirstObjOffset if i > 0 { - dstr := string(decodedContent[offsetOld:offset]) - if log.ReadEnabled() { - log.Read.Printf("parseObjectStream: objString = %s\n", dstr) - } - o, err := compressedObject(c, dstr) - if err != nil { - return err - } - - if log.ReadEnabled() { - log.Read.Printf("parseObjectStream: [%d] = obj %s:\n%s\n", i/2-1, objs[i-2], o) - } + o := types.NewLazyObjectStreamObject(osd, offsetOld, offset, compressedObject) objArray = append(objArray, o) } if i == len(objs)-2 { - dstr := string(decodedContent[offset:]) - if log.ReadEnabled() { - log.Read.Printf("parseObjectStream: objString = %s\n", dstr) - } - o, err := compressedObject(c, dstr) - if err != nil { - return err - } - - if log.ReadEnabled() { - log.Read.Printf("parseObjectStream: [%d] = obj %s:\n%s\n", i/2, objs[i], o) - } + o := types.NewLazyObjectStreamObject(osd, offset, -1, compressedObject) objArray = append(objArray, o) } @@ -458,10 +510,10 @@ func parseObjectStream(c context.Context, osd *types.ObjectStreamDict) error { return nil } -func createXRefTableEntryFromXRefStream(entry byte, objNr int, c2, c3, offExtra int64, objStreams types.IntSet) model.XRefTableEntry { +func createXRefTableEntryFromXRefStream(entryType int64, objNr int, c2, c3, offExtra int64, objStreams types.IntSet, incr int) model.XRefTableEntry { var xRefTableEntry model.XRefTableEntry - switch entry { + switch entryType { case 0x00: // free object @@ -475,7 +527,8 @@ func createXRefTableEntryFromXRefStream(entry byte, objNr int, c2, c3, offExtra Free: true, Compressed: false, Offset: &c2, - Generation: &g} + Generation: &g, + Incr: incr} case 0x01: // in use object @@ -491,7 +544,8 @@ func createXRefTableEntryFromXRefStream(entry byte, objNr int, c2, c3, offExtra Free: false, Compressed: false, Offset: &c2, - Generation: &g} + Generation: &g, + Incr: incr} case 0x02: // compressed object @@ -507,7 +561,8 @@ func 
createXRefTableEntryFromXRefStream(entry byte, objNr int, c2, c3, offExtra Free: false, Compressed: true, ObjectStream: &objNumberRef, - ObjectStreamInd: &objIndex} + ObjectStreamInd: &objIndex, + Incr: incr} objStreams[objNumberRef] = true } @@ -516,7 +571,7 @@ func createXRefTableEntryFromXRefStream(entry byte, objNr int, c2, c3, offExtra } // For each object embedded in this xRefStream create the corresponding xRef table entry. -func extractXRefTableEntriesFromXRefStream(buf []byte, offExtra int64, xsd *types.XRefStreamDict, ctx *model.Context) error { +func extractXRefTableEntriesFromXRefStream(buf []byte, offExtra int64, xsd *types.XRefStreamDict, ctx *model.Context, incr int) error { if log.ReadEnabled() { log.Read.Printf("extractXRefTableEntriesFromXRefStream begin") } @@ -535,7 +590,7 @@ func extractXRefTableEntriesFromXRefStream(buf []byte, offExtra int64, xsd *type log.Read.Printf("extractXRefTableEntriesFromXRefStream: begin xrefEntryLen = %d\n", xrefEntryLen) } - if len(buf)%xrefEntryLen > 0 { + if xrefEntryLen != 0 && len(buf)%xrefEntryLen > 0 { return errors.New("pdfcpu: extractXRefTableEntriesFromXRefStream: corrupt xrefstream") } @@ -564,20 +619,28 @@ func extractXRefTableEntriesFromXRefStream(buf []byte, offExtra int64, xsd *type for i := 0; i < len(buf) && j < len(xsd.Objects); i += xrefEntryLen { objNr := xsd.Objects[j] - i2Start := i + i1 - c2 := bufToInt64(buf[i2Start : i2Start+i2]) - c3 := bufToInt64(buf[i2Start+i2 : i2Start+i2+i3]) - - entry := createXRefTableEntryFromXRefStream(buf[i], objNr, c2, c3, offExtra, ctx.Read.ObjectStreams) if ctx.XRefTable.Exists(objNr) { if log.ReadEnabled() { log.Read.Printf("extractXRefTableEntriesFromXRefStream: Skip entry %d - already assigned\n", objNr) } + j++ + continue + } + + var c1 int64 + if i1 == 0 { + // If the first element is zero, the type field shall not be present, + // and shall default to type 1. + c1 = 1 } else { - ctx.Table[objNr] = &entry + c1 = bufToInt64(buf[i : i+i1]) } + c2 := bufToInt64(buf[i+i1 : i+i1+i2]) + c3 := bufToInt64(buf[i+i1+i2 : i+i1+i2+i3]) + entry := createXRefTableEntryFromXRefStream(c1, objNr, c2, c3, offExtra, ctx.Read.ObjectStreams, incr) + ctx.Table[objNr] = &entry j++ } @@ -623,7 +686,7 @@ func xRefStreamDict(c context.Context, ctx *model.Context, o types.Object, objNr return model.ParseXRefStreamDict(&sd) } -func processXRefStream(ctx *model.Context, xsd *types.XRefStreamDict, objNr, genNr *int, offset *int64, offExtra int64) (prevOffset *int64, err error) { +func processXRefStream(ctx *model.Context, xsd *types.XRefStreamDict, objNr *int, offset *int64, offExtra int64, incr int) (prevOffset *int64, err error) { if log.ReadEnabled() { log.Read.Println("processXRefStream: begin") } @@ -633,25 +696,32 @@ func processXRefStream(ctx *model.Context, xsd *types.XRefStreamDict, objNr, gen } // Parse xRefStream and create xRefTable entries for embedded objects. 
- if err = extractXRefTableEntriesFromXRefStream(xsd.Content, offExtra, xsd, ctx); err != nil { + if err = extractXRefTableEntriesFromXRefStream(xsd.Content, offExtra, xsd, ctx, incr); err != nil { return nil, err } *offset += offExtra - entry := - model.XRefTableEntry{ - Free: false, - Offset: offset, - Generation: genNr, - Object: *xsd} - - if log.ReadEnabled() { - log.Read.Printf("processXRefStream: Insert new xRefTable entry for Object %d\n", *objNr) + if entry, ok := ctx.Table[*objNr]; ok && entry.Offset != nil && *entry.Offset == *offset { + entry.Object = *xsd } - ctx.Table[*objNr] = &entry - ctx.Read.XRefStreams[*objNr] = true + ////////////////// + // entry := + // model.XRefTableEntry{ + // Free: false, + // Offset: offset, + // Generation: genNr, + // Object: *xsd} + + // if log.ReadEnabled() { + // log.Read.Printf("processXRefStream: Insert new xRefTable entry for Object %d\n", *objNr) + // } + + // ctx.Table[*objNr] = &entry + // ctx.Read.XRefStreams[*objNr] = true + /////////////////// + prevOffset = xsd.PreviousOffset if log.ReadEnabled() { @@ -662,12 +732,12 @@ func processXRefStream(ctx *model.Context, xsd *types.XRefStreamDict, objNr, gen } // Parse xRef stream and setup xrefTable entries for all embedded objects and the xref stream dict. -func parseXRefStream(c context.Context, ctx *model.Context, rd io.Reader, offset *int64, offExtra int64) (prevOffset *int64, err error) { +func parseXRefStream(c context.Context, ctx *model.Context, rd io.Reader, offset *int64, offExtra int64, incr int) (prevOffset *int64, err error) { if log.ReadEnabled() { log.Read.Printf("parseXRefStream: begin at offset %d\n", *offset) } - buf, endInd, streamInd, streamOffset, err := buffer(rd) + buf, endInd, streamInd, streamOffset, err := buffer(c, rd) if err != nil { return nil, err } @@ -713,11 +783,11 @@ func parseXRefStream(c context.Context, ctx *model.Context, rd io.Reader, offset return nil, err } - return processXRefStream(ctx, xsd, objNr, genNr, offset, offExtra) + return processXRefStream(ctx, xsd, objNr, offset, offExtra, incr) } // Parse an xRefStream for a hybrid PDF file. 
-func parseHybridXRefStream(c context.Context, ctx *model.Context, offset *int64, offExtra int64) error { +func parseHybridXRefStream(c context.Context, ctx *model.Context, offset *int64, offExtra int64, incr int) error { if log.ReadEnabled() { log.Read.Println("parseHybridXRefStream: begin") } @@ -727,7 +797,7 @@ func parseHybridXRefStream(c context.Context, ctx *model.Context, offset *int64, return err } - if _, err = parseXRefStream(c, ctx, rd, offset, offExtra); err != nil { + if _, err = parseXRefStream(c, ctx, rd, offset, offExtra, incr); err != nil { return err } @@ -775,6 +845,16 @@ func parseTrailerInfo(xRefTable *model.XRefTable, d types.Dict) error { func parseTrailerID(xRefTable *model.XRefTable, d types.Dict) error { arr := d.ArrayEntry("ID") if arr != nil { + if len(arr) != 2 { + if xRefTable.ValidationMode == model.ValidationStrict { + return errors.New("pdfcpu: parseTrailerID: invalid entry \"ID\"") + } + if len(arr) != 1 { + return errors.New("pdfcpu: parseTrailerID: invalid entry \"ID\"") + } + arr = append(arr, arr[0]) + model.ShowRepaired("trailer ID") + } xRefTable.ID = arr if log.ReadEnabled() { log.Read.Printf("parseTrailerID: ID object: %s\n", xRefTable.ID) @@ -922,7 +1002,7 @@ func offsetPrev(ctx *model.Context, trailerDict types.Dict, offCurXRef *int64) * return offset } -func parseTrailerDict(c context.Context, ctx *model.Context, trailerDict types.Dict, offCurXRef *int64, offExtra int64) (*int64, error) { +func parseTrailerDict(c context.Context, ctx *model.Context, trailerDict types.Dict, offCurXRef *int64, offExtra int64, incr int, repairing bool) (*int64, error) { if log.ReadEnabled() { log.Read.Println("parseTrailerDict begin") } @@ -938,7 +1018,7 @@ func parseTrailerDict(c context.Context, ctx *model.Context, trailerDict types.D offset := offsetPrev(ctx, trailerDict, offCurXRef) offsetXRefStream := trailerDict.Int64Entry("XRefStm") - if offsetXRefStream == nil { + if offsetXRefStream == nil || repairing { // No cross reference stream. if !ctx.Reader15 && xRefTable.Version() >= model.V14 && !ctx.Read.Hybrid { return nil, errors.Errorf("parseTrailerDict: PDF1.4 conformant reader: found incompatible version: %s", xRefTable.VersionString()) @@ -962,7 +1042,7 @@ func parseTrailerDict(c context.Context, ctx *model.Context, trailerDict types.D // Previous XRefSection is expected to have free entries for hidden entries. // May appear in XRefSections only. 
if ctx.Reader15 { - if err := parseHybridXRefStream(c, ctx, offsetXRefStream, offExtra); err != nil { + if err := parseHybridXRefStream(c, ctx, offsetXRefStream, offExtra, incr); err != nil { return nil, err } } @@ -994,19 +1074,9 @@ func scanLine(s *bufio.Scanner) (s1 string, err error) { break } } - return s1, nil } -func isDict(s string) (bool, error) { - o, err := model.ParseObject(&s) - if err != nil { - return false, err - } - _, ok := o.(types.Dict) - return ok, nil -} - func scanTrailerDictStart(s *bufio.Scanner, line *string) error { l := *line var err error @@ -1027,81 +1097,31 @@ func scanTrailerDictStart(s *bufio.Scanner, line *string) error { } func scanTrailerDictRemainder(s *bufio.Scanner, line string, buf bytes.Buffer) (string, error) { - var err error - var i, j, k int - - buf.WriteString(line) - buf.WriteString("\x0a") - // log.Read.Printf("scanTrailer dictBuf after start tag: <%s>\n", line) - - line = line[2:] - - for { + var ( + i int + err error + ) - if len(line) == 0 { - if line, err = scanLine(s); err != nil { - return "", err - } - buf.WriteString(line) - buf.WriteString("\x0a") - // log.Read.Printf("scanTrailer dictBuf next line: <%s>\n", line) - } - - i = strings.Index(line, "<<") - if i < 0 { - // No << - j = strings.Index(line, ">>") - if j >= 0 { - // Yes >> - if k == 0 { - // Check for dict - ok, err := isDict(buf.String()) - if err == nil && ok { - return buf.String(), nil - } - } else { - k-- - } - line = line[j+2:] - continue - } - // No >> - line, err = scanLine(s) - if err != nil { - return "", err - } - buf.WriteString(line) - buf.WriteString("\x0a") - // log.Read.Printf("scanTrailer dictBuf next line: <%s>\n", line) - } else { - // Yes << - j = strings.Index(line, ">>") - if j < 0 { - // No >> - k++ - line = line[i+2:] - } else { - // Yes >> - if i < j { - // handle << - k++ - line = line[i+2:] - } else { - // handle >> - if k == 0 { - // Check for dict - ok, err := isDict(buf.String()) - if err == nil && ok { - return buf.String(), nil - } - } else { - k-- - } - line = line[j+2:] - } - } + for i = strings.Index(line, "startxref"); i < 0; { + if log.ReadEnabled() { + log.Read.Printf("line: <%s>\n", line) + } + buf.WriteString(line) + buf.WriteString("\x0a") + if line, err = scanLine(s); err != nil { + return "", err } + i = strings.Index(line, "startxref") + } + + line = line[:i] + if log.ReadEnabled() { + log.Read.Printf("line: <%s>\n", line) } + buf.WriteString(line[:i]) + buf.WriteString("\x0a") + + return buf.String(), nil } func scanTrailer(s *bufio.Scanner, line string) (string, error) { @@ -1110,16 +1130,14 @@ func scanTrailer(s *bufio.Scanner, line string) (string, error) { log.Read.Printf("line: <%s>\n", line) } - // Scan for dict start tag "<<". if err := scanTrailerDictStart(s, &line); err != nil { return "", err } - // Scan for dict end tag ">>" but account for inner dicts. 
return scanTrailerDictRemainder(s, line, buf) } -func processTrailer(c context.Context, ctx *model.Context, s *bufio.Scanner, line string, offCurXRef *int64, offExtra int64) (*int64, error) { +func processTrailer(c context.Context, ctx *model.Context, s *bufio.Scanner, line string, offCurXRef *int64, offExtra int64, incr int, repairing bool) (*int64, error) { var trailerString string if line != "trailer" { @@ -1156,53 +1174,46 @@ func processTrailer(c context.Context, ctx *model.Context, s *bufio.Scanner, lin log.Read.Printf("processTrailer: trailerDict:\n%s\n", trailerDict) } - return parseTrailerDict(c, ctx, trailerDict, offCurXRef, offExtra) + return parseTrailerDict(c, ctx, trailerDict, offCurXRef, offExtra, incr, repairing) } // Parse xRef section into corresponding number of xRef table entries. -func parseXRefSection(c context.Context, ctx *model.Context, s *bufio.Scanner, ssCount *int, offCurXRef *int64, offExtra int64, repairOff int) (*int64, error) { +func parseXRefSection(c context.Context, ctx *model.Context, s *bufio.Scanner, fields []string, ssCount *int, offCurXRef *int64, offExtra int64, incr int) (*int64, error) { if log.ReadEnabled() { log.Read.Println("parseXRefSection begin") } - line, err := scanLine(s) - if err != nil { - return nil, err - } - - if log.ReadEnabled() { - log.Read.Printf("parseXRefSection: <%s>\n", line) - } - - fields := strings.Fields(line) - - // Process all sub sections of this xRef section. - for !strings.HasPrefix(line, "trailer") && len(fields) == 2 { + var ( + line string + err error + ) - if err = parseXRefTableSubSection(ctx.XRefTable, s, fields, offExtra, repairOff); err != nil { - return nil, err - } - *ssCount++ + if len(fields) == 0 { - // trailer or another xref table subsection ? - if line, err = scanLine(s); err != nil { + line, err = scanLine(s) + if err != nil { return nil, err } - // if empty line try next line for trailer - if len(line) == 0 { - if line, err = scanLine(s); err != nil { - return nil, err - } + if log.ReadEnabled() { + log.Read.Printf("parseXRefSection: <%s>\n", line) } fields = strings.Fields(line) } + // Process all sub sections of this xRef section. + if line, err = parseXRefTableSubSection(ctx.XRefTable, s, fields, offExtra, incr); err != nil { + return nil, err + } + *ssCount++ + if log.ReadEnabled() { log.Read.Println("parseXRefSection: All subsections read!") } + line = strings.TrimLeft(line, " ") + if !strings.HasPrefix(line, "trailer") { return nil, errors.Errorf("xrefsection: missing trailer dict, line = <%s>", line) } @@ -1211,7 +1222,7 @@ func parseXRefSection(c context.Context, ctx *model.Context, s *bufio.Scanner, s log.Read.Println("parseXRefSection: parsing trailer dict..") } - return processTrailer(c, ctx, s, line, offCurXRef, offExtra) + return processTrailer(c, ctx, s, line, offCurXRef, offExtra, incr, false) } func scanForVersion(rs io.ReadSeeker, prefix string) ([]byte, int, error) { @@ -1238,7 +1249,7 @@ func scanForVersion(rs io.ReadSeeker, prefix string) ([]byte, int, error) { i := bytes.IndexByte(curBuf, '%') if i < 0 { // no match, check next block - off += bufSize + off += len(curBuf) break } @@ -1246,6 +1257,7 @@ func scanForVersion(rs io.ReadSeeker, prefix string) ([]byte, int, error) { if i < len(curBuf)-18 { if !bytes.HasPrefix(curBuf[i:], []byte(prefix)) { // No match, keep checking + off += i + 1 curBuf = curBuf[i+1:] continue } @@ -1266,8 +1278,8 @@ func scanForVersion(rs io.ReadSeeker, prefix string) ([]byte, int, error) { buf3 := append(curBuf[i:], buf2[:n]...) 
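		// buf3 stitches the tail of the current block onto the next block so a
		// version prefix straddling the block boundary is still detected. Note that
		// the offset bookkeeping now advances by len(curBuf) rather than bufSize,
		// since the last block read may be shorter than bufSize.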
if !bytes.HasPrefix(buf3, []byte(prefix)) { // No match, keep checking + off += len(curBuf) curBuf = buf2 - off += bufSize continue } off += i @@ -1328,61 +1340,105 @@ func headerVersion(rs io.ReadSeeker) (v *model.Version, eolCount int, offset int return &pdfVersion, eolCount, int64(off), nil } -func parseAndLoad(c context.Context, ctx *model.Context, line string, offset *int64) error { +func parseAndLoad(c context.Context, ctx *model.Context, line string, offset *int64, incr int, offsetPrev *int64) error { l := line objNr, generation, err := model.ParseObjectAttributes(&l) if err != nil { return err } - entry := model.XRefTableEntry{ - Free: false, - Offset: offset, - Generation: generation} + if *objNr == 0 { + return nil + } + + off := *offset - ctx.Table[*objNr] = &entry - o, err := ParseObjectWithContext(c, ctx, *entry.Offset, *objNr, *entry.Generation) + obj, err := ParseObjectWithContext(c, ctx, off, *objNr, *generation) if err != nil { return err } - entry.Object = o + if d, ok := obj.(types.Dict); ok { + if typ := d.Type(); typ != nil { + if *typ == "Catalog" { + ctx.RootDict = d + ctx.Root = types.NewIndirectRef(*objNr, *generation) + model.ShowRepaired("catalog") + } + } + } - sd, ok := o.(types.StreamDict) + *offset += int64(len(line)) + + sd, ok := obj.(types.StreamDict) if ok { if err = loadStreamDict(c, ctx, &sd, *objNr, *generation, true); err != nil { return err } - entry.Object = sd + obj = sd *offset = sd.StreamOffset + *sd.StreamLength + } + + e, found := ctx.Table[*objNr] + if !found { + entry := model.XRefTableEntry{ + Free: false, + Generation: generation, + Offset: &off, + Object: obj, + Incr: incr, + } + ctx.Table[*objNr] = &entry return nil } - *offset += int64(len(line) + ctx.Read.EolCount) + e.Offset = &off + e.Object = obj + e.Incr = incr return nil } -func showRep() { - msg := "repaired: xreftable" - if log.DebugEnabled() { - log.Debug.Println("pdfcpu " + msg) +func processObject(c context.Context, ctx *model.Context, line string, offset *int64, incr int, offsetPrev *int64) (*bufio.Scanner, error) { + if err := parseAndLoad(c, ctx, line, offset, incr, offsetPrev); err != nil { + return nil, err } - if log.ReadEnabled() { - log.Read.Println("pdfcpu " + msg) + rd, err := newPositionedReader(ctx.Read.RS, offset) + if err != nil { + return nil, err + } + s := bufio.NewScanner(rd) + s.Split(scan.LinesSingleEOL) + return s, nil +} + +func objCandidate(withinObj bool, line string) bool { + if withinObj { + return false } - if log.CLIEnabled() { - log.CLI.Println(msg) + i := strings.Index(line, "obj") + return i > 2 && strings.Index(line, "endobj") != i-3 +} + +func checkEndObj(withinObj *bool, line *string) { + if *withinObj { + i := strings.Index(*line, "endobj") + if i >= 0 { + *line = (*line)[i:] + *withinObj = false + } } } +func ensureNoStartXRef(line string, i int) bool { + return i == 0 || i > 0 && line[i-1] != 't' +} + // bypassXrefSection is a fix for digesting corrupt xref sections. // It populates the xRefTable by reading in all indirect objects line by line // and works on the assumption of a single xref section - meaning no incremental updates. 
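// A sketch of the scenario this repairs (offsets illustrative): the xref section
// advertises offsets that no longer match the actual object positions, e.g. after a
// tool rewrote objects without updating the table. The scan below therefore ignores
// the advertised offsets, walks the file with a single-EOL line scanner
// (scan.LinesSingleEOL), registers each "objNr gen obj" header it encounters at its
// real byte offset, and finally hands the trailer it finds to processTrailer with
// repairing=true.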
-func bypassXrefSection(c context.Context, ctx *model.Context, offExtra int64, wasErr error) error { - if log.ReadEnabled() { - log.Read.Printf("bypassXRefSection after %v\n", wasErr) - } +func bypassXrefSection(c context.Context, ctx *model.Context, offExtra int64, wasErr error, incr int) error { + ctx.Table = make(map[int]*model.XRefTableEntry) var z int64 g := types.FreeHeadGeneration @@ -1392,7 +1448,6 @@ func bypassXrefSection(c context.Context, ctx *model.Context, offExtra int64, wa Generation: &g} rs := ctx.Read.RS - eolCount := ctx.Read.EolCount var offset int64 rd, err := newPositionedReader(rs, &offset) @@ -1401,12 +1456,16 @@ func bypassXrefSection(c context.Context, ctx *model.Context, offExtra int64, wa } s := bufio.NewScanner(rd) - s.Split(scan.Lines) + s.Split(scan.LinesSingleEOL) + eolCount := 1 - bb := []byte{} var ( + withinObj bool withinXref bool withinTrailer bool + prevLine string + bb []byte + offsetPrev *int64 ) for { @@ -1414,50 +1473,59 @@ func bypassXrefSection(c context.Context, ctx *model.Context, offExtra int64, wa if err != nil { break } + length := len(line) + line = types.TrimLeadingComment(line) + if len(prevLine) > 0 { + line = prevLine + line + prevLine = "" + } if withinXref { - offset += int64(len(line) + eolCount) + offset += int64(length + eolCount) if withinTrailer { + if length == 0 { + continue + } bb = append(bb, '\n') bb = append(bb, line...) - i := strings.Index(line, "startxref") - if i >= 0 { - _, err = processTrailer(c, ctx, s, string(bb), nil, offExtra) - if err == nil { - showRep() - } + if !strings.HasPrefix(line, "startxref") { + continue + } + offsetPrev, err = processTrailer(c, ctx, s, string(bb), nil, offExtra, incr, true) + if err != nil { return err } + model.ShowRepaired("xreftable") + withinXref = false + withinTrailer = false continue } i := strings.Index(line, "trailer") if i >= 0 { - bb = append(bb, line...) + bb = append([]byte{}, line...) withinTrailer = true } continue } i := strings.Index(line, "xref") - if i >= 0 { - offset += int64(len(line) + eolCount) + if ensureNoStartXRef(line, i) { + offset += int64(length + eolCount) withinXref = true continue } - i = strings.Index(line, "obj") - if i >= 0 { - if i > 2 && strings.Index(line, "endobj") != i-3 { - if err := parseAndLoad(c, ctx, line, &offset); err != nil { - return err - } - rd, err = newPositionedReader(ctx.Read.RS, &offset) - if err != nil { + checkEndObj(&withinObj, &line) + if objCandidate(withinObj, line) { + if !strings.HasSuffix(line, "obj") { + withinObj = true + if s, err = processObject(c, ctx, line, &offset, incr, offsetPrev); err != nil { return err } - s = bufio.NewScanner(rd) - s.Split(scan.Lines) continue } + prevLine = line + continue } - offset += int64(len(line) + eolCount) + + offset += int64(length + eolCount) continue } return nil @@ -1467,23 +1535,23 @@ func postProcess(ctx *model.Context, xrefSectionCount int) { // Ensure free object #0 if exactly one xref subsection // and in one of the following weird situations: if xrefSectionCount == 1 && !ctx.Exists(0) { + // Fix for #250 if *ctx.Size == len(ctx.Table)+1 { - // Fix for #262 // Create free object 0 from scratch if the free list head is missing. g0 := types.FreeHeadGeneration ctx.Table[0] = &model.XRefTableEntry{Free: true, Offset: &zero, Generation: &g0} } else { - // Fix for #250 // Create free object 0 by shifting down all objects by one. 
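			// Mechanical effect on a hypothetical table:
			//   before: Table = {1: e1, 2: e2, 3: e3}, *ctx.Size = 3
			//   after : Table = {0: e1, 1: e2, 2: e3}
			// Every entry is re-registered one object number lower and the now
			// duplicate top entry is removed.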
for i := 1; i <= *ctx.Size; i++ { ctx.Table[i-1] = ctx.Table[i] } delete(ctx.Table, *ctx.Size) } + model.ShowRepaired("obj#0") } } -func tryXRefSection(c context.Context, ctx *model.Context, rs io.ReadSeeker, offset *int64, offExtra int64, xrefSectionCount *int) (*int64, error) { +func tryXRefSection(c context.Context, ctx *model.Context, rs io.ReadSeeker, offset *int64, offExtra int64, xrefSectionCount *int, incr int) (*int64, error) { rd, err := newPositionedReader(rs, offset) if err != nil { return nil, err @@ -1507,10 +1575,18 @@ func tryXRefSection(c context.Context, ctx *model.Context, rs io.ReadSeeker, off if log.ReadEnabled() { log.Read.Println("tryXRefSection: found xref section") } - return parseXRefSection(c, ctx, s, xrefSectionCount, offset, offExtra, 0) + return parseXRefSection(c, ctx, s, nil, xrefSectionCount, offset, offExtra, incr) } - // Retry using next line. (Repair fix for #326) + // Repair fix for #823 + if strings.HasPrefix(line, "xref") { + fields := strings.Fields(line) + if len(fields) == 3 { + return parseXRefSection(c, ctx, s, fields[1:], xrefSectionCount, offset, offExtra, incr) + } + } + + // Repair fix for #326 if line, err = scanLine(s); err != nil { return nil, err } @@ -1519,7 +1595,7 @@ func tryXRefSection(c context.Context, ctx *model.Context, rs io.ReadSeeker, off } i := strings.Index(line, "xref") - if i >= 0 { + if i == 0 || (i > 0 && line[i-1] != 't') { // Don't confuse with "startxref". if log.ReadEnabled() { log.Read.Println("tryXRefSection: found xref section") } @@ -1527,7 +1603,8 @@ func tryXRefSection(c context.Context, ctx *model.Context, rs io.ReadSeeker, off if log.ReadEnabled() { log.Read.Printf("Repair offset: %d\n", repairOff) } - return parseXRefSection(c, ctx, s, xrefSectionCount, offset, offExtra, repairOff) + ctx.Read.RepairOffset = int64(repairOff) + return parseXRefSection(c, ctx, s, nil, xrefSectionCount, offset, offExtra, incr) } return &zero, nil @@ -1550,9 +1627,13 @@ func buildXRefTableStartingAt(c context.Context, ctx *model.Context, offset *int ctx.Read.EolCount = eolCount offs := map[int64]bool{} xrefSectionCount := 0 + incr := 0 for offset != nil { + incr++ + //fmt.Printf("Incr: %d\n", incr) + if err := c.Err(); err != nil { return err } @@ -1568,7 +1649,7 @@ func buildXRefTableStartingAt(c context.Context, ctx *model.Context, offset *int offs[*offset] = true - off, err := tryXRefSection(c, ctx, rs, offset, offExtra, &xrefSectionCount) + off, err := tryXRefSection(c, ctx, rs, offset, offExtra, &xrefSectionCount, incr) if err != nil { return err } @@ -1587,9 +1668,9 @@ func buildXRefTableStartingAt(c context.Context, ctx *model.Context, offset *int return err } - if offset, err = parseXRefStream(c, ctx, rd, offset, offExtra); err != nil { + if offset, err = parseXRefStream(c, ctx, rd, offset, offExtra, incr); err != nil { // Try fix for corrupt single xref section. - return bypassXrefSection(c, ctx, offExtra, err) + return bypassXrefSection(c, ctx, offExtra, err, incr) } } @@ -1615,7 +1696,11 @@ func readXRefTable(c context.Context, ctx *model.Context) (err error) { offset, err := offsetLastXRefSection(ctx, 0) if err != nil { - return + if err != ErrMissingXRefSection { + return err + } + zero := int64(0) + offset = &zero } ctx.Write.OffsetPrevXRef = offset @@ -1707,7 +1792,7 @@ func lastStreamMarker(streamInd *int, endInd int, line string) { } // Provide a PDF file buffer of sufficient size for parsing an object w/o stream. 
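// The buffer now grows geometrically (doubling per iteration, capped at
// maximumBufSize) instead of by a fixed defaultBufSize, and keyword detection is
// delegated to model.DetectKeywordsWithContext, so very large dictionaries
// (see TestReadLargeDictObject) are digested without quadratic rescanning and
// "stream"/"endobj" occurring inside string or hex literals are not mistaken for
// the real markers.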
-func buffer(rd io.Reader) (buf []byte, endInd int, streamInd int, streamOffset int64, err error) { +func buffer(c context.Context, rd io.Reader) (buf []byte, endInd int, streamInd int, streamOffset int64, err error) { // process: # gen obj ... obj dict ... {stream ... data ... endstream} ... endobj // streamInd endInd // -1 if absent -1 if absent @@ -1715,16 +1800,24 @@ func buffer(rd io.Reader) (buf []byte, endInd int, streamInd int, streamOffset i //log.Read.Println("buffer: begin") endInd, streamInd = -1, -1 + growSize := defaultBufSize for endInd < 0 && streamInd < 0 { + if err := c.Err(); err != nil { + return nil, 0, 0, 0, err + } - if buf, err = growBufBy(buf, defaultBufSize, rd); err != nil { + if buf, err = growBufBy(buf, growSize, rd); err != nil { return nil, 0, 0, 0, err } + growSize = min(growSize*2, maximumBufSize) line := string(buf) - endInd = strings.Index(line, "endobj") - streamInd = strings.Index(line, "stream") + + endInd, streamInd, err = model.DetectKeywordsWithContext(c, line) + if err != nil { + return nil, 0, 0, 0, err + } if endInd > 0 && (streamInd < 0 || streamInd > endInd) { // No stream marker in buf detected. @@ -1833,6 +1926,16 @@ func singleFilter(c context.Context, ctx *model.Context, filterName string, d ty return []types.PDFFilter{{Name: filterName}}, nil } + if ctx.XRefTable.ValidationMode == model.ValidationRelaxed { + if arr, ok := o.(types.Array); ok && len(arr) == 0 || len(arr) == 1 && arr[0] == nil { + // w/o decode parameters. + if log.ReadEnabled() { + log.Read.Println("singleFilter: end w/o decode parms") + } + return []types.PDFFilter{{Name: filterName}}, nil + } + } + var err error d, ok := o.(types.Dict) if !ok { @@ -1853,6 +1956,17 @@ func singleFilter(c context.Context, ctx *model.Context, filterName string, d ty return []types.PDFFilter{{Name: filterName, DecodeParms: d}}, nil } +func filterArraySupportsDecodeParms(filters types.Array) bool { + for _, obj := range filters { + if name, ok := obj.(types.Name); ok { + if filter.SupportsDecodeParms(name.String()) { + return true + } + } + } + return false +} + // Return the filter pipeline associated with this stream dict. 
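// Illustrative stream dict (values hypothetical):
//
//   << /Length 1234
//      /Filter [ /ASCII85Decode /FlateDecode ]
//      /DecodeParms [ null << /Predictor 12 /Columns 5 >> ]
//   >>
//
// A DecodeParms array is expected to parallel the Filter array entry by entry.
// With this change the strict length check is only applied when at least one
// filter in the array actually supports decode parameters
// (see filterArraySupportsDecodeParms above).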
func pdfFilterPipeline(c context.Context, ctx *model.Context, dict types.Dict) ([]types.PDFFilter, error) { if log.ReadEnabled() { @@ -1895,9 +2009,13 @@ func pdfFilterPipeline(c context.Context, ctx *model.Context, dict types.Dict) ( var decodeParmsArr types.Array decodeParms, found := dict.Find("DecodeParms") if found { - decodeParmsArr, ok = decodeParms.(types.Array) - if !ok || len(decodeParmsArr) != len(filterArray) { - return nil, errors.New("pdfcpu: pdfFilterPipeline: expected decodeParms array corrupt") + if filterArraySupportsDecodeParms(filterArray) { + decodeParmsArr, ok = decodeParms.(types.Array) + if ok { + if len(decodeParmsArr) != len(filterArray) { + return nil, errors.New("pdfcpu: pdfFilterPipeline: expected decodeParms array corrupt") + } + } } } @@ -1966,7 +2084,7 @@ func object(c context.Context, ctx *model.Context, offset int64, objNr, genNr in // streamInd endInd // -1 if absent -1 if absent var buf []byte - if buf, endInd, streamInd, streamOffset, err = buffer(rd); err != nil { + if buf, endInd, streamInd, streamOffset, err = buffer(c, rd); err != nil { return nil, 0, 0, 0, err } @@ -2043,16 +2161,7 @@ func ParseObject(ctx *model.Context, offset int64, objNr, genNr int) (types.Obje return ParseObjectWithContext(context.Background(), ctx, offset, objNr, genNr) } -func ParseObjectWithContext(c context.Context, ctx *model.Context, offset int64, objNr, genNr int) (types.Object, error) { - if log.ReadEnabled() { - log.Read.Printf("ParseObject: begin, obj#%d, offset:%d\n", objNr, offset) - } - - obj, endInd, streamInd, streamOffset, err := object(c, ctx, offset, objNr, genNr) - if err != nil { - return nil, err - } - +func resolveObject(c context.Context, ctx *model.Context, obj types.Object, offset int64, objNr, genNr, endInd, streamInd int, streamOffset int64) (types.Object, error) { switch o := obj.(type) { case types.Dict: @@ -2066,7 +2175,7 @@ func ParseObjectWithContext(c context.Context, ctx *model.Context, offset int64, case types.Array: if ctx.EncKey != nil { - if _, err = decryptDeepObject(o, objNr, genNr, ctx.EncKey, ctx.AES4Strings, ctx.E.R); err != nil { + if _, err := decryptDeepObject(o, objNr, genNr, ctx.EncKey, ctx.AES4Strings, ctx.E.R); err != nil { return nil, err } } @@ -2074,21 +2183,21 @@ func ParseObjectWithContext(c context.Context, ctx *model.Context, offset int64, case types.StringLiteral: if ctx.EncKey != nil { - bb, err := decryptString(o.Value(), objNr, genNr, ctx.EncKey, ctx.AES4Strings, ctx.E.R) + sl, err := decryptStringLiteral(o, objNr, genNr, ctx.EncKey, ctx.AES4Strings, ctx.E.R) if err != nil { return nil, err } - return types.NewHexLiteral(bb), nil + return *sl, nil } return o, nil case types.HexLiteral: if ctx.EncKey != nil { - bb, err := decryptHexLiteral(o, objNr, genNr, ctx.EncKey, ctx.AES4Strings, ctx.E.R) + hl, err := decryptHexLiteral(o, objNr, genNr, ctx.EncKey, ctx.AES4Strings, ctx.E.R) if err != nil { return nil, err } - return types.NewHexLiteral(bb), nil + return *hl, nil } return o, nil @@ -2097,10 +2206,28 @@ func ParseObjectWithContext(c context.Context, ctx *model.Context, offset int64, } } +func ParseObjectWithContext(c context.Context, ctx *model.Context, offset int64, objNr, genNr int) (types.Object, error) { + if log.ReadEnabled() { + log.Read.Printf("ParseObject: begin, obj#%d, offset:%d\n", objNr, offset) + } + + obj, endInd, streamInd, streamOffset, err := object(c, ctx, offset, objNr, genNr) + if err != nil { + if ctx.XRefTable.ValidationMode == model.ValidationRelaxed { + if err == io.EOF { + err = nil + } + } + 
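		// Under relaxed validation an io.EOF is cleared here, so the caller gets
		// (nil, nil) for an object truncated at end of file instead of an error;
		// strict validation still propagates the EOF.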
return nil, err + } + + return resolveObject(c, ctx, obj, offset, objNr, genNr, endInd, streamInd, streamOffset) +} + func dereferencedObject(c context.Context, ctx *model.Context, objNr int) (types.Object, error) { entry, ok := ctx.Find(objNr) if !ok { - return nil, errors.New("pdfcpu: dereferencedObject: unregistered object") + return nil, errors.Errorf("pdfcpu: dereferencedObject: unregistered object: %d", objNr) } if entry.Compressed { @@ -2115,6 +2242,10 @@ func dereferencedObject(c context.Context, ctx *model.Context, objNr int) (types log.Read.Printf("dereferencedObject: dereferencing object %d\n", objNr) } + if entry.Free { + return nil, ErrReferenceDoesNotExist + } + o, err := ParseObjectWithContext(c, ctx, *entry.Offset, objNr, *entry.Generation) if err != nil { return nil, errors.Wrapf(err, "dereferencedObject: problem dereferencing object %d", objNr) @@ -2124,6 +2255,14 @@ func dereferencedObject(c context.Context, ctx *model.Context, objNr int) (types return nil, errors.New("pdfcpu: dereferencedObject: object is nil") } + entry.Object = o + } else if l, ok := entry.Object.(types.LazyObjectStreamObject); ok { + o, err := l.DecodedObject(c) + if err != nil { + return nil, errors.Wrapf(err, "dereferencedObject: problem dereferencing object %d", objNr) + } + + model.ProcessRefCounts(ctx.XRefTable, o) entry.Object = o } @@ -2183,14 +2322,16 @@ func readStreamContentBlindly(rd io.Reader) (buf []byte, err error) { // Weak heuristic for reading in stream data for cases where stream length is unknown. // ...data...{eol}endstream{eol}endobj - if buf, err = growBufBy(buf, defaultBufSize, rd); err != nil { + growSize := defaultBufSize + if buf, err = growBufBy(buf, growSize, rd); err != nil { return nil, err } i := bytes.Index(buf, []byte("endstream")) if i < 0 { for i = -1; i < 0; i = bytes.Index(buf, []byte("endstream")) { - buf, err = growBufBy(buf, defaultBufSize, rd) + growSize = min(growSize*2, maximumBufSize) + buf, err = growBufBy(buf, growSize, rd) if err != nil { return nil, err } @@ -2254,58 +2395,60 @@ func readStreamContent(rd io.Reader, streamLength int) ([]byte, error) { return buf, nil } +func ensureStreamLength(sd *types.StreamDict, fixLength bool) { + l := int64(len(sd.Raw)) + if fixLength || sd.StreamLength == nil || l != *sd.StreamLength { + sd.StreamLength = &l + sd.Dict["Length"] = types.Integer(l) + } +} + // loadEncodedStreamContent loads the encoded stream content into sd. func loadEncodedStreamContent(c context.Context, ctx *model.Context, sd *types.StreamDict, fixLength bool) error { + if sd.Raw != nil { + return nil + } + if log.ReadEnabled() { log.Read.Printf("loadEncodedStreamContent: begin\n%v\n", sd) } var err error - if sd.Raw != nil { - if log.ReadEnabled() { - log.Read.Println("loadEncodedStreamContent: end, already in memory.") - } - return nil - } - // Read stream content encoded at offset with stream length. // Dereference stream length if stream length is an indirect object. 
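	// Illustrative case (object numbers hypothetical) of an indirect stream length:
	//
	//   12 0 obj
	//   << /Length 13 0 R /Filter /FlateDecode >>
	//   stream
	//   ...
	//   endstream
	//   endobj
	//
	//   13 0 obj
	//   4711
	//   endobj
	//
	// Here StreamLengthObjNr refers to object 13 and is resolved below. With relaxed
	// validation a missing Length entry is skipped and a length object that no
	// longer exists is ignored; the actual length is fixed up afterwards via
	// ensureStreamLength.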
if !fixLength && sd.StreamLength == nil { if sd.StreamLengthObjNr == nil { - return errors.New("pdfcpu: loadEncodedStreamContent: missing streamLength") - } - if sd.StreamLength, err = int64Object(c, ctx, *sd.StreamLengthObjNr); err != nil { - return err + if ctx.XRefTable.ValidationMode == model.ValidationStrict { + return errors.New("pdfcpu: loadEncodedStreamContent: missing streamLength") + } + model.ShowSkipped("missing stream length") } - if log.ReadEnabled() { - log.Read.Printf("loadEncodedStreamContent: new indirect streamLength:%d\n", *sd.StreamLength) + if sd.StreamLengthObjNr != nil { + if sd.StreamLength, err = int64Object(c, ctx, *sd.StreamLengthObjNr); err != nil { + if err != ErrReferenceDoesNotExist { + return err + } + } } } - newOffset := sd.StreamOffset - rd, err := newPositionedReader(ctx.Read.RS, &newOffset) + rd, err := newPositionedReader(ctx.Read.RS, &sd.StreamOffset) if err != nil { return err } l1 := 0 - if sd.StreamLength != nil { + if !fixLength && sd.StreamLength != nil { l1 = int(*sd.StreamLength) } - rawContent, err := readStreamContent(rd, l1) + sd.Raw, err = readStreamContent(rd, l1) if err != nil { return err } - l := int64(len(rawContent)) - if fixLength || l != *sd.StreamLength { - sd.StreamLength = &l - sd.Dict["Length"] = types.Integer(l) - } - - sd.Raw = rawContent + ensureStreamLength(sd, fixLength) if log.ReadEnabled() { log.Read.Printf("loadEncodedStreamContent: end: len(streamDictRaw)=%d\n", len(sd.Raw)) @@ -2340,8 +2483,7 @@ func saveDecodedStreamContent(ctx *model.Context, sd *types.StreamDict, objNr, g if sd.Raw, err = decryptStream(sd.Raw, objNr, genNr, ctx.EncKey, ctx.AES4Streams, ctx.E.R); err != nil { return err } - l := int64(len(sd.Raw)) - sd.StreamLength = &l + ensureStreamLength(sd, true) } if !decode { @@ -2497,8 +2639,8 @@ func decodeObjectStream(c context.Context, ctx *model.Context, objNr int) error return errors.Wrapf(err, "decodeObjectStream: problem dereferencing object stream %d", objNr) } - // Save decoded stream content to xRefTable. - if err = saveDecodedStreamContent(ctx, &sd, objNr, *entry.Generation, true); err != nil { + // Will only decrypt, the actual stream content is decoded later lazily. + if err = saveDecodedStreamContent(ctx, &sd, objNr, *entry.Generation, false); err != nil { if log.ReadEnabled() { log.Read.Printf("obj %d: %s", objNr, err) } @@ -2616,10 +2758,14 @@ func loadStreamDict(c context.Context, ctx *model.Context, sd *types.StreamDict, return errors.Wrapf(err, "dereferenceObject: problem dereferencing stream %d", objNr) } + // Decode stream content. + if err := saveDecodedStreamContent(ctx, sd, objNr, genNr, ctx.DecodeAllStreams); err != nil { + return err + } + ctx.Read.BinaryTotalSize += *sd.StreamLength - // Decode stream content. - return saveDecodedStreamContent(ctx, sd, objNr, genNr, ctx.DecodeAllStreams) + return nil } func updateBinaryTotalSize(ctx *model.Context, o types.Object) { @@ -2641,7 +2787,21 @@ func dereferenceAndLoad(c context.Context, ctx *model.Context, objNr int, entry // Parse object from ctx: anything goes dict, array, integer, float, streamdict... 
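	// With relaxed validation a failed parse is retried once at
	// *entry.Offset + ctx.Read.RepairOffset (set by tryXRefSection when the xref
	// keyword was found at a shifted position); if that also fails the object is
	// skipped via model.ShowSkipped instead of aborting the read, unless the error
	// is model.ErrCorruptObjectOffset.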
o, err := ParseObjectWithContext(c, ctx, *entry.Offset, objNr, *entry.Generation) if err != nil { - return errors.Wrapf(err, "dereferenceAndLoad: problem dereferencing object %d", objNr) + if ctx.XRefTable.ValidationMode == model.ValidationStrict { + return errors.Wrapf(err, "dereferenceAndLoad: problem dereferencing object %d", objNr) + } + if ctx.Read.RepairOffset > 0 { + o, err = ParseObjectWithContext(c, ctx, *entry.Offset+ctx.Read.RepairOffset, objNr, *entry.Generation) + } + if err != nil { + model.ShowSkipped(fmt.Sprintf("missing obj #%d", objNr)) + } + if err == model.ErrCorruptObjectOffset { + return err + } + } + if o == nil { + return nil } entry.Object = o @@ -2676,14 +2836,15 @@ func dereferenceAndLoad(c context.Context, ctx *model.Context, objNr int, entry } func dereferenceObject(c context.Context, ctx *model.Context, objNr int) error { - xRefTable := ctx.XRefTable - xRefTableSize := len(xRefTable.Table) - if log.ReadEnabled() { log.Read.Printf("dereferenceObject: begin, dereferencing object %d\n", objNr) } - entry := xRefTable.Table[objNr] + if objNr > ctx.MaxObjNr { + ctx.MaxObjNr = objNr + } + + entry := ctx.Table[objNr] if entry.Free { if log.ReadEnabled() { @@ -2693,7 +2854,7 @@ func dereferenceObject(c context.Context, ctx *model.Context, objNr int) error { } if entry.Compressed { - if err := decompressXRefTableEntry(xRefTable, objNr, entry); err != nil { + if err := decompressXRefTableEntry(ctx.XRefTable, objNr, entry); err != nil { return err } //log.Read.Printf("dereferenceObject: decompressed entry, Compressed=%v\n%s\n", entry.Compressed, entry.Object) @@ -2719,7 +2880,7 @@ func dereferenceObject(c context.Context, ctx *model.Context, objNr int) error { logStream(entry.Object) updateBinaryTotalSize(ctx, o) if log.ReadEnabled() { - log.Read.Printf("dereferenceObject: using cached object %d of %d\n<%s>\n", objNr, xRefTableSize, entry.Object) + log.Read.Printf("dereferenceObject: using cached object %d of %d\n<%s>\n", objNr, ctx.MaxObjNr+1, entry.Object) } return nil } @@ -2733,41 +2894,60 @@ func dereferenceObject(c context.Context, ctx *model.Context, objNr int) error { return nil } -func processDictRefCounts(xRefTable *model.XRefTable, d types.Dict) { - for _, e := range d { - switch o1 := e.(type) { - case types.IndirectRef: - xRefTable.IncrementRefCount(&o1) - case types.Dict: - processRefCounts(xRefTable, o1) - case types.Array: - processRefCounts(xRefTable, o1) +func dereferenceObjectsSorted(c context.Context, ctx *model.Context) error { + xRefTable := ctx.XRefTable + var keys []int + for k := range xRefTable.Table { + keys = append(keys, k) + } + sort.Ints(keys) + + for _, objNr := range keys { + if err := c.Err(); err != nil { + return err + } + if err := dereferenceObject(c, ctx, objNr); err != nil { + return err } } -} -func processArrayRefCounts(xRefTable *model.XRefTable, a types.Array) { - for _, e := range a { - switch o1 := e.(type) { - case types.IndirectRef: - xRefTable.IncrementRefCount(&o1) - case types.Dict: - processRefCounts(xRefTable, o1) - case types.Array: - processRefCounts(xRefTable, o1) + for _, objNr := range keys { + entry := xRefTable.Table[objNr] + if entry.Free || entry.Compressed { + continue + } + if err := c.Err(); err != nil { + return err } + model.ProcessRefCounts(xRefTable, entry.Object) } + + return nil } -func processRefCounts(xRefTable *model.XRefTable, o types.Object) { - switch o := o.(type) { - case types.Dict: - processDictRefCounts(xRefTable, o) - case types.StreamDict: - processDictRefCounts(xRefTable, o.Dict) - case 
types.Array: - processArrayRefCounts(xRefTable, o) +func dereferenceObjectsRaw(c context.Context, ctx *model.Context) error { + xRefTable := ctx.XRefTable + for objNr := range xRefTable.Table { + if err := c.Err(); err != nil { + return err + } + if err := dereferenceObject(c, ctx, objNr); err != nil { + return err + } } + + for objNr := range xRefTable.Table { + entry := xRefTable.Table[objNr] + if entry.Free || entry.Compressed { + continue + } + if err := c.Err(); err != nil { + return err + } + model.ProcessRefCounts(xRefTable, entry.Object) + } + + return nil } // Dereferences all objects including compressed objects from object streams. @@ -2776,58 +2956,21 @@ func dereferenceObjects(c context.Context, ctx *model.Context) error { log.Read.Println("dereferenceObjects: begin") } - xRefTable := ctx.XRefTable - + f := dereferenceObjectsRaw if log.StatsEnabled() { + f = dereferenceObjectsSorted + } - var keys []int - for k := range xRefTable.Table { - keys = append(keys, k) - } - sort.Ints(keys) - - for _, objNr := range keys { - if err := c.Err(); err != nil { - return err - } - if err := dereferenceObject(c, ctx, objNr); err != nil { - return err - } - } - - for _, objNr := range keys { - entry := xRefTable.Table[objNr] - if entry.Free || entry.Compressed { - continue - } - if err := c.Err(); err != nil { - return err - } - processRefCounts(xRefTable, entry.Object) + if err := f(c, ctx); err != nil { + if err != model.ErrCorruptObjectOffset { + return err } - - } else { - - for objNr := range xRefTable.Table { - if err := c.Err(); err != nil { - return err - } - if err := dereferenceObject(c, ctx, objNr); err != nil { - return err - } + if err := bypassXrefSection(c, ctx, 0, err, 1); err != nil { + return err } - - for objNr := range xRefTable.Table { - entry := xRefTable.Table[objNr] - if entry.Free || entry.Compressed { - continue - } - if err := c.Err(); err != nil { - return err - } - processRefCounts(xRefTable, entry.Object) + if err = f(c, ctx); err != nil { + return err } - } if log.ReadEnabled() { @@ -2857,7 +3000,13 @@ func identifyRootVersion(xRefTable *model.XRefTable) error { // Validate version and save corresponding constant to xRefTable. rootVersion, err := model.PDFVersion(*rootVersionStr) if err != nil { - return errors.Wrapf(err, "identifyRootVersion: unknown PDF Root version: %s\n", *rootVersionStr) + if xRefTable.ValidationMode == model.ValidationStrict { + return errors.Wrapf(err, "identifyRootVersion: unknown PDF Root version: %s\n", *rootVersionStr) + } + rootVersion, err = model.PDFVersionRelaxed(*rootVersionStr) + if err != nil { + return errors.Wrapf(err, "identifyRootVersion: unknown PDF Root version: %s\n", *rootVersionStr) + } } xRefTable.RootVersion = &rootVersion @@ -2878,7 +3027,7 @@ func identifyRootVersion(xRefTable *model.XRefTable) error { // Parse all Objects including stream content from file and save to the corresponding xRefTableEntries. // This includes processing of object streams and linearization dicts. 
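// Note on the dereferencing pass (see dereferenceObjectsRaw/Sorted above): objects
// are first parsed and loaded, then reference counts are accumulated via
// model.ProcessRefCounts in a second sweep; if a corrupt object offset is detected
// the xref table is rebuilt via bypassXrefSection and the pass is retried once.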
-func dereferenceXRefTable(c context.Context, ctx *model.Context, conf *model.Configuration) error { +func dereferenceXRefTable(c context.Context, ctx *model.Context) error { if log.ReadEnabled() { log.Read.Println("dereferenceXRefTable: begin") } @@ -2946,7 +3095,7 @@ func handlePermissions(ctx *model.Context) error { } if !ok { - return errors.New("pdfcpu: corrupted permissions after upw ok") + return errors.New("pdfcpu: invalid permissions after upw ok") } if ctx.OwnerPW == "" && ctx.UserPW == "" { @@ -2955,7 +3104,7 @@ func handlePermissions(ctx *model.Context) error { // Double check minimum permissions for pdfcpu processing. if !hasNeededPermissions(ctx.Cmd, ctx.E) { - return errors.New("pdfcpu: operation restriced via pdfcpu's permission bits setting") + return errors.New("pdfcpu: operation restricted via pdfcpu's permission bits setting") } return nil @@ -2993,7 +3142,7 @@ func setupEncryptionKey(ctx *model.Context, d types.Dict) (err error) { return err } if !ok { - return errors.New("pdfcpu: corrupted permissions after opw ok") + return errors.New("pdfcpu: invalid permissions after opw ok") } return nil } @@ -3028,6 +3177,10 @@ func checkForEncryption(c context.Context, ctx *model.Context) error { return errors.New("pdfcpu: this file is already encrypted") } + if ctx.Cmd == model.VALIDATESIGNATURE || ctx.Cmd == model.ADDSIGNATURE { + return errors.New("pdfcpu: this file is encrypted") + } + // Dereference encryptDict. d, err := dereferencedDict(c, ctx, indRef.ObjectNumber.Value()) if err != nil { diff --git a/pkg/pdfcpu/read_test.go b/pkg/pdfcpu/read_test.go index bd81d3d8..c9445e97 100644 --- a/pkg/pdfcpu/read_test.go +++ b/pkg/pdfcpu/read_test.go @@ -17,11 +17,17 @@ limitations under the License. package pdfcpu import ( + "bytes" "context" + "encoding/hex" "errors" "os" "path/filepath" "testing" + "time" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" ) func TestReadFileContext(t *testing.T) { @@ -55,3 +61,144 @@ func TestReadContext(t *testing.T) { t.Errorf("should have failed with timeout, got %s", err) } } + +func TestReadLargeDictObject(t *testing.T) { + // Test with "stream" and "endobj" inside the dictionary. + var fp bytes.Buffer + fp.WriteString("123 0 obj\n") + data := make([]byte, 10*1024*1024) + fp.WriteString("<<") + fp.WriteString("/Foo <") + fp.WriteString(hex.EncodeToString(data)) + fp.WriteString(">\n") + fp.WriteString("/Bar (stream)\n") + fp.WriteString("/Baz (endobj)\n") + fp.WriteString("/Test <") + fp.WriteString(hex.EncodeToString(data)) + fp.WriteString(">\n") + fp.WriteString(">>\n") + fp.WriteString("stream\n") + fp.WriteString("Hello world!\n") + fp.WriteString("endstream\n") + fp.WriteString("endobj\n") + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + // Dummy pdfcpu context to be used for parsing a single object. 
+ c := &model.Context{ + Read: &model.ReadContext{ + RS: bytes.NewReader(fp.Bytes()), + }, + XRefTable: &model.XRefTable{}, + } + o, err := ParseObjectWithContext(ctx, c, 0, 123, 0) + if err != nil { + t.Fatal(err) + } + + d, ok := o.(types.StreamDict) + if !ok { + t.Fatalf("expected StreamDict, got %T", o) + } + + if err := loadEncodedStreamContent(ctx, c, &d, true); err != nil { + t.Fatal(err) + } + + if foo := d.HexLiteralEntry("Foo"); foo == nil { + t.Error("expected Foo entry") + } else if expected := hex.EncodeToString(data); foo.Value() != expected { + t.Errorf("Foo value mismatch, expected %d bytes, got %d", len(expected), len(foo.Value())) + } + + if bar := d.StringEntry("Bar"); bar == nil { + t.Error("expected Bar entry") + } else if expected := "stream"; *bar != expected { + t.Errorf("expected %s for Bar, got %s", expected, *bar) + } + + if baz := d.StringEntry("Baz"); baz == nil { + t.Error("expected Baz entry") + } else if expected := "endobj"; *baz != expected { + t.Errorf("expected %s for Baz, got %s", expected, *baz) + } + + if err := d.Decode(); err != nil { + t.Fatal(err) + } + + if expected := "Hello world!"; string(d.Content) != expected { + t.Errorf("expected stream content %s, got %s", expected, string(d.Content)) + } +} + +func TestReadLargeDictObjectStream(t *testing.T) { + // Test without "stream" and "endobj" inside the dictionary. + var fp bytes.Buffer + fp.WriteString("123 0 obj\n") + data := make([]byte, 10*1024*1024) + fp.WriteString("<<") + fp.WriteString("/Foo <") + fp.WriteString(hex.EncodeToString(data)) + fp.WriteString(">\n") + fp.WriteString("/Bar (Test)\n") + fp.WriteString("/Baz <") + fp.WriteString(hex.EncodeToString(data)) + fp.WriteString(">\n") + fp.WriteString(">>\n") + fp.WriteString("stream\n") + fp.WriteString("Hello world!\n") + fp.WriteString("endstream\n") + fp.WriteString("endobj\n") + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + // Dummy pdfcpu context to be used for parsing a single object. 
+ c := &model.Context{ + Read: &model.ReadContext{ + RS: bytes.NewReader(fp.Bytes()), + }, + XRefTable: &model.XRefTable{}, + } + o, err := ParseObjectWithContext(ctx, c, 0, 123, 0) + if err != nil { + t.Fatal(err) + } + + d, ok := o.(types.StreamDict) + if !ok { + t.Fatalf("expected StreamDict, got %T", o) + } + + if err := loadEncodedStreamContent(ctx, c, &d, true); err != nil { + t.Fatal(err) + } + + if foo := d.HexLiteralEntry("Foo"); foo == nil { + t.Error("expected Foo entry") + } else if expected := hex.EncodeToString(data); foo.Value() != expected { + t.Errorf("Foo value mismatch, expected %d bytes, got %d", len(expected), len(foo.Value())) + } + + if bar := d.StringEntry("Bar"); bar == nil { + t.Error("expected Bar entry") + } else if expected := "Test"; *bar != expected { + t.Errorf("expected %s for Bar, got %s", expected, *bar) + } + + if baz := d.HexLiteralEntry("Baz"); baz == nil { + t.Error("expected Baz entry") + } else if expected := hex.EncodeToString(data); baz.Value() != expected { + t.Errorf("Foo value mismatch, expected %d bytes, got %d", len(expected), len(baz.Value())) + } + + if err := d.Decode(); err != nil { + t.Fatal(err) + } + + if expected := "Hello world!"; string(d.Content) != expected { + t.Errorf("expected stream content %s, got %s", expected, string(d.Content)) + } +} diff --git a/pkg/pdfcpu/resize.go b/pkg/pdfcpu/resize.go index c4dd83e6..201a1713 100644 --- a/pkg/pdfcpu/resize.go +++ b/pkg/pdfcpu/resize.go @@ -181,7 +181,7 @@ func resizePage(ctx *model.Context, pageNr int, res *model.Resize) error { var trans bytes.Buffer fmt.Fprintf(&trans, "q %.5f %.5f %.5f %.5f %.5f %.5f cm ", m[0][0], m[0][1], m[1][0], m[1][1], m[2][0], m[2][1]) - bb, err := ctx.PageContent(d) + bb, err := ctx.PageContent(d, pageNr) if err == model.ErrNoContent { return nil } diff --git a/pkg/pdfcpu/scan/scan.go b/pkg/pdfcpu/scan/scan.go index f7d9d2b7..1f1bf7f5 100644 --- a/pkg/pdfcpu/scan/scan.go +++ b/pkg/pdfcpu/scan/scan.go @@ -63,3 +63,40 @@ func Lines(data []byte, atEOF bool) (advance int, token []byte, err error) { // Request more data. return 0, nil, nil } + +func LinesSingleEOL(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + + indCR := bytes.IndexByte(data, '\r') + indLF := bytes.IndexByte(data, '\n') + + switch { + + case indCR >= 0 && indLF >= 0: + if indCR < indLF { + // \r + return indCR + 1, data[0:indCR], nil + } + // \n + return indLF + 1, data[0:indLF], nil + + case indCR >= 0: + // \r + return indCR + 1, data[0:indCR], nil + + case indLF >= 0: + // \n + return indLF + 1, data[0:indLF], nil + + } + + // If we're at EOF, we have a final, non-terminated line. Return it. + if atEOF { + return len(data), data, nil + } + + // Request more data. + return 0, nil, nil +} diff --git a/pkg/pdfcpu/sign.go b/pkg/pdfcpu/sign.go new file mode 100644 index 00000000..f60a4176 --- /dev/null +++ b/pkg/pdfcpu/sign.go @@ -0,0 +1,385 @@ +/* +Copyright 2025 The pdf Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package pdfcpu + +import ( + "crypto/x509" + "fmt" + "io" + "sort" + "strings" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/sign" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" + "github.com/pkg/errors" +) + +// ValidateSignatures validates all digital signatures of ctx. +func ValidateSignatures(ra io.ReaderAt, ctx *model.Context, all bool) ([]*model.SignatureValidationResult, error) { + var results []*model.SignatureValidationResult + + if ctx.URSignature != nil { + svr, err := validateURSignature(ctx.URSignature, ctx, ra) + if err != nil { + return nil, err + } + results = append(results, svr) + } + + incrs := make([]int, 0, len(ctx.Signatures)) + for k := range ctx.Signatures { + incrs = append(incrs, k) + } + sort.Ints(incrs) + + first, ok := true, false + + // NOTE: Long term validation is restricted to processing the latest doc timestamp (contained in the last increment). + + // Process all increments chronologically in reverse order. + for i, inc := range incrs { + for _, sig := range ctx.Signatures[inc] { + + if i > 0 && sig.Type == model.SigTypeDTS { + continue + } + + svr, err := validateSignature(sig, ctx, ra, first, all) + if err != nil { + return nil, err + } + results = append(results, svr) + + if sig.Type == model.SigTypeDTS { + continue + } + + if all { + first = false + continue + } + + if checkForAbortAfterFirst(first, svr, ctx) { + ok = true + break + } + + first = false + } + if ok { + break + } + } + + return results, nil +} + +func checkForAbortAfterFirst(first bool, svr *model.SignatureValidationResult, ctx *model.Context) bool { + if first { + if ctx.CertifiedSigObjNr == 0 || (svr.Certified() && svr.Permissions() != model.CertifiedSigPermNoChangesAllowed) { + return true + } + } + return svr.Certified() +} + +func validateURSignature(sigDict types.Dict, ctx *model.Context, ra io.ReaderAt) (*model.SignatureValidationResult, error) { + sig := model.Signature{Type: model.SigTypeUR, Visible: false, Signed: true} + result := model.SignatureValidationResult{Signature: sig} + + result.Status = model.SignatureStatusUnknown + result.Reason = model.SignatureReasonUnknown + result.DocModified = model.Unknown + + result.Details = model.SignatureDetails{} + result.Details.SignerIdentity = "Unknown" + + if err := resultDetails(sigDict, ctx, &result.Details); err != nil { + return nil, err + } + + subFilter := sigDict.NameEntry("SubFilter") + if subFilter == nil { + result.AddProblem("missing sigDict \"SubFilter\"") + result.Reason = model.SignatureReasonInternal + return &result, nil + } + result.Details.SubFilter = *subFilter + + var f func( + ra io.ReaderAt, + sigDict types.Dict, + certified bool, + authoriative bool, + validateAll bool, + perms int, + rootCerts *x509.CertPool, + result *model.SignatureValidationResult, + ctx *model.Context) error + + switch *subFilter { + case "adbe.x509.rsa_sha1": // deprecated as of PDF 2.0 + f = sign.ValidateX509RSASHA1Signature + case "adbe.pkcs7.sha1": // deprecated as of PDF 2.0 + f = sign.ValidatePKCS7Signatures + case "adbe.pkcs7.detached": + f = sign.ValidatePKCS7Signatures + case "ETSI.CAdES.detached": + f = sign.ValidatePKCS7Signatures + //case "ETSI.RFC3161": + // TODO: Contents shall be the TimeStampToken as specified in Internet RFC 3161 as updated by Internet RFC 5816. 
+ default: + result.AddProblem(fmt.Sprintf("unsupported subFilter: %s", *subFilter)) + return &result, nil + } + + return &result, f(ra, sigDict, false, false, true, 0, model.UserCertPool, &result, ctx) +} + +func validateSignature(sig model.Signature, ctx *model.Context, ra io.ReaderAt, first, all bool) (*model.SignatureValidationResult, error) { + sigField, err := ctx.DereferenceDict(*types.NewIndirectRef(sig.ObjNr, 0)) + if err != nil { + return nil, err + } + + result := model.SignatureValidationResult{Signature: sig} + + result.Status = model.SignatureStatusUnknown + result.Reason = model.SignatureReasonUnknown + result.DocModified = model.Unknown + + result.Details = model.SignatureDetails{} + result.Details.SignerIdentity = "Unknown" + + if sigField == nil { + result.AddProblem("missing signature field") + result.Reason = model.SignatureReasonInternal + return &result, nil + } + + if sl := sigField.StringLiteralEntry("T"); sl != nil { + s, err := types.StringLiteralToString(*sl) + if err != nil { + return nil, err + } + result.Details.FieldName = strings.TrimSpace(s) + } + + indRef := sigField.IndirectRefEntry("V") + if indRef == nil { + result.AddProblem("missing signature dict") + result.Reason = model.SignatureReasonInternal + return &result, nil + } + + sigDict, err := ctx.DereferenceDict(*indRef) + if err != nil { + result.AddProblem(fmt.Sprintf("%v", err)) + result.Reason = model.SignatureReasonInternal + return &result, nil + } + + subFilter := sigDict.NameEntry("SubFilter") + if subFilter == nil { + result.AddProblem("missing sigDict \"SubFilter\"") + result.Reason = model.SignatureReasonInternal + return &result, nil + } + result.Details.SubFilter = *subFilter + + result.Signature.Certified = indRef.ObjectNumber.Value() == ctx.CertifiedSigObjNr + if first && ctx.CertifiedSigObjNr == 0 { + result.Signature.Authoritative = true + } + + if err := resultDetails(sigDict, ctx, &result.Details); err != nil { + return nil, err + } + + perms, err := detectPermissions(sigDict, ctx) + if err != nil { + return nil, err + } + + f := sigHandler(*subFilter) + + if f == nil { + result.AddProblem(fmt.Sprintf("unsupported subFilter: %s", *subFilter)) + return &result, nil + } + + return &result, f(ra, sigDict, result.Signature.Certified, result.Signature.Authoritative, all, perms, model.UserCertPool, &result, ctx) +} + +func sigHandler(subFilter string) func( + ra io.ReaderAt, + sigDict types.Dict, + certified bool, + authoriative bool, + validateAll bool, + perms int, + rootCerts *x509.CertPool, + result *model.SignatureValidationResult, + ctx *model.Context) error { + + switch subFilter { + case "adbe.x509.rsa_sha1": // deprecated as of PDF 2.0 + return sign.ValidateX509RSASHA1Signature + case "adbe.pkcs7.sha1": // deprecated as of PDF 2.0 + return sign.ValidatePKCS7Signatures + case "adbe.pkcs7.detached": + return sign.ValidatePKCS7Signatures + case "ETSI.CAdES.detached": + return sign.ValidatePKCS7Signatures + case "ETSI.RFC3161": + return sign.ValidateDTS + } + + return nil +} + +func resultDetails(sigDict types.Dict, ctx *model.Context, resultDetails *model.SignatureDetails) error { + if sl := sigDict.StringLiteralEntry("Name"); sl != nil { + s, err := types.StringLiteralToString(*sl) + if err != nil { + return err + } + resultDetails.SignerName = strings.TrimSpace(s) + } + + if sl := sigDict.StringLiteralEntry("ContactInfo"); sl != nil { + s, err := types.StringLiteralToString(*sl) + if err != nil { + return err + } + resultDetails.ContactInfo = strings.TrimSpace(s) + } + + if sl := 
sigDict.StringLiteralEntry("Location"); sl != nil { + s, err := types.StringLiteralToString(*sl) + if err != nil { + return err + } + resultDetails.Location = strings.TrimSpace(s) + } + + if sl := sigDict.StringLiteralEntry("Reason"); sl != nil { + s, err := types.StringLiteralToString(*sl) + if err != nil { + return err + } + resultDetails.Reason = strings.TrimSpace(s) + } + + if o, ok := sigDict.Find("M"); ok { + // informational (cannot be relied upon for long term validation) + s, err := ctx.DereferenceStringOrHexLiteral(o, model.V10, nil) + if err != nil { + return err + } + if s != "" { + if t, ok := types.DateTime(s, ctx.XRefTable.ValidationMode == model.ValidationRelaxed); ok { + resultDetails.SigningTime = t + } + } + } + + return nil +} + +func detectPermissions(sigDict types.Dict, ctx *model.Context) (int, error) { + o, found := sigDict.Find("Reference") + if !found { + return 0, nil + } + + arr, err := ctx.DereferenceArray(o) + if err != nil || len(arr) == 0 { + return 0, err + } + + // Process signature reference dicts. + + // TODO Process UR3 Params + // + // + // + // + // + // + // + // + // >>> + // + // >> + // ]> + + for _, obj := range arr { + d, err := ctx.DereferenceDict(obj) + if err != nil { + return 0, err + } + if tm := d.NameEntry("TransformMethod"); tm == nil || *tm != "DocMDP" { + continue + } + d1 := d.DictEntry("TransformParams") + if len(d1) == 0 { + continue + } + typ := d1.Type() + if typ == nil || *typ != "TransformParams" { + continue + } + i := d1.IntEntry("P") + if i != nil { + if *i < 1 || *i > 3 { + return 0, errors.Errorf("invalid DocMDP permissions detected: %d ", *i) + } + return *i, nil + } + return 2, nil // default + } + + /* + array of signature reference dictionaries: + + + + + + Modification Detection and Prevention + + + constant + >>> + + >> + ]> + + parse the xref tables across all incremental updates. + Detect and classify new or modified objects added after the signed byte range. + */ + + return 0, nil +} diff --git a/pkg/pdfcpu/sign/dts.go b/pkg/pdfcpu/sign/dts.go new file mode 100644 index 00000000..60bacc37 --- /dev/null +++ b/pkg/pdfcpu/sign/dts.go @@ -0,0 +1,217 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package sign + +import ( + "crypto/x509" + "encoding/asn1" + "fmt" + "io" + "time" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" + "github.com/hhrutter/pkcs7" + "github.com/pkg/errors" +) + +type AlgorithmIdentifier struct { + Algorithm asn1.ObjectIdentifier + Parameters asn1.RawValue `asn1:"tag:0,optional"` +} + +type TSTInfo struct { + Version int + Policy asn1.ObjectIdentifier + MessageImprint struct { + HashAlgorithm AlgorithmIdentifier + HashedMessage []byte + } + SerialNumber asn1.RawValue + GenTime time.Time + Accuracy asn1.RawValue `asn1:"optional"` + Ordering bool `asn1:"optional"` + Nonce asn1.RawValue `asn1:"optional"` + TSA asn1.RawValue `asn1:"optional"` + Extensions asn1.RawValue `asn1:"optional"` +} + +// ValidateDTS validates an ETSI.RFC3161 digital timestamp. +func ValidateDTS( + ra io.ReaderAt, + sigDict types.Dict, + certified bool, + authoritative bool, + validateAll bool, + perms int, + rootCerts *x509.CertPool, + result *model.SignatureValidationResult, + ctx *model.Context) error { + + // The last increment contains the DocTimeStamp only. + + // TODO if DocMDP ignore DTS. + + // Note: perms are disregarded for ETSI.RFC3161. + + if ctx.Configuration.Offline { + result.AddProblem("pdfcpu is offline, unable to perform certificate revocation checking") + } + + p7 := validateP7(sigDict, result) + if p7 == nil { + return nil + } + + signer := &model.Signer{} + result.Details.AddSigner(signer) + + certs := p7.Certificates + + var ( + dssCerts []*x509.Certificate + crls [][]byte + ocsps [][]byte + ok bool + ) + + if len(ctx.DSS) > 0 { + if dssCerts, crls, ocsps, ok = processDSS(ctx, signer); ok { + certs = mergeCerts(certs, dssCerts) + } + } + + if !p7.ContentType.Equal(oidTSTInfo) { + signer.AddProblem("\"ETSI.RFC3161\": missing timestamp info") + return nil + } + + var tstInfo TSTInfo + if _, err := asn1.Unmarshal(p7.Content, &tstInfo); err != nil { + signer.AddProblem("\"ETSI.RFC3161\": invalid timestamp info") + return nil + } + + // TODO Check + // ByteRange shall cover the entire document, including the Document Time-stamp dictionary + // but excluding the TimeStampToken itself (the entry with key Contents). + data, err := signedData(ra, sigDict) + if err != nil { + result.Reason = model.SignatureReasonInternal + result.AddProblem(fmt.Sprintf("\"ETSI.RFC3161\": unmarshal asn1 content: %v", err)) + return nil + } + + if ok := checkDTSDigest(&tstInfo, data, signer); !ok { + return nil + } + + if result.Status == model.SignatureStatusUnknown { + if result.DocModified == model.Unknown { + result.DocModified = model.False + } + } + + p7Signer := p7.Signers[0] + + signerCert := pkcs7.GetCertFromCertsByIssuerAndSerial(certs, p7Signer.IssuerAndSerialNumber) + if signerCert == nil { + signer.AddProblem("\"ETSI.RFC3161\": missing certificate for signer") + return nil + } + + if err := pkcs7.CheckSignature(signerCert, p7Signer, nil); err != nil { + signer.AddProblem(fmt.Sprintf("\"ETSI.RFC3161\": signature verification failure: %v", err)) + return nil + } + + signingTime := tstInfo.GenTime + signer.Timestamp = signingTime + signer.HasTimestamp = true + result.Details.SigningTime = signingTime + + // Ensure issueing TSA is trusted. 
+ validateDTSCert(signingTime, signerCert, certs, rootCerts, crls, ocsps, signer, result, ctx) + + return nil +} + +func validateDTSCert(signingTime time.Time, + signerCert *x509.Certificate, + certs []*x509.Certificate, + rootCerts *x509.CertPool, + crls, ocsps [][]byte, + signer *model.Signer, + result *model.SignatureValidationResult, + ctx *model.Context) { + + if signingTime.After(signerCert.NotAfter) || signingTime.Before(signerCert.NotBefore) { + signer.AddProblem(fmt.Sprintf("\"ETSI.RFC3161\": signing time %q is outside of certificate validity %q to %q", + signingTime.Format(time.RFC3339), + signerCert.NotBefore.Format(time.RFC3339), + signerCert.NotAfter.Format(time.RFC3339))) + return + } + + // Does signerCert chain up to a trusted Root CA? + chains := buildP7CertChains(true, signerCert, certs, rootCerts, signer, &signingTime, result) + if len(chains) == 0 { + chains = [][]*x509.Certificate{certChain(signerCert, certs)} + } + + validateCertChains(chains, rootCerts, signer, &signingTime, crls, ocsps, result, ctx.Configuration) + + finalizeDTSResult(result, ctx, signingTime) +} + +func checkDTSDigest(tstInfo *TSTInfo, data []byte, signer *model.Signer) bool { + + oidHashAlg := tstInfo.MessageImprint.HashAlgorithm.Algorithm + digest := tstInfo.MessageImprint.HashedMessage + + if err := pkcs7.VerifyMessageDigestTSToken(oidHashAlg, digest, data); err != nil { + var mdErr *pkcs7.MessageDigestMismatchError + if errors.As(err, &mdErr) { + signer.AddProblem(fmt.Sprintf("\"ETSI.RFC3161\": message digest verification failure: %v", err)) + return false + } + signer.AddProblem(fmt.Sprintf("\"ETSI.RFC3161\": message digest verification: %v", err)) + return false + } + + return true +} + +func collectIntermediates(signerCert *x509.Certificate, certs []*x509.Certificate) []*x509.Certificate { + var intermediates []*x509.Certificate + for _, cert := range certs { + if !cert.Equal(signerCert) { + intermediates = append(intermediates, cert) + } + } + return intermediates +} + +func finalizeDTSResult(result *model.SignatureValidationResult, ctx *model.Context, signingTime time.Time) { + if result.Status == model.SignatureStatusUnknown && result.Reason == model.SignatureReasonUnknown { + result.Status = model.SignatureStatusValid + result.Reason = model.SignatureReasonDocNotModified + ctx.DTS = signingTime + } else { + ctx.DTS = time.Time{} + } +} diff --git a/pkg/pdfcpu/sign/oid.go b/pkg/pdfcpu/sign/oid.go new file mode 100644 index 00000000..c52c7a69 --- /dev/null +++ b/pkg/pdfcpu/sign/oid.go @@ -0,0 +1,55 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package sign + +import "encoding/asn1" + +var ( + oidETSIQCPublicWithSSCD = asn1.ObjectIdentifier{0, 4, 0, 1456, 1, 1} // Qualified Certificate ETSI + oidSigPolicy = asn1.ObjectIdentifier{0, 4, 0, 2023, 1, 1} // ETSI Qualified Signature Policy (for EU Qualified Electronic Signatures) + oidQualSealPolicy = asn1.ObjectIdentifier{0, 4, 0, 2023, 1, 2} // ETSI Qualified Seal Policy (for legal entity seals) + oidAdvSigPolicy = asn1.ObjectIdentifier{0, 4, 0, 2023, 1, 3} // ETSI Advanced Signature Policy (for advanced e-signatures) + oidAdvSigLTVPolicy = asn1.ObjectIdentifier{0, 4, 0, 2023, 1, 4} // Advanced Signature with long term validation support + oidQESLTVPolicy = asn1.ObjectIdentifier{0, 4, 0, 2023, 1, 5} // QES with LTV (qualified + long-term archive) + oidQCESign = asn1.ObjectIdentifier{0, 4, 0, 194112, 1, 2} // Qualified Certificate for Electronic Signatures + oidQCESeal = asn1.ObjectIdentifier{0, 4, 0, 194112, 1, 3} // Qualified Certificate for Electronic Seals + oidQWebAuthCert = asn1.ObjectIdentifier{0, 4, 0, 194112, 1, 4} // Web Authentication Certificate + oidRSAESOAEP = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 1, 10} // RSAES-OAEP + oidData = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 7, 1} // PAdES-E-BES content-type, signed + oidMessageDigest = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 4} // PAdES-E-BES, signed + oidSigningTime = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 5} // PKSC#7, signed + oidTSTInfo = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 1, 4} // Time Stamp Token Information, ETSI.RFC3161 + oidSigningCertificate = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 12} // PAdES-E-BES, signed + oidTimestampToken = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 14} // PAdES-T, unsigned + oidSigPolicyID = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 15} // PAdES-EPES, signed + oidCommitmentType = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 16} // PAdES-EPES, signed + oidContentTimestamp = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 20} // PAdES-T, signed + oidCompleteCertificateRefs = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 21} // CAdES-C, unsigned + oidCompleteRevocationRefs = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 22} // CAdES-C, unsigned + oidCertificateValues = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 23} // CAdES-X, unsigned + oidRevocationValues = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 24} // CAdES-X, unsigned + oidArchiveTimestamp = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 27} // CAdES-A, unsigned + oidSigningCertificateV2 = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 47} // PAdES-E-BES, signed + oidProofOfOrigin = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 6, 1} // Signer claims authorship + oidProofOfReceipt = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 6, 2} // Signer acknowledges receipt + oidProofOfDelivery = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 6, 3} // Signer confirms delivery + oidProofOfSender = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 6, 4} // Signer confirms they sent the data + oidProofOfApproval = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 6, 5} // Signer approves content + oidProofOfCreation = asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 6, 6} // Signer created the content + oidRevocationInfoArchival = asn1.ObjectIdentifier{1, 2, 840, 113583, 1, 1, 8} // Embedded revocation data, signed + oidOCSPNoCheck = asn1.ObjectIdentifier{1, 3, 6, 1, 5, 5, 7, 48, 1, 
5} // OSCP responder cert extension +) diff --git a/pkg/pdfcpu/sign/pkcs1.go b/pkg/pdfcpu/sign/pkcs1.go new file mode 100644 index 00000000..99e7999d --- /dev/null +++ b/pkg/pdfcpu/sign/pkcs1.go @@ -0,0 +1,230 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sign + +import ( + "crypto" + "crypto/rsa" + "crypto/sha1" + "crypto/x509" + "encoding/asn1" + "fmt" + "io" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" + "github.com/pkg/errors" +) + +// ValidateX509RSASHA1Signature validates signatures using subFilter adbe.x509.rsa_sha1. +func ValidateX509RSASHA1Signature( + ra io.ReaderAt, + sigDict types.Dict, + certified bool, + authoritative bool, + validateAll bool, + perms int, + rootCerts *x509.CertPool, + result *model.SignatureValidationResult, + ctx *model.Context) error { + + if ctx.Configuration.Offline { + result.AddProblem("pdfcpu is offline, unable to perform certificate revocation checking") + } + + signer := &model.Signer{} + result.Details.AddSigner(signer) + + signer.Certified = certified + signer.Authoritative = signer.Certified || authoritative + signer.Permissions = perms + + if signer.Certified && signer.Permissions != model.CertifiedSigPermNoChangesAllowed { + // TODO Check for violation of perm 2 and 3 + result.AddProblem(CertifiedSigPermsNotSupported) + result.Reason = model.SignatureReasonInternal + } + + p1Certs, err := parseP1Certificates(sigDict) + if err != nil { + result.Reason = model.SignatureReasonCertNotTrusted + result.AddProblem(fmt.Sprintf("cannot verify certificate %v", err)) + result.AddProblem("skipped certificate revocation check") + return nil + } + + cert := p1Certs[0] + + rsaPubKey := cert.PublicKey.(*rsa.PublicKey) + reason, err := verifyRSASHA1Signature(ra, sigDict, rsaPubKey) + if err != nil { + if reason == model.SignatureReasonDocModified { + // Signature is invalid and document has been modified. + result.Status = model.SignatureStatusInvalid + result.Reason = model.SignatureReasonSignatureForged + result.DocModified = model.True + } + if reason == model.SignatureReasonInternal { + result.Status = model.SignatureStatusInvalid + result.Reason = model.SignatureReasonInternal + } + result.AddProblem(fmt.Sprintf("%v", err)) + return nil + } + + if result.Reason == model.SignatureReasonDocNotModified { + result.DocModified = model.False + } + + // Signature is authenticated and the signer is who they claim to be. + // Document has not been modified since time of signing. + + // Does cert chain up to a trusted Root CA? 
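+ // Try to build a verified chain from the signer certificate up to one of the supplied root CAs.
+ // If that fails, fall back to the chain embedded in the signature dict so that certificate
+ // details can still be reported and checked below.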
+ chains := buildP1CertChains(cert, rootCerts, signer, result) + + if len(chains) == 0 { + chains = [][]*x509.Certificate{certChain(cert, p1Certs)} + } + + validateCertChains(chains, rootCerts, signer, nil, nil, nil, result, ctx.Configuration) + + if result.Status == model.SignatureStatusUnknown && result.Reason == model.SignatureReasonUnknown { + result.Status = model.SignatureStatusValid + result.Reason = model.SignatureReasonDocNotModified + } + + return nil +} + +func parseP1Certificates(sigDict types.Dict) ([]*x509.Certificate, error) { + obj, ok := sigDict.Find("Cert") + if !ok { + // TODO Find certificate by other means. + return nil, errors.New("pdfcpu: missing \"Cert\"") + } + + var chain []*x509.Certificate + + switch obj := obj.(type) { + case types.Array: + for _, v := range obj { + cert, err := certFromObj(v) + if err != nil { + return nil, err + } + chain = append(chain, cert) + } + + case types.StringLiteral: + cert, err := certFromStringLiteral(obj) + if err != nil { + return nil, err + } + chain = append(chain, cert) + + case types.HexLiteral: + cert, err := certFromHexLiteral(obj) + if err != nil { + return nil, err + } + chain = append(chain, cert) + + default: + return nil, errors.New("pdfcpu: invalid entry: \"Cert\"") + } + + return chain, nil +} + +func certFromObj(obj types.Object) (*x509.Certificate, error) { + switch obj := obj.(type) { + case types.StringLiteral: + return certFromStringLiteral(obj) + case types.HexLiteral: + return certFromHexLiteral(obj) + } + return nil, errors.Errorf("unable to parse certificate for %T", obj) +} + +func certFromStringLiteral(obj types.StringLiteral) (*x509.Certificate, error) { + bb, err := types.Unescape(obj.Value()) + if err != nil { + return nil, err + } + return x509.ParseCertificate(bb) +} + +func certFromHexLiteral(obj types.HexLiteral) (*x509.Certificate, error) { + bb, err := obj.Bytes() + if err != nil { + return nil, err + } + return x509.ParseCertificate(bb) +} + +func verifyRSASHA1Signature(ra io.ReaderAt, sigDict types.Dict, rsaPubKey *rsa.PublicKey) (model.SignatureReason, error) { + // Use public key from the signer's certificate to verify the RSA signature. + // The signature itself is an RSA-encrypted SHA-1 hash of the signed data. + hl := sigDict.HexLiteralEntry("Contents") + if hl == nil { + return model.SignatureReasonInternal, errors.New("invalid signature dict - missing \"Contents\"") + } + + contents, err := hl.Bytes() + if err != nil { + return model.SignatureReasonInternal, errors.Errorf("invalid content data: %v", err) + } + + var bb []byte + if _, err = asn1.Unmarshal(contents, &bb); err != nil { + return model.SignatureReasonInternal, errors.Errorf("unmarshal asn1 content: %v", err) + } + + data, err := signedData(ra, sigDict) + if err != nil { + return model.SignatureReasonInternal, errors.Errorf("unmarshal asn1 content: %v", err) + } + + // Combine hash calculation and signature verification. + + // Hash signed data (extracted using ByteRange) using SHA-1, 160 Bits = 20 bytes + hashed := sha1.Sum(data) + + // Confirm that the signature was created using the private key corresponding to the public key from the certificate. 
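+ // bb is the raw RSA signature extracted from the DER OCTET STRING in "Contents" above.
+ // VerifyPKCS1v15 checks that bb is a valid PKCS#1 v1.5 signature of the SHA-1 digest under rsaPubKey.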
+ if err := rsa.VerifyPKCS1v15(rsaPubKey, crypto.SHA1, hashed[:], bb); err != nil { + return model.SignatureReasonDocModified, errors.Errorf("RSA PKCS#1v15 signature verification failure: %v\n", err) + } + + return model.SignatureReasonDocNotModified, nil +} + +func buildP1CertChains( + cert *x509.Certificate, + rootCerts *x509.CertPool, + signer *model.Signer, + result *model.SignatureValidationResult) [][]*x509.Certificate { + + chains, err := cert.Verify(x509.VerifyOptions{Roots: rootCerts}) + if err != nil { + handleCertVerifyErr(err, cert, signer, result) + return nil + } + + result.Details.SignerIdentity = cert.Subject.CommonName + + return chains +} diff --git a/pkg/pdfcpu/sign/pkcs7.go b/pkg/pdfcpu/sign/pkcs7.go new file mode 100644 index 00000000..837f9d6b --- /dev/null +++ b/pkg/pdfcpu/sign/pkcs7.go @@ -0,0 +1,493 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sign + +import ( + "crypto/x509" + "encoding/asn1" + "fmt" + "io" + "time" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" + "github.com/hhrutter/pkcs7" + "github.com/pkg/errors" +) + +// ValidatePKCS7Signature validates contained signatures using subFilter adbe.pkcs7.sha1, adbe.pkcs7.detached and ETSI.CAdES.detached. +func ValidatePKCS7Signatures( + ra io.ReaderAt, + sigDict types.Dict, + certified bool, + authoritative bool, + validateAll bool, + perms int, + rootCerts *x509.CertPool, + result *model.SignatureValidationResult, + ctx *model.Context) error { + + if ctx.Configuration.Offline { + result.AddProblem("pdfcpu is offline, unable to perform certificate revocation checking") + } + + p7 := validateP7(sigDict, result) + if p7 == nil { + return nil + } + + data, err := signedData(ra, sigDict) + if err != nil { + result.Reason = model.SignatureReasonInternal + result.AddProblem(fmt.Sprintf("unmarshal asn1 content: %v", err)) + return nil + } + + detached := len(p7.Content) == 0 + if detached { + p7.Content = data + } + + for i, p7Signer := range p7.Signers { + verifyP7Signer(p7Signer, p7.Certificates, rootCerts, p7.Content, data, detached, certified, authoritative, perms, i, result, ctx) + if (certified || authoritative) && !validateAll { + break + } + } + + finalizePKCS7Result(result) + + return nil +} + +func finalizePKCS7Result(result *model.SignatureValidationResult) { + if result.Status == model.SignatureStatusUnknown && result.Reason == model.SignatureReasonUnknown { + result.Status = model.SignatureStatusValid + result.Reason = model.SignatureReasonDocNotModified + } else { + // Show PAdES basic level for valid signatures only. 
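+ // The overall result carries problems, so clear the PAdES level reported for the first signer
+ // rather than advertising a level for a signature that did not validate cleanly.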
+ if len(result.Details.Signers) > 0 { + result.Details.Signers[0].PAdES = "" + } + } +} + +func p7(sigDict types.Dict) (*pkcs7.PKCS7, error) { + hl := sigDict.HexLiteralEntry("Contents") + if hl == nil { + return nil, errors.New("invalid signature dict - missing \"Contents\"") + } + + signature, err := hl.Bytes() + if err != nil { + return nil, errors.Errorf("invalid content data: %v", err) + } + + p7, err := pkcs7.Parse(signature) + if err != nil { + return nil, errors.Errorf("failed to parse PKCS#7: %v", err) + } + + return p7, nil +} + +func verifyP7Signer( + p7Signer pkcs7.SignerInfo, + p7Certs []*x509.Certificate, + rootCerts *x509.CertPool, + p7Content []byte, + data []byte, + detached bool, + certified bool, + authoritative bool, + perms, i int, + result *model.SignatureValidationResult, + ctx *model.Context) { + + conf := ctx.Configuration + + signer := &model.Signer{} + result.Details.AddSigner(signer) + + signer.Certified = certified + signer.Authoritative = signer.Certified || authoritative + signer.Permissions = perms + + checkPerms(signer, result) + + if ok := checkP7Digest(p7Signer, p7Content, data, detached, signer, result); !ok { + return + } + + if result.Status == model.SignatureStatusUnknown { + if result.DocModified == model.Unknown { + result.DocModified = model.False + } + } + + signerCert := pkcs7.GetCertFromCertsByIssuerAndSerial(p7Certs, p7Signer.IssuerAndSerialNumber) + if signerCert == nil { + result.Reason = model.SignatureReasonInternal + signer.AddProblem(fmt.Sprintf("pkcs7: missing certificate for signer %d", i+1)) + return + } + + if err := verifyP7Signature(p7Signer, signerCert, p7Content, detached); err != nil { + if result.Status == model.SignatureStatusUnknown { + result.Status = model.SignatureStatusInvalid + result.Reason = model.SignatureReasonSignatureForged + } + signer.AddProblem(fmt.Sprintf("pkcs7: signature verification failure: %v\n", err)) + return + } + + // Signature is authenticated and the signer is who they claim to be. + + if detached { + signer.PAdES = "B-B" + } + + // Process optional DSS and DTS for embedded revocation info and trusted timestamp. + // This may upgrade PAdES level to B-T, B-LT, B-LTA respectively. + + // Calculate the signingTime we use for validation. + // Use either a present timestamp token or document timestamp. + // Fallback to claimed signingTime and in absence to time.Now(). + + // TODO Handle oidArchiveTimestamp + + var signingTime *time.Time + + signingTime = handleClaimedSigningTime(p7Signer, signer, result) + + if !ctx.DTS.IsZero() { + if result.Details.SigningTime.After(ctx.DTS) { + signer.AddProblem(fmt.Sprintf("Claimed signing time: %s is not before document timestamp: %s", + result.Details.SigningTime.Format(conf.TimestampFormat), + ctx.DTS.Format(conf.TimestampFormat))) + } + } + + if ts := checkTimestampToken(detached, p7Signer, rootCerts, ctx, signer, result); ts != nil { + signingTime = ts + } + + // Look for embedded revocation info. + crls, ocsps := handleArchivedRevocationInfo(p7Signer, signer) + + certs := p7Certs + + handleDSS(&certs, &crls, &ocsps, ctx, signer, detached) + + // Does signerCert chain up to a trusted Root CA? 
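+ // Chain building uses the resolved signing time (timestamp token, document timestamp or claimed
+ // signing time) as the verification time. If no chain up to a supplied root CA can be built,
+ // fall back to the chain embedded in the PKCS#7 container so its certificates can still be reported.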
+ chains := buildP7CertChains(i == 0, signerCert, certs, rootCerts, signer, signingTime, result) + if len(chains) == 0 { + chains = [][]*x509.Certificate{certChain(signerCert, certs)} + } + + validateCertChains(chains, rootCerts, signer, signingTime, crls, ocsps, result, ctx.Configuration) +} + +func checkPerms(signer *model.Signer, result *model.SignatureValidationResult) { + if signer.Certified && signer.Permissions != model.CertifiedSigPermNoChangesAllowed { + // TODO Check for violation of perm 2 and 3 + signer.AddProblem(CertifiedSigPermsNotSupported) + result.Reason = model.SignatureReasonInternal + } +} + +func checkP7Digest( + p7Signer pkcs7.SignerInfo, + p7Content, + data []byte, detached bool, + signer *model.Signer, + result *model.SignatureValidationResult) bool { + + reason, err := verifyP7Digest(p7Signer, p7Content, data, detached) + if err == nil { + return true + } + + if result.Status == model.SignatureStatusUnknown { + if reason == model.SignatureReasonDocModified { + // Document has been modified since time of signing. + result.Status = model.SignatureStatusInvalid + result.Reason = model.SignatureReasonDocModified + result.DocModified = model.True + } + if reason == model.SignatureReasonInternal { + //result.Status = model.SignatureStatusInvalid + result.Reason = model.SignatureReasonInternal + } + } + + signer.AddProblem(fmt.Sprintf("%v", err)) + return false +} + +func verifyP7Digest(p7Signer pkcs7.SignerInfo, p7Content []byte, data []byte, detached bool) (model.SignatureReason, error) { + // Verify Message Digest + // Calculate fingerprint and compare with p7.Digest (content hash comparison). + // Ensures integrity of the document content itself and ensures that the document has not been tampered with since it was signed. + + if detached { + + if len(p7Signer.AuthenticatedAttributes) == 0 { + return model.SignatureReasonInternal, errors.New("pkcs7: missing authenticated attributes") + } + + if err := pkcs7.VerifyMessageDigestDetached(p7Signer, p7Content); err != nil { + var mdErr *pkcs7.MessageDigestMismatchError + if errors.As(err, &mdErr) { + return model.SignatureReasonDocModified, errors.Errorf("pkcs7: message digest verification failure: %v\n", err) + } + return model.SignatureReasonInternal, errors.Errorf("pkcs7: message digest verification: %v\n", err) + } + + } else { + + if err := pkcs7.VerifyMessageDigestEmbedded(p7Content, data); err != nil { + return model.SignatureReasonDocModified, errors.Errorf("pkcs7: message digest verification failure: %v\n", err) + } + + } + + return model.SignatureReasonDocNotModified, nil +} + +func checkTimestampToken( + detached bool, + p7Signer pkcs7.SignerInfo, + rootCerts *x509.CertPool, + ctx *model.Context, + signer *model.Signer, + result *model.SignatureValidationResult) (signingTime *time.Time) { + + token := handleTimestampToken(p7Signer, rootCerts, signer, result) + + if token != nil { + signingTime = token + signer.HasTimestamp = true + signer.Timestamp = *token + if detached { + signer.PAdES = "B-T" + } + } else if !ctx.DTS.IsZero() { + signingTime = &ctx.DTS + signer.HasTimestamp = true + signer.Timestamp = ctx.DTS + if detached { + signer.PAdES = "B-T" + } + } + + return signingTime +} + +func handleDSS(certs *[]*x509.Certificate, crls *[][]byte, ocsps *[][]byte, ctx *model.Context, signer *model.Signer, detached bool) { + if len(ctx.DSS) > 0 { + if dssCerts, dssCRLs, dssOCSPs, ok := processDSS(ctx, signer); ok { + *certs = mergeCerts(*certs, dssCerts) + if len(dssCRLs) > 0 { + *crls = dssCRLs + } + if 
len(dssOCSPs) > 0 { + *ocsps = dssOCSPs + } + if detached && signer.PAdES == "B-T" { + signer.PAdES = "B-LT" + } + signer.LTVEnabled = true + } + } + + if signer.PAdES == "B-LT" && !ctx.DTS.IsZero() { + signer.PAdES = "B-LTA" + } +} + +func verifyP7Signature(p7Signer pkcs7.SignerInfo, cert *x509.Certificate, p7Content []byte, detached bool) error { + // Verify signature against expected hash using the public key. + // Ensures integrity and authenticity of the signature itself. + // Confirms the signer is who they claim to be. + + var content []byte + if !detached { + content = p7Content + } + return pkcs7.CheckSignature(cert, p7Signer, content) +} + +func handleClaimedSigningTime(signerInfo pkcs7.SignerInfo, signer *model.Signer, result *model.SignatureValidationResult) *time.Time { + var ( + err error + signingTime time.Time + ) + + for _, attr := range signerInfo.AuthenticatedAttributes { + if attr.Type.Equal(oidSigningTime) { + _, err = asn1.Unmarshal(attr.Value.Bytes, &signingTime) + break + } + } + + if err != nil { + signer.AddProblem(fmt.Sprintf("invalid signing time: %v", err)) + if result.Status == model.SignatureStatusUnknown { + result.Reason = model.SignatureReasonSigningTimeInvalid + } + return nil + } + + if !signingTime.IsZero() { + result.Details.SigningTime = signingTime + return &signingTime + } + + return nil +} + +func timestampToken(p7Signer pkcs7.SignerInfo, rootCerts *x509.CertPool) (time.Time, error) { + // A trusted timestamp token aka trusted signing time. + if bb := locateTimestampToken(p7Signer); len(bb) > 0 { + return validateTimestampToken(bb, rootCerts) + } + return time.Time{}, nil +} + +func locateTimestampToken(signerInfo pkcs7.SignerInfo) []byte { + for _, attr := range signerInfo.UnauthenticatedAttributes { + if attr.Type.Equal(oidTimestampToken) { + return attr.Value.Bytes + } + } + return nil +} + +func validateTimestampToken(data []byte, rootCAs *x509.CertPool) (time.Time, error) { + var defTime time.Time + p7, err := pkcs7.Parse(data) + if err != nil { + return defTime, errors.Errorf("failed to parse timestamp token: %v", err) + } + + if len(p7.Signers) != 1 { + return defTime, errors.Errorf("malformed timestamp token") + } + signer := p7.Signers[0] + + // if err := p7.VerifyWithChain(rootCAs); err != nil { + // return defTime, errors.Errorf("timestamp token signature verification failed: %v", err) + // } + + for _, attr := range signer.AuthenticatedAttributes { + if attr.Type.Equal(oidSigningTime) { + var rawValue asn1.RawValue + if _, err := asn1.Unmarshal(attr.Value.Bytes, &rawValue); err != nil { + return defTime, errors.Errorf("failed to unmarshal signing time: %v", err) + } + if rawValue.Tag == asn1.TagUTCTime { + return time.Parse("060102150405Z", string(rawValue.Bytes)) + } + if rawValue.Tag == asn1.TagGeneralizedTime { + return time.Parse("20060102150405Z", string(rawValue.Bytes)) + } + return defTime, errors.Errorf("unexpected tag for signing time: %d", rawValue.Tag) + } + } + + return defTime, errors.New("unable to resolve timestamp info") +} + +func handleArchivedRevocationInfo(p7Signer pkcs7.SignerInfo, signer *model.Signer) (crls [][]byte, ocsps [][]byte) { + if !signer.HasTimestamp { + return nil, nil + } + ria, err := revocationInfoArchival(p7Signer) + if err != nil { + signer.LTVEnabled = true + signer.AddProblem(fmt.Sprintf("revocationInfoArchival extraction failed: %v", err)) + } + if ria == nil { + return nil, nil + } + + signer.LTVEnabled = true + + for _, raw := range ria.CRLs { + crls = append(crls, raw.FullBytes) + } + + for 
_, raw := range ria.OCSPs { + ocsps = append(ocsps, raw.FullBytes) + } + + return +} + +func buildP7CertChains( + first bool, + cert *x509.Certificate, + certs []*x509.Certificate, + rootCerts *x509.CertPool, + signer *model.Signer, + signingTime *time.Time, + result *model.SignatureValidationResult) [][]*x509.Certificate { + + currentTime := time.Now() + if signingTime != nil { + currentTime = *signingTime + } + + intermediates := collectIntermediates(cert, certs) + chains, err := pkcs7.VerifyCertChain(cert, intermediates, rootCerts, currentTime) + if err != nil { + handleCertVerifyErr(err, cert, signer, result) + return nil + } + if first { + result.Details.SignerIdentity = cert.Subject.CommonName + } + return chains +} + +func handleTimestampToken(p7Signer pkcs7.SignerInfo, rootCerts *x509.CertPool, signer *model.Signer, result *model.SignatureValidationResult) *time.Time { + ts, err := timestampToken(p7Signer, rootCerts) + if err != nil { + signer.HasTimestamp = true + signer.AddProblem(fmt.Sprintf("invalid TimestampToken: %v", err)) + if result.Status == model.SignatureStatusUnknown { + result.Reason = model.SignatureReasonTimestampTokenInvalid + } + } else if !ts.IsZero() { + signer.HasTimestamp = true + signer.Timestamp = ts + return &ts + } + return nil +} + +func revocationInfoArchival(p7Signer pkcs7.SignerInfo) (*RevocationInfoArchival, error) { + for _, attr := range p7Signer.AuthenticatedAttributes { + if attr.Type.Equal(oidRevocationInfoArchival) { + var ria RevocationInfoArchival + _, err := asn1.Unmarshal(attr.Value.Bytes, &ria) + return &ria, err + } + } + return nil, nil +} diff --git a/pkg/pdfcpu/sign/revocate.go b/pkg/pdfcpu/sign/revocate.go new file mode 100644 index 00000000..aef2386f --- /dev/null +++ b/pkg/pdfcpu/sign/revocate.go @@ -0,0 +1,510 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package sign + +import ( + "bytes" + "crypto/x509" + "encoding/asn1" + "fmt" + "io" + "net/http" + "slices" + "time" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/pkg/errors" + "golang.org/x/crypto/ocsp" +) + +const ( + crlReasonUnspecified = iota + crlReasonKeyCompromise + crlReasonCACompromise + crlReasonAffiliationChanged + crlReasonSuperseded + crlReasonCessationOfOperation + crlReasonCertificateHold + _ // unused + crlReasonRemoveFromCRL + crlReasonPrivilegeWithdrawn + crlReasonAACompromise +) + +type OtherRevInfo struct { + Type asn1.ObjectIdentifier + Value []byte +} + +type RevocationInfoArchival struct { + CRLs []asn1.RawValue `asn1:"optional,explicit,tag:0"` // [0] EXPLICIT SEQUENCE of CRLs, OPTIONAL RFC 5280 + OCSPs []asn1.RawValue `asn1:"optional,explicit,tag:1"` // [1] EXPLICIT SEQUENCE of OCSPResponse, OPTIONAL RFC 6960 + OtherRevInfo []OtherRevInfo `asn1:"optional,explicit,tag:2"` // [2] EXPLICIT SEQUENCE of OtherRevInfo, OPTIONAL +} + +func checkRevocation( + cert, issuer *x509.Certificate, + rootCerts *x509.CertPool, + signer *model.Signer, + certDetails *model.CertificateDetails, + signingTime *time.Time, + crls [][]byte, + ocsps [][]byte, + result *model.SignatureValidationResult, + conf *model.Configuration) { + + revocationDetails, err := checkCertificateRevocation(cert, issuer, rootCerts, signer, signingTime, crls, ocsps, conf) + if err != nil { + signer.AddProblem(fmt.Sprintf("certificate revocation check failed: %v", err)) + certDetails.Revocation.Reason = fmt.Sprintf("%v", err) + if result.Reason == model.SignatureReasonUnknown { + result.Reason = model.SignatureReasonCertNotTrusted + } + return + } + + certDetails.Revocation = *revocationDetails + + // The certificate is revoked and considered invalid. + if certDetails.Revocation.Status == model.False { + if result.Reason == model.SignatureReasonUnknown { + result.Reason = model.SignatureReasonCertRevoked + } + return + } + + // The certificate revocation status is unknown. + if certDetails.Revocation.Status == model.Unknown { + if result.Reason == model.SignatureReasonUnknown { + result.Reason = model.SignatureReasonCertNotTrusted + } + } +} + +func checkCertificateRevocation( + cert, issuer *x509.Certificate, + rootCerts *x509.CertPool, + signer *model.Signer, + signingTime *time.Time, + crls [][]byte, + ocsps [][]byte, + conf *model.Configuration) (*model.RevocationDetails, error) { + + // Hybrid Approach - configure your preferredCertRevocationChecker in config.yml + + var f1, f2 func( + cert, issuer *x509.Certificate, + rootCerts *x509.CertPool, + signingTime *time.Time, + bbb [][]byte, // crls or ocsps + conf *model.Configuration) (*model.RevocationDetails, error) + + pcrc := conf.PreferredCertRevocationChecker + + if len(crls) > 0 && len(ocsps) == 0 { + pcrc = model.CRL + } + if len(crls) == 0 && len(ocsps) > 0 { + pcrc = model.OCSP + } + + f1, f2 = checkCertAgainstCRL, checkCertViaOCSP + f1bbb, f2bbb := crls, ocsps + if pcrc == model.OCSP { + f1, f2 = f2, f1 + f1bbb, f2bbb = f2bbb, f1bbb + } + + revocationDetails, err := f1(cert, issuer, rootCerts, signingTime, f1bbb, conf) + if err == nil { + return revocationDetails, nil + } + + s := "CRL" + if pcrc == model.OCSP { + s = "OCSP" + } + signer.AddProblem(fmt.Sprintf("%s certificate revocation check failed: %v", s, err)) + + // Fall back revocation checker. 
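+ // The preferred checker failed: try the other mechanism (OCSP instead of CRL, or vice versa)
+ // before giving up on the revocation check.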
+ return f2(cert, issuer, rootCerts, signingTime, f2bbb, conf) +} + +func checkCertAgainstCRL( + cert, issuer *x509.Certificate, + rootCerts *x509.CertPool, + signingTime *time.Time, + crls [][]byte, + conf *model.Configuration) (*model.RevocationDetails, error) { + + if signingTime != nil && len(crls) > 0 { + // Assumption: signingTime in the past + rd, err := processArchivedCRLs(cert, *signingTime, crls) + if err != nil || rd != nil { + return rd, err + } + } + + if conf.Offline { + return nil, errors.New("offline: unable to check CRLs") + } + + if len(cert.CRLDistributionPoints) == 0 { + return nil, errors.New("no CRL distribution points found") + } + + return processCurrentCRLs(cert, conf) +} + +func processArchivedCRLs(cert *x509.Certificate, signingTime time.Time, crls [][]byte) (*model.RevocationDetails, error) { + + const ( + reasonUnspecified = 0 + reasonKeyCompromise = 1 + reasonCACompromise = 2 + ) + + ok := false + for _, bb := range crls { + crl, err := x509.ParseRevocationList(bb) + if err != nil { + return nil, errors.Errorf("failed to process archived CRL: %v", err) + } + + if crl.NextUpdate.IsZero() || crl.ThisUpdate.After(signingTime) || crl.NextUpdate.Before(signingTime) { + continue + } + + ok = true + + for _, revoked := range crl.RevokedCertificateEntries { + if revoked.SerialNumber.Cmp(cert.SerialNumber) != 0 { + continue + } + + switch revoked.ReasonCode { + case reasonUnspecified, reasonKeyCompromise, reasonCACompromise: + if !revoked.RevocationTime.After(signingTime) { + return &model.RevocationDetails{ + Status: model.False, + Reason: fmt.Sprintf("CRL: revoked due to %v at %v (before or at signing time)", revoked.ReasonCode, revoked.RevocationTime), + }, nil + } + + default: + if revoked.RevocationTime.Before(signingTime) { + return &model.RevocationDetails{ + Status: model.False, + Reason: fmt.Sprintf("CRL: revoked due to %v at %v (before signing time)", revoked.ReasonCode, revoked.RevocationTime), + }, nil + } + return &model.RevocationDetails{Status: model.True, Reason: "revoked after signing time, not relevant for timestamp"}, nil + } + } + } + if ok { + return &model.RevocationDetails{Status: model.True, Reason: "not revoked (CRL check ok)"}, nil + } + return nil, nil +} + +func processCurrentCRLs(cert *x509.Certificate, conf *model.Configuration) (*model.RevocationDetails, error) { + client := &http.Client{ + Timeout: time.Duration(conf.TimeoutCRL) * time.Second, + } + + now := time.Now() + + for _, url := range cert.CRLDistributionPoints { + + resp, err := client.Get(url) + if err != nil { + return nil, errors.Errorf("failed to fetch CRL from %s: %v", url, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, errors.Errorf("CRL responder at: %s returned http status: %d", url, resp.StatusCode) + } + + crlData, err := io.ReadAll(resp.Body) + if err != nil { + return nil, errors.Errorf("CRL: read error: %v", err) + } + + crl, err := x509.ParseRevocationList(crlData) + if err != nil { + return nil, errors.Errorf("CRL: parse error: %v", err) + } + + if now.Before(crl.ThisUpdate) || now.After(crl.NextUpdate) { + continue + } + + for _, revoked := range crl.RevokedCertificateEntries { + if revoked.SerialNumber.Cmp(cert.SerialNumber) == 0 { + return &model.RevocationDetails{Status: model.False, Reason: "revoked (CRL check not ok)"}, nil + } + } + } + + return &model.RevocationDetails{Status: model.True, Reason: "not revoked (CRL check ok)"}, nil +} + +func checkCertViaOCSP( + cert, issuer *x509.Certificate, + rootCerts 
*x509.CertPool, + signingTime *time.Time, + ocsps [][]byte, + conf *model.Configuration) (*model.RevocationDetails, error) { + + if conf.Offline { + return nil, errors.New("offline: unable to contact OSCP responder") // / unable to verify OSCP certificate") + } + + client := &http.Client{ + Timeout: time.Duration(conf.TimeoutOCSP) * time.Second, + } + + if issuer == nil { + c, err := getIssuerCertificate(cert, rootCerts, client) + if err != nil { + return nil, errors.Errorf("OCSP: failed to load certificate issuer: %v", err) + } + issuer = c + } + + if signingTime != nil && len(ocsps) > 0 { + // Assumption: signingTime in the past + rd, err := processArchivedOCSPResponses(cert, issuer, rootCerts, *signingTime, ocsps, client) + if err != nil || rd != nil { + return rd, err + } + } + + if len(cert.OCSPServer) == 0 { + return nil, errors.New("no OCSP responder found in certificate") + } + + return processCurrentOCSPResponses(cert, issuer, rootCerts, client) +} + +func processArchivedOCSPResponses( + cert, issuer *x509.Certificate, + rootCerts *x509.CertPool, + signingTime time.Time, + ocsps [][]byte, + client *http.Client) (*model.RevocationDetails, error) { + + var lastErr error + + for _, bb := range ocsps { + resp, err := ocsp.ParseResponseForCert(bb, cert, issuer) + if err != nil { + lastErr = err + continue + } + + if err := checkArchivedOCSPResponse(resp, signingTime); err != nil { + return nil, err + } + + if err := checkResponderCert(resp, rootCerts); err != nil { + return nil, err + } + + switch resp.Status { + case ocsp.Good: + return &model.RevocationDetails{Status: model.True, Reason: "not revoked (OCSP responder says \"Good\")"}, nil + case ocsp.Revoked: + return &model.RevocationDetails{Status: model.False, Reason: "revoked (OCSP responder says \"Revoked\")"}, nil + case ocsp.Unknown: + return &model.RevocationDetails{Status: model.Unknown, Reason: "OCSP responder returned \"Unknown\""}, nil + } + } + + if lastErr != nil { + return nil, errors.Errorf("no valid OCSP response found, last parse error: %v", lastErr) + } + return nil, errors.New("no valid OCSP response found") +} + +func checkArchivedOCSPResponse(resp *ocsp.Response, signingTime time.Time) error { + const skew = 5 * time.Minute + + // ProducedAt should not be before this update. + if !resp.ProducedAt.IsZero() && resp.ProducedAt.Before(resp.ThisUpdate) { + // TODO Warning instead of error + return errors.New("OCSP: response ProducedAt is before ThisUpdate") + } + + // ProducedAt should not be after signing time (with tolerance). + if !resp.ProducedAt.IsZero() && resp.ProducedAt.After(signingTime.Add(skew)) { + return errors.New("OCSP: response is suspicious") + } + + // NextUpdate should not be before signing time (expired). + if !resp.NextUpdate.IsZero() && resp.NextUpdate.Before(signingTime) { + return errors.New("OCSP: response is expired") + } + + // ThisUpdate should not be after signing time (with tolerance). 
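+ // A response produced only after the signing time cannot vouch for the certificate's status
+ // at signing time; the skew tolerates modest clock drift between responder and signer.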
+ if resp.ThisUpdate.After(signingTime.Add(skew)) { + return errors.New("OCSP: ThisUpdate is after signing time") + } + + return nil +} + +func processCurrentOCSPResponses( + cert, issuer *x509.Certificate, + rootCerts *x509.CertPool, + client *http.Client) (*model.RevocationDetails, error) { + + ocspRequest, err := ocsp.CreateRequest(cert, issuer, nil) + if err != nil { + return nil, errors.Errorf("OCSP: failed to create request: %v", err) + } + + ocspURL := cert.OCSPServer[0] + + resp, err := client.Post(ocspURL, "application/ocsp-request", io.NopCloser(bytes.NewReader(ocspRequest))) + if err != nil { + return nil, errors.Errorf("OCSP: failed to send request to %s: %v", ocspURL, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, errors.Errorf("OCSP responder at: %s returned http status: %d", ocspURL, resp.StatusCode) + } + + ocspResponseData, err := io.ReadAll(resp.Body) + if err != nil { + return nil, errors.Errorf("OCSP: failed to read response: %v", err) + } + + ocspResponse, err := ocsp.ParseResponse(ocspResponseData, nil) + if err != nil { + return nil, errors.Errorf("OCSP: failed to parse response: %v", err) + } + + if err := checkCurrentOCSPResponse(ocspResponse); err != nil { + return nil, err + } + + if err := checkResponderCert(ocspResponse, rootCerts); err != nil { + return nil, err + } + + switch ocspResponse.Status { + case ocsp.Good: + return &model.RevocationDetails{Status: model.True, Reason: "not revoked (OCSP responder says \"Good\")"}, nil + case ocsp.Revoked: + return &model.RevocationDetails{Status: model.False, Reason: "revoked (OCSP responder says \"Revoked\")"}, nil + case ocsp.Unknown: + return &model.RevocationDetails{Status: model.Unknown, Reason: "OCSP responder says \"Unknown\""}, nil + } + + return nil, errors.New("unexpected OCSP response") +} + +func checkCurrentOCSPResponse(resp *ocsp.Response) error { + const skew = 5 * time.Minute + now := time.Now() + + // ProducedAt should not be before this update. + if !resp.ProducedAt.IsZero() && resp.ProducedAt.Before(resp.ThisUpdate) { + // TODO Warning instead of error + return errors.New("OCSP: response ProducedAt is before ThisUpdate") + } + + // ProducedAt should not be in the future (with tolerance). + if !resp.ProducedAt.IsZero() && resp.ProducedAt.After(now.Add(skew)) { + return errors.Errorf("OCSP: response ProducedAt (%v) is in the future", resp.ProducedAt) + } + + // ThisUpdate should not be in the future (with tolerance). + if resp.ThisUpdate.After(now.Add(skew)) { + return errors.Errorf("OCSP: ThisUpdate (%v) is in the future", resp.ThisUpdate) + } + + // NextUpdate should not be in the past (expired). 
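+ // An expired response no longer provides assurance about the current revocation status;
+ // the caller may then fall back to the alternate revocation checker.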
+ if !resp.NextUpdate.IsZero() && resp.NextUpdate.Before(now) { + return errors.Errorf("OCSP: response is expired (NextUpdate: %v < now: %v)", resp.NextUpdate, now) + } + + return nil +} + +func checkResponderCert(resp *ocsp.Response, rootCerts *x509.CertPool) error { + cert, err := findOCSPResponderCert(resp, rootCerts) + if err != nil { + return errors.Errorf("OCSP: failed to find responder certificate: %v", err) + } + + // Validate OCSP response signature using responder's certificate + if err := resp.CheckSignatureFrom(cert); err != nil { + return errors.Errorf("OCSP: invalid response signature: %v", err) + } + + // Check if the OCSP responder has the No Check extension + if hasNoCheckExtension(cert) { + return errors.New("OCSP: disabled for cert by responder") + } + + // Must have OCSP Signing EKU + if found := slices.Contains(resp.Certificate.ExtKeyUsage, x509.ExtKeyUsageOCSPSigning); !found { + return errors.New("OCSP signer cert missing OCSP Signing EKU") + } + + // TODO check if resp.Certificate chains up to issuer + + return nil +} + +func findOCSPResponderCert(resp *ocsp.Response, rootCerts *x509.CertPool) (*x509.Certificate, error) { + if resp.Certificate != nil { + return resp.Certificate, nil + } + for _, rawCert := range rootCerts.Subjects() { + cert, err := x509.ParseCertificate(rawCert) + if err == nil && bytes.Equal(cert.SubjectKeyId, resp.ResponderKeyHash) { + return cert, nil + } + } + return nil, errors.New("OCSP: responder certificate unavailable") +} + +func hasNoCheckExtension(cert *x509.Certificate) bool { + for _, ext := range cert.Extensions { + if ext.Id.Equal(oidOCSPNoCheck) { + return true + } + } + return false +} + +func getIssuerCertificate(cert *x509.Certificate, pool *x509.CertPool, client *http.Client) (*x509.Certificate, error) { + // Try to find the issuer in the provided CertPool + for _, potentialIssuer := range pool.Subjects() { + candidate, err := x509.ParseCertificate(potentialIssuer) + if err == nil && cert.CheckSignatureFrom(candidate) == nil { + return candidate, nil // Found the issuer + } + } + return nil, errors.Errorf("issuer certificate not found") +} diff --git a/pkg/pdfcpu/sign/sign.go b/pkg/pdfcpu/sign/sign.go new file mode 100644 index 00000000..bee99924 --- /dev/null +++ b/pkg/pdfcpu/sign/sign.go @@ -0,0 +1,489 @@ +/* +Copyright 2025 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sign + +import ( + "crypto/dsa" + "crypto/ecdsa" + "crypto/ed25519" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" + "fmt" + "io" + "slices" + "strings" + "time" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" + "github.com/hhrutter/pkcs7" + "github.com/pkg/errors" +) + +const CertifiedSigPermsNotSupported = "Certified signature detected. Permission validation not supported." + +func validateCertChains( + chains [][]*x509.Certificate, // All chain paths for cert leading to a root CA. 
+ rootCerts *x509.CertPool, + signer *model.Signer, + signingTime *time.Time, + crls [][]byte, + ocsps [][]byte, + result *model.SignatureValidationResult, + conf *model.Configuration) { + + var cd *model.CertificateDetails + + // TODO Process all chains. + chain := chains[0] + + for i, cert := range chain { + + certDetails := model.CertificateDetails{} + + if signer.Certificate == nil { + signer.Certificate = &certDetails + } else { + cd.IssuerCertificate = &certDetails + } + cd = &certDetails + + if ok := setupCertDetails(cert, &certDetails, signer, signingTime, result, i); !ok { + continue + } + + selfSigned, err := isSelfSigned(cert) + if selfSigned { + certDetails.SelfSigned = true + } + if err != nil { + signer.AddProblem(fmt.Sprintf("selfSigned cert verification for against public key failed: %s: %v\n", certInfo(cert), err)) + if result.Reason == model.SignatureReasonUnknown { + result.Reason = model.SignatureReasonSelfSignedCertErr + } + certDetails.Trust.Status = model.False + certDetails.Trust.Reason = "certificate not trusted" + continue + } + + if selfSigned || certDetails.CA { + certDetails.Trust.Status = model.True + certDetails.Trust.Reason = "CA" + if selfSigned { + certDetails.Trust.Reason = "self signed" + } + continue + } + + if certDetails.Expired && signingTime == nil && len(crls) == 0 && len(ocsps) == 0 { + certDetails.Trust.Status = model.False + certDetails.Trust.Reason = "certificate expired" + continue + } + + setTrustStatus(&certDetails, result) + + var issuer *x509.Certificate + if len(chain) > 1 { + issuer = chain[1] + } + checkRevocation(cert, issuer, rootCerts, signer, &certDetails, signingTime, crls, ocsps, result, conf) + } +} + +func setupCertDetails( + cert *x509.Certificate, + certDetails *model.CertificateDetails, + signer *model.Signer, + signingTime *time.Time, + result *model.SignatureValidationResult, + i int) bool { + + certDetails.Leaf = i == 0 + certDetails.Subject = cert.Subject.CommonName + certDetails.Issuer = cert.Issuer.CommonName + certDetails.SerialNumber = cert.SerialNumber.Text(16) + certDetails.Version = cert.Version + certDetails.ValidFrom = cert.NotBefore + certDetails.ValidThru = cert.NotAfter + + ts := time.Now() + if signingTime != nil { + ts = *signingTime + } + certDetails.Expired = ts.Before(cert.NotBefore) || ts.After(cert.NotAfter) + + certDetails.Usage = certUsage(cert) + certDetails.Qualified = qualifiedCertificate(cert) + certDetails.CA = cert.IsCA + + certDetails.SignAlg = cert.PublicKeyAlgorithm.String() + + keySize, ok := getKeySize(cert, signer, certDetails, result) + if !ok { + return false + } + certDetails.KeySize = keySize + + return true +} + +func getKeySize(cert *x509.Certificate, signer *model.Signer, certDetails *model.CertificateDetails, result *model.SignatureValidationResult) (int, bool) { + keySize, err := publicKeySize(cert) + if err == nil { + return keySize, true + } + signer.AddProblem(fmt.Sprintf("%v", err)) + if result.Reason == model.SignatureReasonUnknown { + result.Reason = model.SignatureReasonCertNotTrusted + } + certDetails.Trust.Status = model.False + certDetails.Trust.Reason = "certificate not trusted" + return 0, false +} + +func setTrustStatus(certDetails *model.CertificateDetails, result *model.SignatureValidationResult) { + if result.Reason == model.SignatureReasonCertNotTrusted { + certDetails.Trust.Status = model.False + certDetails.Trust.Reason = "certificate not trusted" + } else { + certDetails.Trust.Status = model.True + certDetails.Trust.Reason = "cert chain up to root CA is 
trusted" + } +} + +func signedData(ra io.ReaderAt, sigDict types.Dict) ([]byte, error) { + arr := sigDict.ArrayEntry("ByteRange") + if len(arr) != 4 { + return nil, errors.New("pdfcpu: invalid signature dict - missing \"ByteRange\"") + } + return bytesForByteRange(ra, arr) +} + +func bytesForByteRange(ra io.ReaderAt, arr types.Array) ([]byte, error) { + off1 := int64((arr[0].(types.Integer)).Value()) + size1 := int64((arr[1].(types.Integer)).Value()) + off2 := int64((arr[2].(types.Integer)).Value()) + size2 := int64((arr[3].(types.Integer)).Value()) + + buf1 := make([]byte, size1) + _, err := ra.ReadAt(buf1, off1) + if err != nil { + return nil, err + } + + buf2 := make([]byte, size2) + _, err = ra.ReadAt(buf2, off2) + if err != nil { + return nil, err + } + + return append(buf1, buf2...), nil +} + +// isSelfSigned checks if a given certificate is self-signed. +func isSelfSigned(cert *x509.Certificate) (bool, error) { + // Check if subject and issuer are the same + if !comparePKIXName(cert.Subject, cert.Issuer) { + return false, nil + } + + // Verify the certificate's signature against its own public key + err := cert.CheckSignatureFrom(cert) + + return true, err +} + +func comparePKIXName(a, b pkix.Name) bool { + return slices.Equal(a.Country, b.Country) && + slices.Equal(a.Organization, b.Organization) && + slices.Equal(a.OrganizationalUnit, b.OrganizationalUnit) && + slices.Equal(a.Locality, b.Locality) && + slices.Equal(a.Province, b.Province) && + slices.Equal(a.StreetAddress, b.StreetAddress) && + slices.Equal(a.PostalCode, b.PostalCode) && + a.CommonName == b.CommonName +} + +func certUsage(cert *x509.Certificate) string { + ss := []string{} + for _, usage := range cert.ExtKeyUsage { + switch usage { + case x509.ExtKeyUsageServerAuth: + ss = append(ss, "Server Authentication") + case x509.ExtKeyUsageClientAuth: + ss = append(ss, "Client Authentication") + case x509.ExtKeyUsageCodeSigning: + ss = append(ss, "Code Signing") + case x509.ExtKeyUsageEmailProtection: + ss = append(ss, "Email Protection") + case x509.ExtKeyUsageTimeStamping: + ss = append(ss, "Time Stamping") + case x509.ExtKeyUsageOCSPSigning: + ss = append(ss, "OCSP Signing") + case x509.ExtKeyUsageIPSECEndSystem: + ss = append(ss, "IPSEC End System") + case x509.ExtKeyUsageIPSECTunnel: + ss = append(ss, "IPSEC Tunnel") + case x509.ExtKeyUsageIPSECUser: + ss = append(ss, "IPSEC User") + case x509.ExtKeyUsageAny: + ss = append(ss, "Any") + default: + ss = append(ss, "Any") + } + } + return strings.Join(ss, ",") +} + +func qualifiedCertificate(cert *x509.Certificate) bool { + for _, policy := range cert.PolicyIdentifiers { + switch { + case policy.Equal(oidQCESign): + return true + case policy.Equal(oidQCESeal): + return true + case policy.Equal(oidQWebAuthCert): + return true + case policy.Equal(oidETSIQCPublicWithSSCD): + return true + } + } + return false +} + +func certChain(cert *x509.Certificate, certs []*x509.Certificate) []*x509.Certificate { + certMap := make(map[string]*x509.Certificate) + for _, cert := range certs { + certMap[string(cert.RawSubject)] = cert + } + + current := cert + + var sorted []*x509.Certificate + + for current != nil && len(sorted) < len(certs) { + sorted = append(sorted, current) + current = certMap[string(current.RawIssuer)] + } + + return sorted +} + +func publicKeySize(cert *x509.Certificate) (int, error) { + switch pubKey := cert.PublicKey.(type) { + case *rsa.PublicKey: + return pubKey.Size() * 8, nil + case *ecdsa.PublicKey: + return pubKey.Curve.Params().BitSize, nil + case 
ed25519.PublicKey: + return 256, nil + case *dsa.PublicKey: + return pubKey.Y.BitLen(), nil + default: + return 0, errors.Errorf("unknown public key type %T", pubKey) + } +} + +func handleCertVerifyErr(err error, cert *x509.Certificate, signer *model.Signer, result *model.SignatureValidationResult) { + switch certErr := err.(type) { + case x509.UnknownAuthorityError: + if result.Reason == model.SignatureReasonUnknown { + result.Reason = model.SignatureReasonCertNotTrusted + } + case x509.CertificateInvalidError: + if certErr.Reason == x509.Expired { + if result.Reason == model.SignatureReasonUnknown { + result.Reason = model.SignatureReasonCertExpired + } + } else { + if result.Reason == model.SignatureReasonUnknown { + result.Reason = model.SignatureReasonCertInvalid + } + } + default: + if result.Reason == model.SignatureReasonUnknown { + result.Reason = model.SignatureReasonCertInvalid + } + } + signer.AddProblem(fmt.Sprintf("certificate verification failed for %s: %v", certInfo(cert), err)) +} + +func certInfo(cert *x509.Certificate) string { + return fmt.Sprintf("serial=%q", cert.SerialNumber.Text(16)) +} + +func processDSS(ctx *model.Context, signer *model.Signer) ([]*x509.Certificate, [][]byte, [][]byte, bool) { + ok := true + dssCerts, err := extractCertsFromDSS(ctx) + if err != nil { + signer.AddProblem(fmt.Sprintf("DSS: extract certs: %v", err)) + ok = false + } + + dssCRLs, err := extractCRLsFromDSS(ctx) + if err != nil { + signer.AddProblem(fmt.Sprintf("DSS: extract crls %v", err)) + ok = false + } + + dssOCSPs, err := extractOCSPsFromDSS(ctx) + if err != nil { + signer.AddProblem(fmt.Sprintf("DSS: extract ocsps %v", err)) + ok = false + } + + if _, ok := ctx.DSS.Find("VRI"); ok { + signer.AddProblem("DSS: VRI currently unsupported") + ok = false + } + + return dssCerts, dssCRLs, dssOCSPs, ok +} + +func extractCertsFromDSS(ctx *model.Context) ([]*x509.Certificate, error) { + entry, found := ctx.DSS.Find("Certs") + if !found { + return nil, nil + } + + arr, err := ctx.DereferenceArray(entry) + if err != nil { + return nil, err + } + + var certs []*x509.Certificate + + for _, obj := range arr { + sd, _, err := ctx.DereferenceStreamDict(obj) + if err != nil { + return nil, err + } + if sd == nil { + return nil, errors.New("invalid DSS cert streamdict") + } + if err := sd.Decode(); err != nil { + return nil, err + } + cert, err := x509.ParseCertificate(sd.Content) + if err != nil { + return nil, err + } + certs = append(certs, cert) + } + + return certs, nil +} + +func mergeCerts(certLists ...[]*x509.Certificate) []*x509.Certificate { + visited := map[string]bool{} + var result []*x509.Certificate + for _, list := range certLists { + for _, cert := range list { + fingerprint := string(cert.Raw) + if !visited[fingerprint] { + visited[fingerprint] = true + result = append(result, cert) + } + } + } + return result +} + +func extractCRLsFromDSS(ctx *model.Context) ([][]byte, error) { + entry, found := ctx.DSS.Find("CLRs") + if !found { + return nil, nil + } + + arr, err := ctx.DereferenceArray(entry) + if err != nil { + return nil, err + } + + var crls [][]byte + + for _, obj := range arr { + sd, _, err := ctx.DereferenceStreamDict(obj) + if err != nil { + return nil, err + } + if sd == nil { + return nil, errors.New("invalid DSS CRL streamdict") + } + if err := sd.Decode(); err != nil { + return nil, err + } + crls = append(crls, sd.Content) + } + + return crls, nil +} + +func extractOCSPsFromDSS(ctx *model.Context) ([][]byte, error) { + entry, found := ctx.DSS.Find("OCSPs") + if 
!found { + return nil, nil + } + + arr, err := ctx.DereferenceArray(entry) + if err != nil { + return nil, err + } + + var ocsps [][]byte + + for _, obj := range arr { + sd, _, err := ctx.DereferenceStreamDict(obj) + if err != nil { + return nil, err + } + if sd == nil { + return nil, errors.New("invalid DSS OCSP streamdict") + } + if err := sd.Decode(); err != nil { + return nil, err + } + ocsps = append(ocsps, sd.Content) + } + + return ocsps, nil +} + +func validateP7(sigDict types.Dict, result *model.SignatureValidationResult) *pkcs7.PKCS7 { + p7, err := p7(sigDict) + if err != nil { + result.Reason = model.SignatureReasonInternal + result.AddProblem(fmt.Sprintf("pkcs5: %v", err)) + return nil + } + + if len(p7.Signers) == 0 { + result.Reason = model.SignatureReasonInternal + result.AddProblem("pkcs7: message without signers") + return nil + } + + if len(p7.Signers) != 1 && result.Details.IsETSI_CAdES_detached() { + result.Reason = model.SignatureReasonInternal + result.AddProblem("pkcs7: \"ETSI.CAdES.detached\" requires a single signer") + return nil + } + + return p7 +} diff --git a/pkg/pdfcpu/stamp.go b/pkg/pdfcpu/stamp.go index c0591f89..67a3f2ef 100644 --- a/pkg/pdfcpu/stamp.go +++ b/pkg/pdfcpu/stamp.go @@ -676,8 +676,8 @@ func createPDFRes(ctx, otherCtx *model.Context, pageNrSrc, pageNrDest int, migra } // Retrieve content stream bytes of page dict. - pdfRes.Content, err = otherXRefTable.PageContent(d) - if err != nil { + pdfRes.Content, err = otherXRefTable.PageContent(d, pageNrSrc) + if err != nil && err != model.ErrNoContent { return err } @@ -715,7 +715,7 @@ func createPDFResForWM(ctx *model.Context, wm *model.Watermark) error { if err != nil { return err } - if otherCtx.Version() == model.V20 { + if otherCtx.XRefTable.Version() == model.V20 { return ErrUnsupportedVersion } @@ -746,7 +746,7 @@ func createPDFResForWM(ctx *model.Context, wm *model.Watermark) error { } func createImageResForWM(ctx *model.Context, wm *model.Watermark) (err error) { - wm.Img, wm.Width, wm.Height, err = model.CreateImageResource(ctx.XRefTable, wm.Image, false, false) + wm.Img, wm.Width, wm.Height, err = model.CreateImageResource(ctx.XRefTable, wm.Image) return err } @@ -1100,7 +1100,7 @@ func createExtGStateForStamp(ctx *model.Context, opacity float64) (*types.Indire return ctx.IndRefForNewObject(d) } -func insertPageResourcesForWM(ctx *model.Context, pageDict types.Dict, wm model.Watermark, gsID, xoID string) error { +func insertPageResourcesForWM(pageDict types.Dict, wm model.Watermark, gsID, xoID string) error { resourceDict := types.Dict( map[string]types.Object{ "ExtGState": types.Dict(map[string]types.Object{gsID: *wm.ExtGState}), @@ -1341,18 +1341,22 @@ func handleLink(ctx *model.Context, pageIndRef *types.IndirectRef, d types.Dict, } ann := model.NewLinkAnnotation( - *wm.BbTrans.EnclosingRectangle(5.0), - types.QuadPoints{wm.BbTrans}, - nil, - wm.URL, - "pdfcpu", - model.AnnNoZoom+model.AnnNoRotate, - 0, - model.BSSolid, - nil, - false) - - _, err := AddAnnotation(ctx, pageIndRef, d, pageNr, ann, false) + *wm.BbTrans.EnclosingRectangle(5.0), // rect + 0, // apObjNr + "", // contents + "pdfcpu", // id + "", // modDate + model.AnnNoZoom+model.AnnNoRotate, // f + &color.Red, // borderCol + nil, // dest + wm.URL, // uri + types.QuadPoints{wm.BbTrans}, // quad + false, // border + 0, // borderWidth + model.BSSolid, // borderStyle + ) + + _, _, err := AddAnnotation(ctx, pageIndRef, d, pageNr, ann, false) return err } @@ -1413,7 +1417,7 @@ func addPageWatermark(ctx *model.Context, pageNr int, wm 
model.Watermark) error err = updatePageResourcesForWM(ctx, inhPAttrs.Resources, wm, &gsID, &xoID) d.Update("Resources", inhPAttrs.Resources) } else { - err = insertPageResourcesForWM(ctx, d, wm, gsID, xoID) + err = insertPageResourcesForWM(d, wm, gsID, xoID) } if err != nil { return err @@ -1560,16 +1564,49 @@ func AddWatermarksMap(ctx *model.Context, m map[int]*model.Watermark) error { return nil } +func resolveFonts(fm map[string]types.IntSet, xRefTable *model.XRefTable, m1 map[int][]*model.Watermark) error { + // TODO Take existing font dicts in xref into account. + for fontName, pageSet := range fm { + ir, err := pdffont.EnsureFontDict(xRefTable, fontName, "", "", false, nil) + if err != nil { + return err + } + for pageNr, v := range pageSet { + if !v { + continue + } + for _, wm := range m1[pageNr] { + if wm.IsText() && wm.FontName == fontName { + wm.Font = ir + } + } + } + } + return nil +} + // AddWatermarksSliceMap adds watermarks in m to corresponding pages. func AddWatermarksSliceMap(ctx *model.Context, m map[int][]*model.Watermark) error { var ( onTop bool opacity float64 ) - for _, wms := range m { - onTop = wms[0].OnTop - opacity = wms[0].Opacity - break + m1 := map[int][]*model.Watermark{} + gotParms := false + for p, wms := range m { + if len(wms) == 0 { + continue + } + m1[p] = wms + if !gotParms { + onTop = wms[0].OnTop + opacity = wms[0].Opacity + gotParms = true + } + } + + if len(m1) == 0 { + errors.Errorf("pdfcpu: no watermarks available") } ocgIndRef, err := prepareOCPropertiesInRoot(ctx, onTop) @@ -1582,30 +1619,16 @@ func AddWatermarksSliceMap(ctx *model.Context, m map[int][]*model.Watermark) err return err } - fm, err := createResourcesForWMSliceMap(ctx, m, ocgIndRef, extGStateIndRef, onTop, opacity) + fm, err := createResourcesForWMSliceMap(ctx, m1, ocgIndRef, extGStateIndRef, onTop, opacity) if err != nil { return err } - // TODO Take existing font dicts in xref into account. - for fontName, pageSet := range fm { - ir, err := pdffont.EnsureFontDict(ctx.XRefTable, fontName, "", "", false, nil) - if err != nil { - return err - } - for pageNr, v := range pageSet { - if !v { - continue - } - for _, wm := range m[pageNr] { - if wm.IsText() && wm.FontName == fontName { - wm.Font = ir - } - } - } + if err := resolveFonts(fm, ctx.XRefTable, m1); err != nil { + return err } - for k, wms := range m { + for k, wms := range m1 { for _, wm := range wms { if err := addPageWatermark(ctx, k, *wm); err != nil { return err diff --git a/pkg/pdfcpu/types/array.go b/pkg/pdfcpu/types/array.go index 9a51819a..e8ef1ab8 100644 --- a/pkg/pdfcpu/types/array.go +++ b/pkg/pdfcpu/types/array.go @@ -194,3 +194,19 @@ func (a Array) PDFString() string { return strings.Join(logstr, "") } + +func (a Array) RemoveNulls() Array { + if len(a) == 0 { + return a + } + + a1 := Array{} + + for _, v := range a { + if v != nil { + a1 = append(a1, v) + } + } + + return a1 +} diff --git a/pkg/pdfcpu/types/date.go b/pkg/pdfcpu/types/date.go index a2f7a214..0257541a 100644 --- a/pkg/pdfcpu/types/date.go +++ b/pkg/pdfcpu/types/date.go @@ -78,7 +78,7 @@ func parseTimezoneHours(s string, o byte) (int, bool) { return 0, false } - // Opininated hack. + // Opinionated hack. 
tzh = tzh % 24 if o == 'Z' && tzh != 0 { @@ -106,15 +106,19 @@ func parseTimezoneMinutes(s string, o byte) (int, bool) { return tzm, true } -func validateTimezoneSeparator(c byte) bool { +func timezoneSeparator(c byte) bool { return c == '+' || c == '-' || c == 'Z' } -func parseTimezone(s string, relaxed bool) (h, m int, ok bool) { +// func validateTimezoneSeparator(c byte) bool { +// return c == '+' || c == '-' || c == 'Z' +// } - o := s[14] +func parseTimezone(s string, off int, relaxed bool) (h, m int, ok bool) { - if !validateTimezoneSeparator(o) { + o := s[off] // 14 + + if !timezoneSeparator(o) { // Ignore timezone on corrupt timezone separator if relaxed. return 0, 0, relaxed } @@ -125,15 +129,17 @@ func parseTimezone(s string, relaxed bool) (h, m int, ok bool) { // "YYYYMMDDHHmmSSZ'" // "YYYYMMDDHHmmSSZ'0" + off++ + if o == 'Z' { - t := s[15:] + t := s[off:] if t == "" || relaxed && (t == "'" || t == "'0") { return 0, 0, true } } // HH'mm - s = s[15:] + s = s[off:] if s[0] == '-' { s = s[1:] } @@ -291,24 +297,31 @@ func parseMinute(s string) (min int, finished, ok bool) { return min, false, true } -func parseSecond(s string) (sec int, finished, ok bool) { +func parseSecond(s string) (sec int, finished bool, off int, ok bool) { + + off = 14 + second := s[12:14] + if len(second) == 2 && timezoneSeparator(second[1]) { + second = second[:1] + off = 13 + } sec, err := strconv.Atoi(second) if err != nil { - return 0, false, false + return 0, false, off, false } if sec > 59 { - return 0, false, false + return 0, false, off, false } // "YYYYMMDDHHmmSS" if len(s) == 14 { - return sec, true, true + return sec, true, off, true } - return sec, false, true + return sec, false, off, true } func digestPopularOutOfSpecDates(s string) (time.Time, bool) { @@ -406,7 +419,7 @@ func DateTime(s string, relaxed bool) (time.Time, bool) { return d, true } - sec, finished, ok := parseSecond(s) + sec, finished, off, ok := parseSecond(s) if !ok { return d, false } @@ -417,7 +430,7 @@ func DateTime(s string, relaxed bool) (time.Time, bool) { } // Process timezone - tzh, tzm, ok := parseTimezone(s, relaxed) + tzh, tzm, ok := parseTimezone(s, off, relaxed) if !ok { return d, false } diff --git a/pkg/pdfcpu/types/date_test.go b/pkg/pdfcpu/types/date_test.go index ee29aa85..e6021401 100644 --- a/pkg/pdfcpu/types/date_test.go +++ b/pkg/pdfcpu/types/date_test.go @@ -104,6 +104,9 @@ func TestParseDateTime(t *testing.T) { s = "D:20210515103719-02'00" doParseDateTimeOK(s, t) + s = "D:2025022513222+01'00'" + doParseDateTimeOK(s, t) + s = "D:20170430155901+66'A9" doParseDateTimeFail(s, t) diff --git a/pkg/pdfcpu/types/dict.go b/pkg/pdfcpu/types/dict.go index 58cf8a85..66d8a85a 100644 --- a/pkg/pdfcpu/types/dict.go +++ b/pkg/pdfcpu/types/dict.go @@ -132,17 +132,23 @@ func (d Dict) NewIDForPrefix(prefix string, i int) string { return id } -// Entry returns the value for given key. -func (d Dict) Entry(dictName, key string, required bool) (Object, error) { +// Entry returns the value for a given key and if the entry was found. 
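+// The returned bool reports whether the key is present, letting callers distinguish a missing
+// entry from one that is present but null.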
+func (d Dict) Entry(dictName, key string, required bool) (Object, bool, error) { obj, found := d.Find(key) - if !found || obj == nil { + if !found { if required { - return nil, errors.Errorf("dict=%s required entry=%s missing", dictName, key) + return nil, false, errors.Errorf("dict=%s required entry=%s missing", dictName, key) } - //log.Trace.Printf("dict=%s entry %s is nil\n", dictName, key) - return nil, nil + return nil, false, nil } - return obj, nil + + if obj == nil { + if required { + return nil, true, errors.Errorf("dict=%s required entry=%s corrupt", dictName, key) + } + } + + return obj, found, nil } // BooleanEntry expects and returns a BooleanEntry for given key. diff --git a/pkg/pdfcpu/types/paperSize.go b/pkg/pdfcpu/types/paperSize.go index 63b8bbd1..93ee9f3b 100644 --- a/pkg/pdfcpu/types/paperSize.go +++ b/pkg/pdfcpu/types/paperSize.go @@ -82,31 +82,31 @@ var PaperSize = map[string]*Dim{ "SRA3++": {2835, 4008}, // 12.6" x 18.3" 320 x 464 mm // American - "SuperB": {936, 1368}, // 13" x 19" + "SuperB": {936, 1368}, // 13" x 19" "B+": {936, 1368}, - "Tabloid": {791, 1225}, // 11" x 17" ANSIB, DobleCarta - "ExtraTabloid": {865, 1296}, // 12" x 18" ARCHB, Arch2 - "Ledger": {1225, 791}, // 17" x 11" ANSIB - "Legal": {612, 1009}, // 8 1/2" x 14" + "Tabloid": {792, 1224}, // 11" x 17" ANSIB, DobleCarta + "ExtraTabloid": {864, 1296}, // 12" x 18" ARCHB, Arch2 + "Ledger": {1224, 792}, // 17" x 11" ANSIB + "Legal": {612, 1008}, // 8 1/2" x 14" "GovLegal": {612, 936}, // 8 1/2" x 13" "Oficio": {612, 936}, "Folio": {612, 936}, - "Letter": {612, 791}, // 8 1/2" x 11" ANSIA - "Carta": {612, 791}, - "AmericanQuarto": {612, 791}, + "Letter": {612, 792}, // 8 1/2" x 11" ANSIA + "Carta": {612, 792}, + "AmericanQuarto": {612, 792}, - "DobleCarta": {791, 1225}, // 11" x 17" Tabloid, ANSIB + "DobleCarta": {792, 1224}, // 11" x 17" Tabloid, ANSIB - "GovLetter": {576, 757}, // 8" x 10 1/2" + "GovLetter": {576, 756}, // 8" x 10 1/2" "Executive": {522, 756}, // 7 1/4" x 10 1/2" - "HalfLetter": {397, 612}, // 5 1/2" x 8 1/2" - "Memo": {397, 612}, - "Statement": {397, 612}, - "Stationary": {397, 612}, + "HalfLetter": {396, 612}, // 5 1/2" x 8 1/2" + "Memo": {396, 612}, + "Statement": {396, 612}, + "Stationary": {396, 612}, "JuniorLegal": {360, 576}, // 5" x 8" "IndexCard": {360, 576}, @@ -114,11 +114,11 @@ var PaperSize = map[string]*Dim{ "Photo": {288, 432}, // 4" x 6" // ANSI/ASME Y14.1 - "ANSIA": {612, 791}, // 8 1/2" x 11" Letter, Carta, AmericanQuarto - "ANSIB": {791, 1225}, // 11" x 17" Ledger, Tabloid, DobleCarta - "ANSIC": {1225, 1585}, // 17" x 22" - "ANSID": {1585, 2449}, // 22" x 34" - "ANSIE": {2449, 3170}, // 34" x 44" + "ANSIA": {612, 792}, // 8 1/2" x 11" Letter, Carta, AmericanQuarto + "ANSIB": {792, 1224}, // 11" x 17" Ledger, Tabloid, DobleCarta + "ANSIC": {1224, 1584}, // 17" x 22" + "ANSID": {1584, 2448}, // 22" x 34" + "ANSIE": {2448, 3168}, // 34" x 44" "ANSIF": {2016, 2880}, // 28" x 40" // ANSI/ASME Y14.1 Architectural series @@ -131,12 +131,12 @@ var PaperSize = map[string]*Dim{ "ARCHE2": {1871, 2736}, // 26" x 38" "ARCHE3": {1945, 2809}, // 27" x 39" - "Arch1": {649, 865}, // 9" x 12" ARCHA - "Arch2": {865, 1296}, // 12" x 18" ARCHB, ExtraTabloide - "Arch3": {1296, 1729}, // 18" x 24" ARCHC - "Arch4": {1729, 2591}, // 24" x 36" ARCHD - "Arch5": {2160, 3025}, // 30" x 42" ARCHE1 - "Arch6": {2591, 3456}, // 36" x 48" ARCHE + "Arch1": {648, 864}, // 9" x 12" ARCHA + "Arch2": {864, 1296}, // 12" x 18" ARCHB, ExtraTabloide + "Arch3": {1296, 1728}, // 18" x 24" ARCHC + "Arch4": 
{1728, 2592}, // 24" x 36" ARCHD + "Arch5": {2160, 3024}, // 30" x 42" ARCHE1 + "Arch6": {2592, 3456}, // 36" x 48" ARCHE // American Uncut "Bond": {1584, 1224}, // 22" x 17" @@ -154,7 +154,7 @@ var PaperSize = map[string]*Dim{ "Crown": {1170, 1512}, // 16 1/4" x 21" "DoubleCrown": {1440, 2160}, // 20" x 30" "Quad": {2160, 2880}, // 30" x 40" - "Demy": {1242, 1620}, // 17 3/4" x 22 1/2" + "Demy": {1278, 1620}, // 17 3/4" x 22 1/2" "DoubleDemy": {1620, 2556}, // 22 1/2" x 35 1/2" "Medium": {1314, 1656}, // 18 1/4" x 23" "Royal": {1440, 1804}, // 20" x 25 1/16" @@ -164,7 +164,7 @@ var PaperSize = map[string]*Dim{ "Foolscap": {972, 1224}, // 13 1/2" x 17" "DoubleFoolscap": {1224, 1944}, // 17" x 27" - "F4": {595, 935}, // 8 1/4" x 13" + "F4": {594, 936}, // 8 1/4" x 13" // GB/T 148-1997 D Series China "D0": {2166, 3016}, // 29.9" x 41.9" 764 x 1064 mm diff --git a/pkg/pdfcpu/types/streamdict.go b/pkg/pdfcpu/types/streamdict.go index 151a04b9..3a1651e8 100644 --- a/pkg/pdfcpu/types/streamdict.go +++ b/pkg/pdfcpu/types/streamdict.go @@ -18,6 +18,7 @@ package types import ( "bytes" + "context" "fmt" "io" @@ -102,6 +103,90 @@ func (sd StreamDict) Image() bool { return true } +type DecodeLazyObjectStreamObjectFunc func(c context.Context, s string) (Object, error) + +type LazyObjectStreamObject struct { + osd *ObjectStreamDict + startOffset int + endOffset int + + decodeFunc DecodeLazyObjectStreamObjectFunc + decodedObject Object + decodedError error +} + +func NewLazyObjectStreamObject(osd *ObjectStreamDict, startOffset, endOffset int, decodeFunc DecodeLazyObjectStreamObjectFunc) Object { + return LazyObjectStreamObject{ + osd: osd, + startOffset: startOffset, + endOffset: endOffset, + + decodeFunc: decodeFunc, + } +} + +func (l LazyObjectStreamObject) Clone() Object { + return LazyObjectStreamObject{ + osd: l.osd, + startOffset: l.startOffset, + endOffset: l.endOffset, + + decodeFunc: l.decodeFunc, + decodedObject: l.decodedObject, + decodedError: l.decodedError, + } +} + +func (l LazyObjectStreamObject) PDFString() string { + data, err := l.GetData() + if err != nil { + panic(err) + } + + return string(data) +} + +func (l LazyObjectStreamObject) String() string { + return l.PDFString() +} + +func (l *LazyObjectStreamObject) GetData() ([]byte, error) { + if err := l.osd.Decode(); err != nil { + return nil, err + } + + var data []byte + if l.endOffset == -1 { + data = l.osd.Content[l.startOffset:] + } else { + data = l.osd.Content[l.startOffset:l.endOffset] + } + return data, nil +} + +func (l *LazyObjectStreamObject) DecodedObject(c context.Context) (Object, error) { + if l.decodedObject == nil && l.decodedError == nil { + data, err := l.GetData() + if err != nil { + return nil, err + } + + if log.ReadEnabled() { + log.Read.Printf("parseObjectStream: objString = %s\n", string(data)) + } + + l.decodedObject, l.decodedError = l.decodeFunc(c, string(data)) + if l.decodedError != nil { + return nil, l.decodedError + } + + if log.ReadEnabled() { + //log.Read.Printf("parseObjectStream: [%d] = obj %s:\n%s\n", i/2-1, objs[i-2], o) + } + } + return l.decodedObject, l.decodedError +} + // ObjectStreamDict represents a object stream dictionary. type ObjectStreamDict struct { StreamDict @@ -152,6 +237,11 @@ func parmsForFilter(d Dict) map[string]int { // Encode applies sd's filter pipeline to sd.Content in order to produce sd.Raw. func (sd *StreamDict) Encode() error { + if sd.Content == nil && sd.Raw != nil { + // Not decoded yet, no need to encode. 
+ return nil + } + // No filter specified, nothing to encode. if sd.FilterPipeline == nil { if log.TraceEnabled() { @@ -231,27 +321,16 @@ func fixParms(f PDFFilter, parms map[string]int, sd *StreamDict) error { // Decode applies sd's filter pipeline to sd.Raw in order to produce sd.Content. func (sd *StreamDict) Decode() error { - if sd.Content != nil { - // This stream has already been decoded. - return nil - } - - fpl := sd.FilterPipeline - - // No filter or sole filter DTC && !CMYK or JPX - nothing to decode. - if fpl == nil || len(fpl) == 1 && ((fpl[0].Name == filter.DCT && sd.CSComponents != 4) || fpl[0].Name == filter.JPX) { - sd.Content = sd.Raw - //fmt.Printf("decodedStream returning %d(#%02x)bytes: \n%s\n", len(sd.Content), len(sd.Content), hex.Dump(sd.Content)) - return nil - } - - //fmt.Printf("decodedStream before:\n%s\n", hex.Dump(sd.Raw)) + _, err := sd.DecodeLength(-1) + return err +} +func (sd *StreamDict) decodeLength(maxLen int64) ([]byte, error) { var b, c io.Reader b = bytes.NewReader(sd.Raw) // Apply each filter in the pipeline to result of preceding filter. - for _, f := range sd.FilterPipeline { + for idx, f := range sd.FilterPipeline { if f.Name == filter.JPX { break @@ -276,40 +355,78 @@ func (sd *StreamDict) Decode() error { parms := parmsForFilter(f.DecodeParms) if err := fixParms(f, parms, sd); err != nil { - return err + return nil, err } fi, err := filter.NewFilter(f.Name, parms) if err != nil { - return err + return nil, err } - c, err = fi.Decode(b) + if maxLen >= 0 && idx == len(sd.FilterPipeline)-1 { + c, err = fi.DecodeLength(b, maxLen) + } else { + c, err = fi.Decode(b) + } if err != nil { - return err + return nil, err } //fmt.Printf("decodedStream after:%s\n%s\n", f.Name, hex.Dump(c.Bytes())) b = c } + var data []byte if bb, ok := c.(*bytes.Buffer); ok { - sd.Content = bb.Bytes() + data = bb.Bytes() } else { var buf bytes.Buffer if _, err := io.Copy(&buf, c); err != nil { - return err + return nil, err } - sd.Content = buf.Bytes() + data = buf.Bytes() } - return nil + if maxLen < 0 { + sd.Content = data + return data, nil + } + + return data[:maxLen], nil +} + +func (sd *StreamDict) DecodeLength(maxLen int64) ([]byte, error) { + if sd.Content != nil { + // This stream has already been decoded. + if maxLen < 0 { + return sd.Content, nil + } + + return sd.Content[:maxLen], nil + } + + fpl := sd.FilterPipeline + + // No filter or sole filter DTC && !CMYK or JPX - nothing to decode. + if fpl == nil || len(fpl) == 1 && ((fpl[0].Name == filter.DCT && sd.CSComponents != 4) || fpl[0].Name == filter.JPX) { + sd.Content = sd.Raw + //fmt.Printf("decodedStream returning %d(#%02x)bytes: \n%s\n", len(sd.Content), len(sd.Content), hex.Dump(sd.Content)) + if maxLen < 0 { + return sd.Content, nil + } + + return sd.Content[:maxLen], nil + } + + //fmt.Printf("decodedStream before:\n%s\n", hex.Dump(sd.Raw)) + + return sd.decodeLength(maxLen) } // IndexedObject returns the object at given index from a ObjectStreamDict. 
func (osd *ObjectStreamDict) IndexedObject(index int) (Object, error) { - if osd.ObjArray == nil { + if osd.ObjArray == nil || index < 0 || index >= len(osd.ObjArray) { return nil, errors.Errorf("IndexedObject(%d): object not available", index) } return osd.ObjArray[index], nil diff --git a/pkg/pdfcpu/types/string.go b/pkg/pdfcpu/types/string.go index 6ba4bed2..3d50eefc 100644 --- a/pkg/pdfcpu/types/string.go +++ b/pkg/pdfcpu/types/string.go @@ -27,6 +27,17 @@ import ( "golang.org/x/text/unicode/norm" ) +func RemoveControlChars(s string) string { + return strings.Map(func(r rune) rune { + switch r { + case '\n', '\r', '\t', '\b', '\f': + return -1 + default: + return r + } + }, s) +} + // NewStringSet returns a new StringSet for slice. func NewStringSet(slice []string) StringSet { strSet := StringSet{} @@ -194,7 +205,7 @@ func Unescape(s string) ([]byte, error) { return b.Bytes(), nil } -// UTF8ToCP1252 converts UTF-8 to CP1252. +// UTF8ToCP1252 converts UTF-8 to CP1252. Unused func UTF8ToCP1252(s string) string { bb := []byte{} for _, r := range s { @@ -203,7 +214,7 @@ func UTF8ToCP1252(s string) string { return string(bb) } -// CP1252ToUTF8 converts CP1252 to UTF-8. +// CP1252ToUTF8 converts CP1252 to UTF-8. Unused func CP1252ToUTF8(s string) string { utf8Buf := make([]byte, utf8.UTFMax) bb := []byte{} @@ -304,3 +315,17 @@ func DecodeName(s string) (string, error) { } return sb.String(), nil } + +func TrimLeadingComment(s string) string { + for i := 0; i < len(s); i++ { + switch s[i] { + case ' ', '\t', '\r', '\n', '\f': + continue + case '%': + return "" + default: + return s + } + } + return "" +} diff --git a/pkg/pdfcpu/types/types.go b/pkg/pdfcpu/types/types.go index dcc79774..06cbf3a2 100644 --- a/pkg/pdfcpu/types/types.go +++ b/pkg/pdfcpu/types/types.go @@ -150,6 +150,10 @@ type Point struct { Y float64 `json:"y"` } +func NewPoint(x, y float64) Point { + return Point{X: x, Y: y} +} + // Translate modifies p's coordinates. func (p *Point) Translate(dx, dy float64) { p.X += dx @@ -171,6 +175,30 @@ func NewRectangle(llx, lly, urx, ury float64) *Rectangle { return &Rectangle{LL: Point{llx, lly}, UR: Point{urx, ury}} } +func decodeFloat(number Object) float64 { + var f float64 + switch v := number.(type) { + case Float: + f = v.Value() + case Integer: + f = float64(v.Value()) + } + return f +} + +func RectForArray(arr Array) *Rectangle { + if len(arr) != 4 { + return nil + } + + llx := decodeFloat(arr[0]) + lly := decodeFloat(arr[1]) + urx := decodeFloat(arr[2]) + ury := decodeFloat(arr[3]) + + return NewRectangle(llx, lly, urx, ury) +} + // RectForDim returns a new rectangle for given dimensions. func RectForDim(width, height float64) *Rectangle { return NewRectangle(0.0, 0.0, width, height) @@ -206,6 +234,10 @@ func (r Rectangle) FitsWithin(r2 *Rectangle) bool { return r.Width() <= r2.Width() && r.Height() <= r2.Height() } +func (r Rectangle) Visible() bool { + return r.Width() != 0 && r.Height() != 0 +} + // AspectRatio returns the relation between width and height of a rectangle. 
func (r Rectangle) AspectRatio() float64 { return r.Width() / r.Height() @@ -358,6 +390,20 @@ type QuadLiteral struct { P1, P2, P3, P4 Point } +func NewQuadLiteralForRect(r *Rectangle) *QuadLiteral { + // p1 := Point{X: r.LL.X, Y: r.LL.Y} + // p2 := Point{X: r.UR.X, Y: r.LL.Y} + // p3 := Point{X: r.UR.X, Y: r.UR.Y} + // p4 := Point{X: r.LL.X, Y: r.UR.Y} + + p3 := Point{X: r.LL.X, Y: r.LL.Y} + p4 := Point{X: r.UR.X, Y: r.LL.Y} + p2 := Point{X: r.UR.X, Y: r.UR.Y} + p1 := Point{X: r.LL.X, Y: r.UR.Y} + + return &QuadLiteral{P1: p1, P2: p2, P3: p3, P4: p4} +} + // Array returns the PDF representation of ql. func (ql QuadLiteral) Array() Array { return NewNumberArray(ql.P1.X, ql.P1.Y, ql.P2.X, ql.P2.Y, ql.P3.X, ql.P3.Y, ql.P4.X, ql.P4.Y) @@ -410,7 +456,7 @@ func (nameObject Name) Clone() Object { } func (nameObject Name) String() string { - return fmt.Sprint(string(nameObject)) + return string(nameObject) } // PDFString returns a string representation as found in and written to a PDF file. @@ -424,7 +470,7 @@ func (nameObject Name) PDFString() string { // Value returns a string value for this PDF object. func (nameObject Name) Value() string { - return string(nameObject) + return nameObject.String() } /////////////////////////////////////////////////////////////////////////////////// @@ -602,5 +648,5 @@ func (d Dim) Portrait() bool { } func (d Dim) String() string { - return fmt.Sprintf("%fx%f points", d.Width, d.Height) + return fmt.Sprintf("%fx%f", d.Width, d.Height) } diff --git a/pkg/pdfcpu/types/utf16.go b/pkg/pdfcpu/types/utf16.go index 873cf26a..54b21bd4 100644 --- a/pkg/pdfcpu/types/utf16.go +++ b/pkg/pdfcpu/types/utf16.go @@ -121,7 +121,7 @@ func EncodeUTF16String(s string) string { return string(bb) } -func EscapeUTF16String(s string) (*string, error) { +func EscapedUTF16String(s string) (*string, error) { return Escape(EncodeUTF16String(s)) } @@ -152,6 +152,14 @@ func HexLiteralToString(hl HexLiteral) (string, error) { if IsUTF16BE(bb) { return decodeUTF16String(bb) } + + bb, err = Unescape(string(bb)) + if err != nil { + return "", err + } + + bb = bytes.TrimPrefix(bb, []byte{239, 187, 191}) + return string(bb), nil } diff --git a/pkg/pdfcpu/validate/action.go b/pkg/pdfcpu/validate/action.go index d61af2f2..f7b3effd 100644 --- a/pkg/pdfcpu/validate/action.go +++ b/pkg/pdfcpu/validate/action.go @@ -110,11 +110,26 @@ func validateGoToEActionDict(xRefTable *model.XRefTable, d types.Dict, dictName // D, required, name, byte string or array err = validateActionDestinationEntry(xRefTable, d, dictName, "D", REQUIRED, model.V10) if err != nil { - return err + if xRefTable.ValidationMode == model.ValidationStrict { + return err + } + err = validateActionDestinationEntry(xRefTable, d, dictName, "Dest", REQUIRED, model.V10) + if err != nil && xRefTable.ValidationMode == model.ValidationRelaxed { + err = nil + model.ShowSkipped("GotoEAction: missing \"D\"") + } else { + d["D"] = d["Dest"] + delete(d, "Dest") + model.ShowRepaired("GotoEAction destination") + } } // NewWindow, optional, boolean, since V1.2 - _, err = validateBooleanEntry(xRefTable, d, dictName, "NewWindow", OPTIONAL, model.V12, nil) + sinceVersion := model.V12 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V11 + } + _, err = validateBooleanEntry(xRefTable, d, dictName, "NewWindow", OPTIONAL, sinceVersion, nil) if err != nil { return err } @@ -561,12 +576,12 @@ func validateNamedActionDict(xRefTable *model.XRefTable, d types.Dict, dictName validate := func(s string) bool { - if types.MemberOf(s, 
[]string{"NextPage", "PrevPage", "FirstPage", "Lastpage"}) { + if types.MemberOf(s, []string{"NextPage", "PrevPage", "FirstPage", "LastPage"}) { return true } // Some known non standard named actions - if types.MemberOf(s, []string{"GoToPage", "GoBack", "GoForward", "Find", "Print", "SaveAs", "Quit", "FullScreen"}) { + if types.MemberOf(s, []string{"AcroSrch:Query", "Find", "FindAgain", "FindAgainDoc", "FindPrevious", "FindPreviousDoc", "FullScreen", "GoBack", "GoBackDoc", "GoForward", "GoToPage", "Print", "Quit", "SaveAs", "FitPage", "FitWidth"}) { return true } @@ -652,7 +667,11 @@ func validateImportDataActionDict(xRefTable *model.XRefTable, d types.Dict, dict func validateJavaScript(xRefTable *model.XRefTable, d types.Dict, dictName, entryName string, required bool) error { - o, err := validateEntry(xRefTable, d, dictName, entryName, required, model.V13) + sinceVersion := model.V13 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V12 + } + o, err := validateEntry(xRefTable, d, dictName, entryName, required, sinceVersion) if err != nil || o == nil { return err } @@ -709,7 +728,11 @@ func validateRenditionActionDict(xRefTable *model.XRefTable, d types.Dict, dictN // OP or JS need to be present. // OP, integer - op, err := validateIntegerEntry(xRefTable, d, dictName, "OP", OPTIONAL, model.V15, func(i int) bool { return 0 <= i && i <= 4 }) + sinceVersion := model.V15 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 + } + op, err := validateIntegerEntry(xRefTable, d, dictName, "OP", OPTIONAL, sinceVersion, func(i int) bool { return 0 <= i && i <= 4 }) if err != nil { return err } @@ -729,12 +752,16 @@ func validateRenditionActionDict(xRefTable *model.XRefTable, d types.Dict, dictN return v == 0 || v == 4 }(op) - d1, err := validateDictEntry(xRefTable, d, dictName, "R", required, model.V15, nil) + sinceVersion = model.V15 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 + } + d1, err := validateDictEntry(xRefTable, d, dictName, "R", required, sinceVersion, nil) if err != nil { return err } if d1 != nil { - err = validateRenditionDict(xRefTable, d1, model.V15) + err = validateRenditionDict(xRefTable, d1, sinceVersion) if err != nil { return err } @@ -793,31 +820,37 @@ func validateGoTo3DViewActionDict(xRefTable *model.XRefTable, d types.Dict, dict func validateActionDictCore(xRefTable *model.XRefTable, n *types.Name, d types.Dict) error { for k, v := range map[string]struct { - validate func(xRefTable *model.XRefTable, d types.Dict, dictName string) error - sinceVersion model.Version + validate func(xRefTable *model.XRefTable, d types.Dict, dictName string) error + sinceVersion model.Version + sinceVersionRelaxed model.Version }{ - "GoTo": {validateGoToActionDict, model.V10}, - "GoToR": {validateGoToRActionDict, model.V10}, - "GoToE": {validateGoToEActionDict, model.V16}, - "Launch": {validateLaunchActionDict, model.V10}, - "Thread": {validateThreadActionDict, model.V10}, - "URI": {validateURIActionDict, model.V10}, - "Sound": {validateSoundActionDict, model.V12}, - "Movie": {validateMovieActionDict, model.V12}, - "Hide": {validateHideActionDict, model.V12}, - "Named": {validateNamedActionDict, model.V12}, - "SubmitForm": {validateSubmitFormActionDict, model.V10}, - "ResetForm": {validateResetFormActionDict, model.V12}, - "ImportData": {validateImportDataActionDict, model.V12}, - "JavaScript": {validateJavaScriptActionDict, model.V13}, - "SetOCGState": {validateSetOCGStateActionDict, 
model.V15}, - "Rendition": {validateRenditionActionDict, model.V15}, - "Trans": {validateTransActionDict, model.V15}, - "GoTo3DView": {validateGoTo3DViewActionDict, model.V16}, + "GoTo": {validateGoToActionDict, model.V10, model.V10}, + "GoToR": {validateGoToRActionDict, model.V10, model.V10}, + "GoToE": {validateGoToEActionDict, model.V16, model.V11}, + "Launch": {validateLaunchActionDict, model.V10, model.V10}, + "Thread": {validateThreadActionDict, model.V10, model.V10}, + "URI": {validateURIActionDict, model.V10, model.V10}, + "Sound": {validateSoundActionDict, model.V12, model.V12}, + "Movie": {validateMovieActionDict, model.V12, model.V12}, + "Hide": {validateHideActionDict, model.V12, model.V12}, + "Named": {validateNamedActionDict, model.V12, model.V12}, + "SubmitForm": {validateSubmitFormActionDict, model.V10, model.V10}, + "ResetForm": {validateResetFormActionDict, model.V12, model.V12}, + "ImportData": {validateImportDataActionDict, model.V12, model.V12}, + "JavaScript": {validateJavaScriptActionDict, model.V13, model.V12}, + "SetOCGState": {validateSetOCGStateActionDict, model.V15, model.V15}, + "Rendition": {validateRenditionActionDict, model.V15, model.V14}, + "Trans": {validateTransActionDict, model.V15, model.V15}, + "GoTo3DView": {validateGoTo3DViewActionDict, model.V16, model.V16}, } { if n.Value() == k { - err := xRefTable.ValidateVersion(k, v.sinceVersion) + sinceVersion := v.sinceVersion + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = v.sinceVersionRelaxed + } + + err := xRefTable.ValidateVersion(k, sinceVersion) if err != nil { return err } @@ -919,7 +952,7 @@ func validateAdditionalActions(xRefTable *model.XRefTable, dict types.Dict, dict case "fieldOrAnnot": // A terminal form field may be merged with a widget annotation. fieldOptions := []string{"K", "F", "V", "C"} - annotOptions := []string{"E", "X", "D", "U", "Fo", "Bl", "PO", "PC", "PV", "Pl"} + annotOptions := []string{"E", "X", "D", "U", "Fo", "Bl", "PO", "PC", "PV", "PI"} options := append(fieldOptions, annotOptions...) if types.MemberOf(s, options) { return true diff --git a/pkg/pdfcpu/validate/annotation.go b/pkg/pdfcpu/validate/annotation.go index 4b183934..ad1cb88a 100644 --- a/pkg/pdfcpu/validate/annotation.go +++ b/pkg/pdfcpu/validate/annotation.go @@ -17,6 +17,9 @@ limitations under the License. package validate import ( + "strconv" + "strings" + "github.com/angel-one/pdfcpu/pkg/log" "github.com/angel-one/pdfcpu/pkg/pdfcpu" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" @@ -26,37 +29,6 @@ import ( var errInvalidPageAnnotArray = errors.New("pdfcpu: validatePageAnnotations: page annotation array without indirect references.") -func validateAAPLAKExtrasDictEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entryName string, required bool, sinceVersion model.Version) error { - - // No documentation for this PDF-Extension - purely speculative implementation. 
- - d1, err := validateDictEntry(xRefTable, d, dictName, entryName, required, sinceVersion, nil) - if err != nil || d1 == nil { - return err - } - - dictName = "AAPLAKExtrasDict" - - // AAPL:AKAnnotationObject, string - _, err = validateStringEntry(xRefTable, d1, dictName, "AAPL:AKAnnotationObject", OPTIONAL, sinceVersion, nil) - if err != nil { - return err - } - - // AAPL:AKPDFAnnotationDictionary, annotationDict - ad, err := validateDictEntry(xRefTable, d1, dictName, "AAPL:AKPDFAnnotationDictionary", OPTIONAL, sinceVersion, nil) - if err != nil { - return err - } - - _, err = validateAnnotationDict(xRefTable, ad) - if err != nil { - return err - } - - return nil -} - func validateBorderEffectDictEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entryName string, required bool, sinceVersion model.Version) error { // see 12.5.4 @@ -69,14 +41,16 @@ func validateBorderEffectDictEntry(xRefTable *model.XRefTable, d types.Dict, dic dictName = "borderEffectDict" // S, optional, name, S or C - _, err = validateNameEntry(xRefTable, d1, dictName, "S", OPTIONAL, model.V10, func(s string) bool { return s == "S" || s == "C" }) - if err != nil { + if _, err = validateNameEntry(xRefTable, d1, dictName, "S", OPTIONAL, model.V10, func(s string) bool { return s == "S" || s == "C" }); err != nil { return err } // I, optional, number in the range 0 to 2 - _, err = validateNumberEntry(xRefTable, d1, dictName, "I", OPTIONAL, model.V10, func(f float64) bool { return 0 <= f && f <= 2 }) // validation missing - if err != nil { + validateI := func(f float64) bool { return 0 <= f && f <= 2 } + if xRefTable.ValidationMode == model.ValidationRelaxed { + validateI = func(f float64) bool { return 0 <= f && f <= 2.5 } + } + if _, err = validateNumberEntry(xRefTable, d1, dictName, "I", OPTIONAL, model.V10, validateI); err != nil { return err } @@ -95,26 +69,27 @@ func validateBorderStyleDict(xRefTable *model.XRefTable, d types.Dict, dictName, dictName = "borderStyleDict" // Type, optional, name, "Border" - _, err = validateNameEntry(xRefTable, d1, dictName, "Type", OPTIONAL, model.V10, func(s string) bool { return s == "Border" }) - if err != nil { + if _, err = validateNameEntry(xRefTable, d1, dictName, "Type", OPTIONAL, model.V10, func(s string) bool { return s == "Border" }); err != nil { return err } // W, optional, number, border width in points - _, err = validateNumberEntry(xRefTable, d1, dictName, "W", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateNumberEntry(xRefTable, d1, dictName, "W", OPTIONAL, model.V10, nil); err != nil { return err } // S, optional, name, border style validate := func(s string) bool { return types.MemberOf(s, []string{"S", "D", "B", "I", "U", "A"}) } - _, err = validateNameEntry(xRefTable, d1, dictName, "S", OPTIONAL, model.V10, validate) - if err != nil { - return err + if _, err = validateNameEntry(xRefTable, d1, dictName, "S", OPTIONAL, model.V10, validate); err != nil { + if !strings.Contains(err.Error(), "invalid dict entry") { + return err + } + // The PDF spec mandates interpreting undefined values as "S". 
+ err = nil } // D, optional, dash array - _, err = validateNumberArrayEntry(xRefTable, d1, dictName, "D", OPTIONAL, model.V10, func(a types.Array) bool { return len(a) <= 2 }) + _, err = validateNumberArrayEntry(xRefTable, d1, dictName, "D", OPTIONAL, model.V10, nil) return err } @@ -132,26 +107,22 @@ func validateIconFitDictEntry(xRefTable *model.XRefTable, d types.Dict, dictName // SW, optional, name, A,B,S,N validate := func(s string) bool { return types.MemberOf(s, []string{"A", "B", "S", "N"}) } - _, err = validateNameEntry(xRefTable, d1, dictName, "SW", OPTIONAL, model.V10, validate) - if err != nil { + if _, err = validateNameEntry(xRefTable, d1, dictName, "SW", OPTIONAL, model.V10, validate); err != nil { return err } // S, optional, name, A,P - _, err = validateNameEntry(xRefTable, d1, dictName, "S", OPTIONAL, model.V10, func(s string) bool { return s == "A" || s == "P" }) - if err != nil { + if _, err = validateNameEntry(xRefTable, d1, dictName, "S", OPTIONAL, model.V10, func(s string) bool { return s == "A" || s == "P" }); err != nil { return err } // A,optional, array of 2 numbers between 0.0 and 1.0 - _, err = validateNumberArrayEntry(xRefTable, d1, dictName, "A", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateNumberArrayEntry(xRefTable, d1, dictName, "A", OPTIONAL, model.V10, nil); err != nil { return err } // FB, optional, bool, since V1.5 - _, err = validateBooleanEntry(xRefTable, d1, dictName, "FB", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateBooleanEntry(xRefTable, d1, dictName, "FB", OPTIONAL, model.V10, nil); err != nil { return err } @@ -170,62 +141,52 @@ func validateAppearanceCharacteristicsDictEntry(xRefTable *model.XRefTable, d ty dictName = "appCharDict" // R, optional, integer - _, err = validateIntegerEntry(xRefTable, d1, dictName, "R", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateIntegerEntry(xRefTable, d1, dictName, "R", OPTIONAL, model.V10, nil); err != nil { return err } // BC, optional, array of numbers, len=0,1,3,4 - _, err = validateNumberArrayEntry(xRefTable, d1, dictName, "BC", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateNumberArrayEntry(xRefTable, d1, dictName, "BC", OPTIONAL, model.V10, nil); err != nil { return err } // BG, optional, array of numbers between 0.0 and 0.1, len=0,1,3,4 - _, err = validateNumberArrayEntry(xRefTable, d1, dictName, "BG", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateNumberArrayEntry(xRefTable, d1, dictName, "BG", OPTIONAL, model.V10, nil); err != nil { return err } // CA, optional, text string - _, err = validateStringEntry(xRefTable, d1, dictName, "CA", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateStringEntry(xRefTable, d1, dictName, "CA", OPTIONAL, model.V10, nil); err != nil { return err } // RC, optional, text string - _, err = validateStringEntry(xRefTable, d1, dictName, "RC", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateStringEntry(xRefTable, d1, dictName, "RC", OPTIONAL, model.V10, nil); err != nil { return err } // AC, optional, text string - _, err = validateStringEntry(xRefTable, d1, dictName, "AC", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateStringEntry(xRefTable, d1, dictName, "AC", OPTIONAL, model.V10, nil); err != nil { return err } // I, optional, stream dict - _, err = validateStreamDictEntry(xRefTable, d1, dictName, "I", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateStreamDictEntry(xRefTable, d1, dictName, "I", OPTIONAL, 
model.V10, nil); err != nil { return err } // RI, optional, stream dict - _, err = validateStreamDictEntry(xRefTable, d1, dictName, "RI", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateStreamDictEntry(xRefTable, d1, dictName, "RI", OPTIONAL, model.V10, nil); err != nil { return err } // IX, optional, stream dict - _, err = validateStreamDictEntry(xRefTable, d1, dictName, "IX", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateStreamDictEntry(xRefTable, d1, dictName, "IX", OPTIONAL, model.V10, nil); err != nil { return err } // IF, optional, icon fit dict, - err = validateIconFitDictEntry(xRefTable, d1, dictName, "IF", OPTIONAL, model.V10) - if err != nil { + if err = validateIconFitDictEntry(xRefTable, d1, dictName, "IF", OPTIONAL, model.V10); err != nil { return err } @@ -240,14 +201,12 @@ func validateAnnotationDictText(xRefTable *model.XRefTable, d types.Dict, dictNa // see 12.5.6.4 // Open, optional, boolean - _, err := validateBooleanEntry(xRefTable, d, dictName, "Open", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateBooleanEntry(xRefTable, d, dictName, "Open", OPTIONAL, model.V10, nil); err != nil { return err } // Name, optional, name - _, err = validateNameEntry(xRefTable, d, dictName, "Name", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateNameEntry(xRefTable, d, dictName, "Name", OPTIONAL, model.V10, nil); err != nil { return err } @@ -283,26 +242,29 @@ func validateAnnotationDictText(xRefTable *model.XRefTable, d types.Dict, dictNa return nil } -func validateActionOrDestination(xRefTable *model.XRefTable, d types.Dict, dictName string, sinceVersion model.Version) error { +func validateActionOrDestination(xRefTable *model.XRefTable, d types.Dict, dictName string, sinceVersion model.Version) (string, error) { // The action that shall be performed when this item is activated. d1, err := validateDictEntry(xRefTable, d, dictName, "A", OPTIONAL, sinceVersion, nil) if err != nil { - return err + return "", err } if d1 != nil { - return validateActionDict(xRefTable, d1) + return "", validateActionDict(xRefTable, d1) } // A destination that shall be displayed when this item is activated. 
+ if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V10 + } obj, err := validateEntry(xRefTable, d, dictName, "Dest", OPTIONAL, sinceVersion) if err != nil || obj == nil { - return err + return "", err } name, err := validateDestination(xRefTable, obj, false) if err != nil { - return err + return "", err } if len(name) > 0 && xRefTable.IsMerging() { @@ -310,7 +272,7 @@ func validateActionOrDestination(xRefTable *model.XRefTable, d types.Dict, dictN nm.Add(name, d) } - return nil + return name, nil } func validateURIActionDictEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entryName string, required bool, sinceVersion model.Version) error { @@ -323,14 +285,12 @@ func validateURIActionDictEntry(xRefTable *model.XRefTable, d types.Dict, dictNa dictName = "URIActionDict" // Type, optional, name - _, err = validateNameEntry(xRefTable, d1, dictName, "Type", OPTIONAL, model.V10, func(s string) bool { return s == "Action" }) - if err != nil { + if _, err = validateNameEntry(xRefTable, d1, dictName, "Type", OPTIONAL, model.V10, func(s string) bool { return s == "Action" }); err != nil { return err } // S, required, name, action Type - _, err = validateNameEntry(xRefTable, d1, dictName, "S", REQUIRED, model.V10, func(s string) bool { return s == "URI" }) - if err != nil { + if _, err = validateNameEntry(xRefTable, d1, dictName, "S", REQUIRED, model.V10, func(s string) bool { return s == "URI" }); err != nil { return err } @@ -342,20 +302,20 @@ func validateAnnotationDictLink(xRefTable *model.XRefTable, d types.Dict, dictNa // see 12.5.6.5 // A or Dest, required either or - err := validateActionOrDestination(xRefTable, d, dictName, model.V11) - if err != nil { - return err + if _, err := validateActionOrDestination(xRefTable, d, dictName, model.V11); err != nil { + if xRefTable.ValidationMode == model.ValidationStrict { + return err + } + model.ShowDigestedSpecViolation("link annotation with unresolved destination") } // H, optional, name, since V1.2 - _, err = validateNameEntry(xRefTable, d, dictName, "H", OPTIONAL, model.V12, nil) - if err != nil { + if _, err := validateNameEntry(xRefTable, d, dictName, "H", OPTIONAL, model.V12, nil); err != nil { return err } // PA, optional, URI action dict, since V1.3 - err = validateURIActionDictEntry(xRefTable, d, dictName, "PA", OPTIONAL, model.V13) - if err != nil { + if err := validateURIActionDictEntry(xRefTable, d, dictName, "PA", OPTIONAL, model.V13); err != nil { return err } @@ -364,23 +324,38 @@ func validateAnnotationDictLink(xRefTable *model.XRefTable, d types.Dict, dictNa if xRefTable.ValidationMode == model.ValidationRelaxed { sinceVersion = model.V13 } - _, err = validateNumberArrayEntry(xRefTable, d, dictName, "QuadPoints", OPTIONAL, sinceVersion, func(a types.Array) bool { return len(a)%8 == 0 }) - if err != nil { + if _, err := validateNumberArrayEntry(xRefTable, d, dictName, "QuadPoints", OPTIONAL, sinceVersion, func(a types.Array) bool { return len(a)%8 == 0 }); err != nil { return err } // BS, optional, border style dict, since V1.6 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V12 + } return validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, sinceVersion) } -func validateAnnotationDictFreeTextPart1(xRefTable *model.XRefTable, d types.Dict, dictName string) error { +func validateAPAndDA(xRefTable *model.XRefTable, d types.Dict, dictName string) error { + + required := REQUIRED // DA, required, string validate := validateDA if xRefTable.ValidationMode 
== model.ValidationRelaxed { + validate = validateDARelaxed + + // An existing AP entry takes precedence over a DA entry. + d1, err := validateDictEntry(xRefTable, d, dictName, "AP", OPTIONAL, model.V12, nil) + if err != nil { + return err + } + if len(d1) > 0 { + required = OPTIONAL + } } - da, err := validateStringEntry(xRefTable, d, dictName, "DA", REQUIRED, model.V10, validate) + + da, err := validateStringEntry(xRefTable, d, dictName, "DA", required, model.V10, validate) if err != nil { return err } @@ -389,33 +364,39 @@ func validateAnnotationDictFreeTextPart1(xRefTable *model.XRefTable, d types.Dic d["DA"] = types.StringLiteral(*da) } + return nil +} + +func validateAnnotationDictFreeTextPart1(xRefTable *model.XRefTable, d types.Dict, dictName string) error { + + if err := validateAPAndDA(xRefTable, d, dictName); err != nil { + return err + } + // Q, optional, integer, since V1.4, 0,1,2 sinceVersion := model.V14 if xRefTable.ValidationMode == model.ValidationRelaxed { sinceVersion = model.V13 } - _, err = validateIntegerEntry(xRefTable, d, dictName, "Q", OPTIONAL, sinceVersion, func(i int) bool { return 0 <= i && i <= 2 }) - if err != nil { + if _, err := validateIntegerEntry(xRefTable, d, dictName, "Q", OPTIONAL, sinceVersion, func(i int) bool { return 0 <= i && i <= 2 }); err != nil { return err } // RC, optional, text string or text stream, since V1.5 sinceVersion = model.V15 if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V14 + sinceVersion = model.V13 } - err = validateStringOrStreamEntry(xRefTable, d, dictName, "RC", OPTIONAL, sinceVersion) - if err != nil { + if err := validateStringOrStreamEntry(xRefTable, d, dictName, "RC", OPTIONAL, sinceVersion); err != nil { return err } // DS, optional, text string, since V1.5 sinceVersion = model.V15 if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V14 + sinceVersion = model.V13 } - _, err = validateStringEntry(xRefTable, d, dictName, "DS", OPTIONAL, sinceVersion, nil) - if err != nil { + if _, err := validateStringEntry(xRefTable, d, dictName, "DS", OPTIONAL, sinceVersion, nil); err != nil { return err } @@ -425,7 +406,7 @@ func validateAnnotationDictFreeTextPart1(xRefTable *model.XRefTable, d types.Dic sinceVersion = model.V14 } - _, err = validateNumberArrayEntry(xRefTable, d, dictName, "CL", OPTIONAL, sinceVersion, func(a types.Array) bool { return len(a) == 4 || len(a) == 6 }) + _, err := validateNumberArrayEntry(xRefTable, d, dictName, "CL", OPTIONAL, sinceVersion, func(a types.Array) bool { return len(a) == 4 || len(a) == 6 }) return err } @@ -440,14 +421,16 @@ func validateAnnotationDictFreeTextPart2(xRefTable *model.XRefTable, d types.Dic validate := func(s string) bool { return types.MemberOf(s, []string{"FreeText", "FreeTextCallout", "FreeTextTypeWriter", "FreeTextTypewriter"}) } - _, err := validateNameEntry(xRefTable, d, dictName, "IT", OPTIONAL, sinceVersion, validate) - if err != nil { + if _, err := validateNameEntry(xRefTable, d, dictName, "IT", OPTIONAL, sinceVersion, validate); err != nil { return err } // BE, optional, border effect dict, since V1.6 - err = validateBorderEffectDictEntry(xRefTable, d, dictName, "BE", OPTIONAL, model.V15) - if err != nil { + sinceVersion = model.V16 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 + } + if err := validateBorderEffectDictEntry(xRefTable, d, dictName, "BE", OPTIONAL, sinceVersion); err != nil { return err } @@ -456,8 +439,7 @@ func 
validateAnnotationDictFreeTextPart2(xRefTable *model.XRefTable, d types.Dic if xRefTable.ValidationMode == model.ValidationRelaxed { sinceVersion = model.V14 } - _, err = validateRectangleEntry(xRefTable, d, dictName, "RD", OPTIONAL, sinceVersion, nil) - if err != nil { + if _, err := validateRectangleEntry(xRefTable, d, dictName, "RD", OPTIONAL, sinceVersion, nil); err != nil { return err } @@ -466,8 +448,7 @@ func validateAnnotationDictFreeTextPart2(xRefTable *model.XRefTable, d types.Dic if xRefTable.ValidationMode == model.ValidationRelaxed { sinceVersion = model.V12 } - err = validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, sinceVersion) - if err != nil { + if err := validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, sinceVersion); err != nil { return err } @@ -476,7 +457,7 @@ func validateAnnotationDictFreeTextPart2(xRefTable *model.XRefTable, d types.Dic if xRefTable.ValidationMode == model.ValidationRelaxed { sinceVersion = model.V14 } - _, err = validateNameEntry(xRefTable, d, dictName, "LE", OPTIONAL, sinceVersion, nil) + _, err := validateNameEntry(xRefTable, d, dictName, "LE", OPTIONAL, sinceVersion, nil) return err } @@ -485,8 +466,7 @@ func validateAnnotationDictFreeText(xRefTable *model.XRefTable, d types.Dict, di // see 12.5.6.6 - err := validateAnnotationDictFreeTextPart1(xRefTable, d, dictName) - if err != nil { + if err := validateAnnotationDictFreeTextPart1(xRefTable, d, dictName); err != nil { return err } @@ -509,19 +489,14 @@ func validateEntryMeasure(xRefTable *model.XRefTable, d types.Dict, dictName str func validateCP(s string) bool { return s == "Inline" || s == "Top" } -func validateAnnotationDictLine(xRefTable *model.XRefTable, d types.Dict, dictName string) error { - - // see 12.5.6.7 - +func validateAnnotationDictLinePart1(xRefTable *model.XRefTable, d types.Dict, dictName string) error { // L, required, array of numbers, len:4 - _, err := validateNumberArrayEntry(xRefTable, d, dictName, "L", REQUIRED, model.V10, func(a types.Array) bool { return len(a) == 4 }) - if err != nil { + if _, err := validateNumberArrayEntry(xRefTable, d, dictName, "L", REQUIRED, model.V10, func(a types.Array) bool { return len(a) == 4 }); err != nil { return err } // BS, optional, border style dict - err = validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, model.V10) - if err != nil { + if err := validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, model.V10); err != nil { return err } @@ -530,72 +505,98 @@ func validateAnnotationDictLine(xRefTable *model.XRefTable, d types.Dict, dictNa if xRefTable.ValidationMode == model.ValidationRelaxed { sinceVersion = model.V13 } - _, err = validateNameArrayEntry(xRefTable, d, dictName, "LE", OPTIONAL, sinceVersion, func(a types.Array) bool { return len(a) == 2 }) - if err != nil { + if _, err := validateNameArrayEntry(xRefTable, d, dictName, "LE", OPTIONAL, sinceVersion, func(a types.Array) bool { return len(a) == 2 }); err != nil { return err } // IC, optional, number array, since V1.4, len:0,1,3,4 - _, err = validateNumberArrayEntry(xRefTable, d, dictName, "IC", OPTIONAL, sinceVersion, nil) - if err != nil { + if _, err := validateNumberArrayEntry(xRefTable, d, dictName, "IC", OPTIONAL, sinceVersion, nil); err != nil { return err } - // LLE, optional, number, since V1.6, >0 - lle, err := validateNumberEntry(xRefTable, d, dictName, "LLE", OPTIONAL, model.V16, func(f float64) bool { return f > 0 }) + // LLE, optional, number, since V1.6, > 0 + sinceVersion = model.V16 + validateLLE := 
func(f float64) bool { return f > 0 } + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 + validateLLE = func(f float64) bool { return f >= 0 } + } + lle, err := validateNumberEntry(xRefTable, d, dictName, "LLE", OPTIONAL, sinceVersion, validateLLE) if err != nil { return err } // LL, required if LLE present, number, since V1.6 - _, err = validateNumberEntry(xRefTable, d, dictName, "LL", lle != nil, model.V16, nil) - if err != nil { + sinceVersion = model.V16 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 + } + if _, err := validateNumberEntry(xRefTable, d, dictName, "LL", lle != nil, sinceVersion, nil); err != nil { return err } // Cap, optional, bool, since V1.6 - _, err = validateBooleanEntry(xRefTable, d, dictName, "Cap", OPTIONAL, model.V16, nil) - if err != nil { - return err + sinceVersion = model.V16 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 } + _, err = validateBooleanEntry(xRefTable, d, dictName, "Cap", OPTIONAL, sinceVersion, nil) + return err +} + +func validateAnnotationDictLinePart2(xRefTable *model.XRefTable, d types.Dict, dictName string) error { // IT, optional, name, since V1.6 - _, err = validateNameEntry(xRefTable, d, dictName, "IT", OPTIONAL, model.V16, nil) - if err != nil { + if _, err := validateNameEntry(xRefTable, d, dictName, "IT", OPTIONAL, model.V16, nil); err != nil { return err } // LLO, optionl, number, since V1.7, >0 - _, err = validateNumberEntry(xRefTable, d, dictName, "LLO", OPTIONAL, model.V17, func(f float64) bool { return f > 0 }) - if err != nil { + if _, err := validateNumberEntry(xRefTable, d, dictName, "LLO", OPTIONAL, model.V17, func(f float64) bool { return f > 0 }); err != nil { return err } // CP, optional, name, since V1.7 - _, err = validateNameEntry(xRefTable, d, dictName, "CP", OPTIONAL, model.V17, validateCP) - if err != nil { + sinceVersion := model.V17 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V15 + } + if _, err := validateNameEntry(xRefTable, d, dictName, "CP", OPTIONAL, sinceVersion, validateCP); err != nil { return err } // Measure, optional, measure dict, since V1.7 - err = validateEntryMeasure(xRefTable, d, dictName, OPTIONAL, model.V17) - if err != nil { + if err := validateEntryMeasure(xRefTable, d, dictName, OPTIONAL, model.V17); err != nil { return err } // CO, optional, number array, since V1.7, len=2 - _, err = validateNumberArrayEntry(xRefTable, d, dictName, "CO", OPTIONAL, model.V17, func(a types.Array) bool { return len(a) == 2 }) + sinceVersion = model.V17 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V15 + } + _, err := validateNumberArrayEntry(xRefTable, d, dictName, "CO", OPTIONAL, sinceVersion, func(a types.Array) bool { return len(a) == 2 }) return err } +func validateAnnotationDictLine(xRefTable *model.XRefTable, d types.Dict, dictName string) error { + + // see 12.5.6.7 + + if err := validateAnnotationDictLinePart1(xRefTable, d, dictName); err != nil { + return err + } + + return validateAnnotationDictLinePart2(xRefTable, d, dictName) +} + func validateAnnotationDictCircleOrSquare(xRefTable *model.XRefTable, d types.Dict, dictName string) error { // see 12.5.6.8 // BS, optional, border style dict - err := validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, model.V10) - if err != nil { + if err := validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, model.V10); err != nil { return err } @@ -604,29 
+605,35 @@ func validateAnnotationDictCircleOrSquare(xRefTable *model.XRefTable, d types.Di if xRefTable.ValidationMode == model.ValidationRelaxed { sinceVersion = model.V13 } - _, err = validateNumberArrayEntry(xRefTable, d, dictName, "IC", OPTIONAL, sinceVersion, nil) - if err != nil { + if _, err := validateNumberArrayEntry(xRefTable, d, dictName, "IC", OPTIONAL, sinceVersion, nil); err != nil { return err } // BE, optional, border effect dict, since V1.5 - err = validateBorderEffectDictEntry(xRefTable, d, dictName, "BE", OPTIONAL, model.V15) - if err != nil { + if err := validateBorderEffectDictEntry(xRefTable, d, dictName, "BE", OPTIONAL, model.V15); err != nil { return err } // RD, optional, rectangle, since V1.5 - _, err = validateRectangleEntry(xRefTable, d, dictName, "RD", OPTIONAL, model.V15, nil) + sinceVersion = model.V15 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V13 + } + _, err := validateRectangleEntry(xRefTable, d, dictName, "RD", OPTIONAL, sinceVersion, nil) return err } func validateEntryIT(xRefTable *model.XRefTable, d types.Dict, dictName string, required bool, sinceVersion model.Version) error { + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 + } + // IT, optional, name, since V1.6 validateIntent := func(s string) bool { - if xRefTable.Version() == model.V16 { + if xRefTable.Version() == sinceVersion { return s == "PolygonCloud" } @@ -650,22 +657,19 @@ func validateAnnotationDictPolyLine(xRefTable *model.XRefTable, d types.Dict, di // see 12.5.6.9 // Vertices, required, array of numbers - _, err := validateNumberArrayEntry(xRefTable, d, dictName, "Vertices", REQUIRED, model.V10, nil) - if err != nil { + if _, err := validateNumberArrayEntry(xRefTable, d, dictName, "Vertices", REQUIRED, model.V10, nil); err != nil { return err } // LE, optional, array of 2 names, meaningful only for polyline annotations. 
if dictName == "PolyLine" { - _, err = validateNameArrayEntry(xRefTable, d, dictName, "LE", OPTIONAL, model.V10, func(a types.Array) bool { return len(a) == 2 }) - if err != nil { + if _, err := validateNameArrayEntry(xRefTable, d, dictName, "LE", OPTIONAL, model.V10, func(a types.Array) bool { return len(a) == 2 }); err != nil { return err } } // BS, optional, border style dict - err = validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, model.V10) - if err != nil { + if err := validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, model.V10); err != nil { return err } @@ -678,15 +682,13 @@ func validateAnnotationDictPolyLine(xRefTable *model.XRefTable, d types.Dict, di } return false } - _, err = validateNumberArrayEntry(xRefTable, d, dictName, "IC", OPTIONAL, model.V14, func(a types.Array) bool { return ensureArrayLength(a, 1, 3, 4) }) - if err != nil { + if _, err := validateNumberArrayEntry(xRefTable, d, dictName, "IC", OPTIONAL, model.V14, func(a types.Array) bool { return ensureArrayLength(a, 1, 3, 4) }); err != nil { return err } // BE, optional, border effect dict, meaningful only for polygon annotations if dictName == "Polygon" { - err = validateBorderEffectDictEntry(xRefTable, d, dictName, "BE", OPTIONAL, model.V10) - if err != nil { + if err := validateBorderEffectDictEntry(xRefTable, d, dictName, "BE", OPTIONAL, model.V10); err != nil { return err } } @@ -723,13 +725,16 @@ func validateAnnotationDictCaret(xRefTable *model.XRefTable, d types.Dict, dictN // see 12.5.6.11 // RD, optional, rectangle, since V1.5 - _, err := validateRectangleEntry(xRefTable, d, dictName, "RD", OPTIONAL, model.V15, nil) - if err != nil { + sinceVersion := model.V15 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 + } + if _, err := validateRectangleEntry(xRefTable, d, dictName, "RD", OPTIONAL, sinceVersion, nil); err != nil { return err } // Sy, optional, name - _, err = validateNameEntry(xRefTable, d, dictName, "Sy", OPTIONAL, model.V10, func(s string) bool { return s == "P" || s == "None" }) + _, err := validateNameEntry(xRefTable, d, dictName, "Sy", OPTIONAL, model.V10, func(s string) bool { return s == "P" || s == "None" }) return err } @@ -739,8 +744,11 @@ func validateAnnotationDictInk(xRefTable *model.XRefTable, d types.Dict, dictNam // see 12.5.6.13 // InkList, required, array of stroked path arrays - _, err := validateArrayArrayEntry(xRefTable, d, dictName, "InkList", REQUIRED, model.V10, nil) - if err != nil { + required := REQUIRED + if xRefTable.ValidationMode == model.ValidationRelaxed { + required = OPTIONAL + } + if _, err := validateArrayArrayEntry(xRefTable, d, dictName, "InkList", required, model.V10, nil); err != nil { return err } @@ -775,15 +783,12 @@ func validateAnnotationDictFileAttachment(xRefTable *model.XRefTable, d types.Di // see 12.5.6.15 // FS, required, file specification - _, err := validateFileSpecEntry(xRefTable, d, dictName, "FS", REQUIRED, model.V10) - if err != nil { + if _, err := validateFileSpecEntry(xRefTable, d, dictName, "FS", REQUIRED, model.V10); err != nil { return err } // Name, optional, name - _, err = validateNameEntry(xRefTable, d, dictName, "Name", OPTIONAL, model.V10, nil) - - return err + return validateNameOrStringEntry(xRefTable, d, dictName, "Name", OPTIONAL, model.V10) } func validateAnnotationDictSound(xRefTable *model.XRefTable, d types.Dict, dictName string) error { @@ -791,13 +796,12 @@ func validateAnnotationDictSound(xRefTable *model.XRefTable, d types.Dict, dictN // see 12.5.6.16 
// Sound, required, stream dict - err := validateSoundDictEntry(xRefTable, d, dictName, "Sound", REQUIRED, model.V10) - if err != nil { + if err := validateSoundDictEntry(xRefTable, d, dictName, "Sound", REQUIRED, model.V10); err != nil { return err } // Name, optional, name - _, err = validateNameEntry(xRefTable, d, dictName, "Name", OPTIONAL, model.V10, nil) + _, err := validateNameEntry(xRefTable, d, dictName, "Name", OPTIONAL, model.V10, nil) return err } @@ -807,20 +811,17 @@ func validateMovieDict(xRefTable *model.XRefTable, d types.Dict) error { dictName := "movieDict" // F, required, file specification - _, err := validateFileSpecEntry(xRefTable, d, dictName, "F", REQUIRED, model.V10) - if err != nil { + if _, err := validateFileSpecEntry(xRefTable, d, dictName, "F", REQUIRED, model.V10); err != nil { return err } // Aspect, optional, integer array, length 2 - _, err = validateIntegerArrayEntry(xRefTable, d, dictName, "Ascpect", OPTIONAL, model.V10, func(a types.Array) bool { return len(a) == 2 }) - if err != nil { + if _, err := validateIntegerArrayEntry(xRefTable, d, dictName, "Aspect", OPTIONAL, model.V10, func(a types.Array) bool { return len(a) == 2 }); err != nil { return err } // Rotate, optional, integer - _, err = validateIntegerEntry(xRefTable, d, dictName, "Rotate", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateIntegerEntry(xRefTable, d, dictName, "Rotate", OPTIONAL, model.V10, nil); err != nil { return err } @@ -836,8 +837,7 @@ func validateAnnotationDictMovie(xRefTable *model.XRefTable, d types.Dict, dictN // They are superseded by the general multimedia framework described in 13.2, “Multimedia.” // T, optional, text string - _, err := validateStringEntry(xRefTable, d, dictName, "T", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateStringEntry(xRefTable, d, dictName, "T", OPTIONAL, model.V10, nil); err != nil { return err } @@ -847,8 +847,7 @@ func validateAnnotationDictMovie(xRefTable *model.XRefTable, d types.Dict, dictN return err } - err = validateMovieDict(xRefTable, d1) - if err != nil { + if err = validateMovieDict(xRefTable, d1); err != nil { return err } @@ -886,16 +885,14 @@ func validateAnnotationDictWidget(xRefTable *model.XRefTable, d types.Dict, dict // H, optional, name validate := func(s string) bool { return types.MemberOf(s, []string{"N", "I", "O", "P", "T", "A"}) } - _, err := validateNameEntry(xRefTable, d, dictName, "H", OPTIONAL, model.V10, validate) - if err != nil { + if _, err := validateNameEntry(xRefTable, d, dictName, "H", OPTIONAL, model.V10, validate); err != nil { return err } // MK, optional, dict // An appearance characteristics dictionary that shall be used in constructing // a dynamic appearance stream specifying the annotation’s visual presentation on the page.dict - err = validateAppearanceCharacteristicsDictEntry(xRefTable, d, dictName, "MK", OPTIONAL, model.V10) - if err != nil { + if err := validateAppearanceCharacteristicsDictEntry(xRefTable, d, dictName, "MK", OPTIONAL, model.V10); err != nil { return err } @@ -906,24 +903,21 @@ func validateAnnotationDictWidget(xRefTable *model.XRefTable, d types.Dict, dict return err } if d1 != nil { - err = validateActionDict(xRefTable, d1) - if err != nil { + if err = validateActionDict(xRefTable, d1); err != nil { return err } } // AA, optional, dict, since V1.2 // An additional-actions dictionary defining the annotation’s behaviour in response to various trigger events. 
- err = validateAdditionalActions(xRefTable, d, dictName, "AA", OPTIONAL, model.V12, "fieldOrAnnot") - if err != nil { + if err = validateAdditionalActions(xRefTable, d, dictName, "AA", OPTIONAL, model.V12, "fieldOrAnnot"); err != nil { return err } // BS, optional, border style dict, since V1.2 // A border style dictionary specifying the width and dash pattern // that shall be used in drawing the annotation’s border. - validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, model.V12) - if err != nil { + if err = validateBorderStyleDict(xRefTable, d, dictName, "BS", OPTIONAL, model.V12); err != nil { return err } @@ -938,15 +932,13 @@ func validateAnnotationDictScreen(xRefTable *model.XRefTable, d types.Dict, dict // see 12.5.6.18 - // T, optional, name - _, err := validateNameEntry(xRefTable, d, dictName, "T", OPTIONAL, model.V10, nil) - if err != nil { + // T, optional, text string + if _, err := validateStringEntry(xRefTable, d, dictName, "T", OPTIONAL, model.V10, nil); err != nil { return err } // MK, optional, appearance characteristics dict - err = validateAppearanceCharacteristicsDictEntry(xRefTable, d, dictName, "MK", OPTIONAL, model.V10) - if err != nil { + if err := validateAppearanceCharacteristicsDictEntry(xRefTable, d, dictName, "MK", OPTIONAL, model.V10); err != nil { return err } @@ -956,8 +948,7 @@ func validateAnnotationDictScreen(xRefTable *model.XRefTable, d types.Dict, dict return err } if d1 != nil { - err = validateActionDict(xRefTable, d1) - if err != nil { + if err = validateActionDict(xRefTable, d1); err != nil { return err } } @@ -971,19 +962,21 @@ func validateAnnotationDictPrinterMark(xRefTable *model.XRefTable, d types.Dict, // see 12.5.6.20 // MN, optional, name - _, err := validateNameEntry(xRefTable, d, dictName, "MN", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateNameEntry(xRefTable, d, dictName, "MN", OPTIONAL, model.V10, nil); err != nil { return err } // F, required integer, since V1.1, annotation flags - _, err = validateIntegerEntry(xRefTable, d, dictName, "F", REQUIRED, model.V11, nil) - if err != nil { + if _, err := validateIntegerEntry(xRefTable, d, dictName, "F", REQUIRED, model.V11, nil); err != nil { return err } // AP, required, appearance dict, since V1.2 - return validateAppearDictEntry(xRefTable, d, dictName, REQUIRED, model.V12) + sinceVersion := model.V12 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V11 + } + return validateAppearDictEntry(xRefTable, d, dictName, REQUIRED, sinceVersion) } func validateAnnotationDictTrapNet(xRefTable *model.XRefTable, d types.Dict, dictName string) error { @@ -991,20 +984,17 @@ func validateAnnotationDictTrapNet(xRefTable *model.XRefTable, d types.Dict, dic // see 12.5.6.21 // LastModified, optional, date - _, err := validateDateEntry(xRefTable, d, dictName, "LastModified", OPTIONAL, model.V10) - if err != nil { + if _, err := validateDateEntry(xRefTable, d, dictName, "LastModified", OPTIONAL, model.V10); err != nil { return err } // Version, optional, array - _, err = validateArrayEntry(xRefTable, d, dictName, "Version", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateArrayEntry(xRefTable, d, dictName, "Version", OPTIONAL, model.V10, nil); err != nil { return err } // AnnotStates, optional, array of names - _, err = validateNameArrayEntry(xRefTable, d, dictName, "AnnotStates", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateNameArrayEntry(xRefTable, d, dictName, "AnnotStates", OPTIONAL, model.V10, nil); 
err != nil { return err } @@ -1039,12 +1029,11 @@ func validateAnnotationDictTrapNet(xRefTable *model.XRefTable, d types.Dict, dic return retValue } - _, err = validateArrayEntry(xRefTable, d, dictName, "FontFauxing", OPTIONAL, model.V10, validateFontDictArray) - if err != nil { + if _, err := validateArrayEntry(xRefTable, d, dictName, "FontFauxing", OPTIONAL, model.V10, validateFontDictArray); err != nil { return err } - _, err = validateIntegerEntry(xRefTable, d, dictName, "F", REQUIRED, model.V11, nil) + _, err := validateIntegerEntry(xRefTable, d, dictName, "F", REQUIRED, model.V11, nil) return err } @@ -1060,25 +1049,22 @@ func validateAnnotationDictWatermark(xRefTable *model.XRefTable, d types.Dict, d dictName := "fixedPrintDict" // Type, required, name - _, err := validateNameEntry(xRefTable, d, dictName, "Type", REQUIRED, model.V10, func(s string) bool { return s == "FixedPrint" }) - if err != nil { + if _, err := validateNameEntry(xRefTable, d, dictName, "Type", REQUIRED, model.V10, func(s string) bool { return s == "FixedPrint" }); err != nil { return false } // Matrix, optional, integer array, length = 6 - _, err = validateIntegerArrayEntry(xRefTable, d, dictName, "Matrix", OPTIONAL, model.V10, func(a types.Array) bool { return len(a) == 6 }) - if err != nil { + if _, err := validateIntegerArrayEntry(xRefTable, d, dictName, "Matrix", OPTIONAL, model.V10, func(a types.Array) bool { return len(a) == 6 }); err != nil { return false } // H, optional, number - _, err = validateNumberEntry(xRefTable, d, dictName, "H", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateNumberEntry(xRefTable, d, dictName, "H", OPTIONAL, model.V10, nil); err != nil { return false } // V, optional, number - _, err = validateNumberEntry(xRefTable, d, dictName, "V", OPTIONAL, model.V10, nil) + _, err := validateNumberEntry(xRefTable, d, dictName, "V", OPTIONAL, model.V10, nil) return err == nil } @@ -1094,25 +1080,22 @@ func validateAnnotationDict3D(xRefTable *model.XRefTable, d types.Dict, dictName // AP with entry N, required // 3DD, required, 3D stream or 3D reference dict - err := validateStreamDictOrDictEntry(xRefTable, d, dictName, "3DD", REQUIRED, model.V16) - if err != nil { + if err := validateStreamDictOrDictEntry(xRefTable, d, dictName, "3DD", REQUIRED, model.V16); err != nil { return err } // 3DV, optional, various - _, err = validateEntry(xRefTable, d, dictName, "3DV", OPTIONAL, model.V16) - if err != nil { + if _, err := validateEntry(xRefTable, d, dictName, "3DV", OPTIONAL, model.V16); err != nil { return err } // 3DA, optional, activation dict - _, err = validateDictEntry(xRefTable, d, dictName, "3DA", OPTIONAL, model.V16, nil) - if err != nil { + if _, err := validateDictEntry(xRefTable, d, dictName, "3DA", OPTIONAL, model.V16, nil); err != nil { return err } // 3DI, optional, boolean - _, err = validateBooleanEntry(xRefTable, d, dictName, "3DI", OPTIONAL, model.V16, nil) + _, err := validateBooleanEntry(xRefTable, d, dictName, "3DI", OPTIONAL, model.V16, nil) return err } @@ -1159,32 +1142,27 @@ func validateAnnotationDictRedact(xRefTable *model.XRefTable, d types.Dict, dict // see 12.5.6.23 // QuadPoints, optional, len: a multiple of 8 - _, err := validateNumberArrayEntry(xRefTable, d, dictName, "QuadPoints", OPTIONAL, model.V10, func(a types.Array) bool { return len(a)%8 == 0 }) - if err != nil { + if _, err := validateNumberArrayEntry(xRefTable, d, dictName, "QuadPoints", OPTIONAL, model.V10, func(a types.Array) bool { return len(a)%8 == 0 }); err != nil { return err } 
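// Editor's note: an illustrative, standalone sketch of the QuadPoints rule the Redact
// validator above enforces — the number array's length must be a multiple of 8, i.e.
// four x/y corner pairs per quadrilateral. The Quad type and groupQuadPoints helper are
// hypothetical names introduced here for illustration; they are not part of the pdfcpu API.
package main

import (
	"errors"
	"fmt"
)

// Quad holds the four corner points of one quadrilateral (x1,y1 ... x4,y4).
type Quad [8]float64

// groupQuadPoints checks the multiple-of-8 constraint and splits the flat
// number array into quads.
func groupQuadPoints(nums []float64) ([]Quad, error) {
	if len(nums)%8 != 0 {
		return nil, errors.New("QuadPoints length must be a multiple of 8")
	}
	quads := make([]Quad, 0, len(nums)/8)
	for i := 0; i < len(nums); i += 8 {
		var q Quad
		copy(q[:], nums[i:i+8])
		quads = append(quads, q)
	}
	return quads, nil
}

func main() {
	// One quadrilateral covering a rectangle: 8 numbers, so this validates.
	qs, err := groupQuadPoints([]float64{10, 20, 110, 20, 10, 40, 110, 40})
	fmt.Println(qs, err)

	// 6 numbers is rejected, mirroring the len(a)%8 == 0 check above.
	_, err = groupQuadPoints([]float64{10, 20, 110, 20, 10, 40})
	fmt.Println(err)
}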
// IC, optional, number array, length:3 [0.0 .. 1.0] - err = validateEntryIC(xRefTable, d, dictName, OPTIONAL, model.V10) - if err != nil { + if err := validateEntryIC(xRefTable, d, dictName, OPTIONAL, model.V10); err != nil { return err } // RO, optional, stream - _, err = validateStreamDictEntry(xRefTable, d, dictName, "RO", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateStreamDictEntry(xRefTable, d, dictName, "RO", OPTIONAL, model.V10, nil); err != nil { return err } // OverlayText, optional, text string - _, err = validateStringEntry(xRefTable, d, dictName, "OverlayText", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateStringEntry(xRefTable, d, dictName, "OverlayText", OPTIONAL, model.V10, nil); err != nil { return err } // Repeat, optional, boolean - _, err = validateBooleanEntry(xRefTable, d, dictName, "Repeat", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateBooleanEntry(xRefTable, d, dictName, "Repeat", OPTIONAL, model.V10, nil); err != nil { return err } @@ -1217,12 +1195,11 @@ func validateExDataDict(xRefTable *model.XRefTable, d types.Dict) error { dictName := "ExData" - _, err := validateNameEntry(xRefTable, d, dictName, "Type", OPTIONAL, model.V10, func(s string) bool { return s == "ExData" }) - if err != nil { + if _, err := validateNameEntry(xRefTable, d, dictName, "Type", OPTIONAL, model.V10, func(s string) bool { return s == "ExData" }); err != nil { return err } - _, err = validateNameEntry(xRefTable, d, dictName, "Subtype", REQUIRED, model.V10, func(s string) bool { return s == "Markup3D" }) + _, err := validateNameEntry(xRefTable, d, dictName, "Subtype", REQUIRED, model.V10, func(s string) bool { return s == "Markup3D" }) return err } @@ -1239,13 +1216,11 @@ func validatePopupEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entr if d1 != nil { - _, err = validateNameEntry(xRefTable, d1, dictName, "Subtype", REQUIRED, model.V10, func(s string) bool { return s == "Popup" }) - if err != nil { + if _, err = validateNameEntry(xRefTable, d1, dictName, "Subtype", REQUIRED, model.V10, func(s string) bool { return s == "Popup" }); err != nil { return err } - _, err = validateAnnotationDict(xRefTable, d1) - if err != nil { + if _, err = validateAnnotationDict(xRefTable, d1); err != nil { return err } @@ -1262,8 +1237,7 @@ func validateIRTEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entryN } if d1 != nil { - _, err = validateAnnotationDict(xRefTable, d1) - if err != nil { + if _, err = validateAnnotationDict(xRefTable, d1); err != nil { return err } } @@ -1284,14 +1258,18 @@ func validateMarkupAnnotationPart1(xRefTable *model.XRefTable, d types.Dict, dic } // CA, optional, number, since V1.4 - if _, err := validateNumberEntry(xRefTable, d, dictName, "CA", OPTIONAL, model.V14, nil); err != nil { + sinceVersion := model.V14 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V13 + } + if _, err := validateNumberEntry(xRefTable, d, dictName, "CA", OPTIONAL, sinceVersion, nil); err != nil { return err } // RC, optional, text string or stream, since V1.5 - sinceVersion := model.V15 + sinceVersion = model.V15 if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V14 + sinceVersion = model.V13 } if err := validateStringOrStreamEntry(xRefTable, d, dictName, "RC", OPTIONAL, sinceVersion); err != nil { return err @@ -1323,7 +1301,7 @@ func validateMarkupAnnotationPart2(xRefTable *model.XRefTable, d types.Dict, dic // Subj, optional, text string, since V1.5 
sinceVersion = model.V15 if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V14 + sinceVersion = model.V13 } if _, err := validateStringEntry(xRefTable, d, dictName, "Subj", OPTIONAL, sinceVersion, nil); err != nil { return err @@ -1331,14 +1309,18 @@ func validateMarkupAnnotationPart2(xRefTable *model.XRefTable, d types.Dict, dic // RT, optional, name, since V1.6 validate := func(s string) bool { return s == "R" || s == "Group" } - if _, err := validateNameEntry(xRefTable, d, dictName, "RT", OPTIONAL, model.V16, validate); err != nil { + sinceVersion = model.V16 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 + } + if _, err := validateNameEntry(xRefTable, d, dictName, "RT", OPTIONAL, sinceVersion, validate); err != nil { return err } // IT, optional, name, since V1.6 sinceVersion = model.V16 if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V14 + sinceVersion = model.V13 } if _, err := validateNameEntry(xRefTable, d, dictName, "IT", OPTIONAL, sinceVersion, nil); err != nil { return err @@ -1411,6 +1393,54 @@ func validateAppearDictEntry(xRefTable *model.XRefTable, d types.Dict, dictName return err } +func validateDashPatternArray(xRefTable *model.XRefTable, arr types.Array) bool { + + // len must be 0,1,2,3 numbers (dont'allow only 0s) + + if len(arr) > 3 { + return false + } + + all0 := true + for j := 0; j < len(arr); j++ { + o, err := xRefTable.Dereference(arr[j]) + if err != nil || o == nil { + return false + } + + var f float64 + + switch o := o.(type) { + case types.Integer: + f = float64(o.Value()) + case types.Float: + f = o.Value() + default: + return false + } + + if f < 0 { + return false + } + + if f != 0 { + all0 = false + } + + } + + if all0 { + if xRefTable.ValidationMode != model.ValidationRelaxed { + return false + } + if log.ValidateEnabled() { + log.Validate.Println("digesting invalid dash pattern array: %s", arr) + } + } + + return true +} + func validateBorderArray(xRefTable *model.XRefTable, a types.Array) bool { if len(a) == 0 { return true @@ -1429,58 +1459,16 @@ func validateBorderArray(xRefTable *model.XRefTable, a types.Array) bool { if i == 3 { // validate dash pattern array // len must be 0,1,2,3 numbers (dont'allow only 0s) - a1, ok := a[i].(types.Array) + dpa, ok := a[i].(types.Array) if !ok { return xRefTable.ValidationMode == model.ValidationRelaxed } - if len(a1) == 0 { + if len(dpa) == 0 { return true } - if len(a1) > 3 { - return false - } - - all0 := true - for j := 0; j < len(a1); j++ { - o, err := xRefTable.Dereference(a1[j]) - if err != nil || o == nil { - return false - } - - var f float64 - - switch o := o.(type) { - case types.Integer: - f = float64(o.Value()) - case types.Float: - f = o.Value() - default: - return false - } - - if f < 0 { - return false - } - - if f != 0 { - all0 = false - break - } - - } - - if all0 { - if xRefTable.ValidationMode != model.ValidationRelaxed { - return false - } - if log.ValidateEnabled() { - log.Validate.Println("digesting invalid dash pattern array: %s", a1) - } - } - - continue + return validateDashPatternArray(xRefTable, dpa) } o, err := xRefTable.Dereference(a[i]) @@ -1509,8 +1497,7 @@ func validateBorderArray(xRefTable *model.XRefTable, a types.Array) bool { func validateAnnotationDictGeneralPart1(xRefTable *model.XRefTable, d types.Dict, dictName string) (*types.Name, error) { // Type, optional, name - _, err := validateNameEntry(xRefTable, d, dictName, "Type", OPTIONAL, model.V10, func(s string) bool { 
return s == "Annot" }) - if err != nil { + if _, err := validateNameEntry(xRefTable, d, dictName, "Type", OPTIONAL, model.V10, func(s string) bool { return s == "Annot" }); err != nil { return nil, err } @@ -1521,20 +1508,30 @@ func validateAnnotationDictGeneralPart1(xRefTable *model.XRefTable, d types.Dict } // Rect, required, rectangle - _, err = validateRectangleEntry(xRefTable, d, dictName, "Rect", REQUIRED, model.V10, nil) - if err != nil { - return nil, err + if _, err = validateRectangleEntry(xRefTable, d, dictName, "Rect", REQUIRED, model.V10, nil); err != nil { + if xRefTable.ValidationMode == model.ValidationStrict { + return nil, err + } } // Contents, optional, text string - _, err = validateStringEntry(xRefTable, d, dictName, "Contents", OPTIONAL, model.V10, nil) - if err != nil { - return nil, err + if _, err = validateStringEntry(xRefTable, d, dictName, "Contents", OPTIONAL, model.V10, nil); err != nil { + if xRefTable.ValidationMode != model.ValidationRelaxed { + return nil, err + } + i, err := validateIntegerEntry(xRefTable, d, dictName, "Contents", OPTIONAL, model.V10, nil) + if err != nil { + return nil, err + } + if i != nil { + // Repair + s := strconv.Itoa(i.Value()) + d["Contents"] = types.StringLiteral(s) + } } // P, optional, indRef of page dict - err = validateEntryP(xRefTable, d, dictName, OPTIONAL, model.V10) - if err != nil { + if err = validateEntryP(xRefTable, d, dictName, OPTIONAL, model.V10); err != nil { return nil, err } @@ -1543,8 +1540,7 @@ func validateAnnotationDictGeneralPart1(xRefTable *model.XRefTable, d types.Dict if xRefTable.ValidationMode == model.ValidationRelaxed { sinceVersion = model.V13 } - _, err = validateStringEntry(xRefTable, d, dictName, "NM", OPTIONAL, sinceVersion, nil) - if err != nil { + if _, err = validateStringEntry(xRefTable, d, dictName, "NM", OPTIONAL, sinceVersion, nil); err != nil { return nil, err } @@ -1563,7 +1559,11 @@ func validateAnnotationDictGeneralPart2(xRefTable *model.XRefTable, d types.Dict } // AP, optional, appearance dict, since V1.2 - if err := validateAppearDictEntry(xRefTable, d, dictName, OPTIONAL, model.V12); err != nil { + sinceVersion := model.V12 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V11 + } + if err := validateAppearDictEntry(xRefTable, d, dictName, OPTIONAL, sinceVersion); err != nil { return err } @@ -1621,41 +1621,47 @@ func validateAnnotationDictConcrete(xRefTable *model.XRefTable, d types.Dict, di // see table 169 for k, v := range map[string]struct { - validate func(xRefTable *model.XRefTable, d types.Dict, dictName string) error - sinceVersion model.Version - markup bool + validate func(xRefTable *model.XRefTable, d types.Dict, dictName string) error + sinceVersion model.Version + sinceVersionRelaxed model.Version + markup bool }{ - "Text": {validateAnnotationDictText, model.V10, true}, - "Link": {validateAnnotationDictLink, model.V10, false}, - "FreeText": {validateAnnotationDictFreeText, model.V12, true}, // model.V13 - "Line": {validateAnnotationDictLine, model.V13, true}, - "Polygon": {validateAnnotationDictPolyLine, model.V15, true}, - "PolyLine": {validateAnnotationDictPolyLine, model.V15, true}, - "Highlight": {validateTextMarkupAnnotation, model.V13, true}, - "Underline": {validateTextMarkupAnnotation, model.V13, true}, - "Squiggly": {validateTextMarkupAnnotation, model.V14, true}, - "StrikeOut": {validateTextMarkupAnnotation, model.V13, true}, - "Square": {validateAnnotationDictCircleOrSquare, model.V13, true}, - "Circle": 
{validateAnnotationDictCircleOrSquare, model.V13, true}, - "Stamp": {validateAnnotationDictStamp, model.V13, true}, - "Caret": {validateAnnotationDictCaret, model.V15, true}, - "Ink": {validateAnnotationDictInk, model.V13, true}, - "Popup": {validateAnnotationDictPopup, model.V12, false}, // model.V13 - "FileAttachment": {validateAnnotationDictFileAttachment, model.V13, true}, - "Sound": {validateAnnotationDictSound, model.V12, true}, - "Movie": {validateAnnotationDictMovie, model.V12, false}, - "Widget": {validateAnnotationDictWidget, model.V12, false}, - "Screen": {validateAnnotationDictScreen, model.V15, false}, - "PrinterMark": {validateAnnotationDictPrinterMark, model.V14, false}, - "TrapNet": {validateAnnotationDictTrapNet, model.V13, false}, - "Watermark": {validateAnnotationDictWatermark, model.V16, false}, - "3D": {validateAnnotationDict3D, model.V16, false}, - "Redact": {validateAnnotationDictRedact, model.V17, true}, - "RichMedia": {validateRichMediaAnnotation, model.V17, false}, + "Text": {validateAnnotationDictText, model.V10, model.V10, true}, + "Link": {validateAnnotationDictLink, model.V10, model.V10, false}, + "FreeText": {validateAnnotationDictFreeText, model.V13, model.V12, true}, + "Line": {validateAnnotationDictLine, model.V13, model.V13, true}, + "Polygon": {validateAnnotationDictPolyLine, model.V15, model.V14, true}, + "PolyLine": {validateAnnotationDictPolyLine, model.V15, model.V14, true}, + "Highlight": {validateTextMarkupAnnotation, model.V13, model.V13, true}, + "Underline": {validateTextMarkupAnnotation, model.V13, model.V13, true}, + "Squiggly": {validateTextMarkupAnnotation, model.V14, model.V14, true}, + "StrikeOut": {validateTextMarkupAnnotation, model.V13, model.V13, true}, + "Square": {validateAnnotationDictCircleOrSquare, model.V13, model.V13, true}, + "Circle": {validateAnnotationDictCircleOrSquare, model.V13, model.V13, true}, + "Stamp": {validateAnnotationDictStamp, model.V13, model.V13, true}, + "Caret": {validateAnnotationDictCaret, model.V15, model.V14, true}, + "Ink": {validateAnnotationDictInk, model.V13, model.V13, true}, + "Popup": {validateAnnotationDictPopup, model.V13, model.V12, false}, + "FileAttachment": {validateAnnotationDictFileAttachment, model.V13, model.V13, true}, + "Sound": {validateAnnotationDictSound, model.V12, model.V12, true}, + "Movie": {validateAnnotationDictMovie, model.V12, model.V12, false}, + "Widget": {validateAnnotationDictWidget, model.V12, model.V11, false}, + "Screen": {validateAnnotationDictScreen, model.V15, model.V14, false}, + "PrinterMark": {validateAnnotationDictPrinterMark, model.V14, model.V14, false}, + "TrapNet": {validateAnnotationDictTrapNet, model.V13, model.V13, false}, + "Watermark": {validateAnnotationDictWatermark, model.V16, model.V16, false}, + "3D": {validateAnnotationDict3D, model.V16, model.V16, false}, + "Redact": {validateAnnotationDictRedact, model.V17, model.V17, true}, + "RichMedia": {validateRichMediaAnnotation, model.V17, model.V14, false}, } { if subtype.Value() == k { - err := xRefTable.ValidateVersion(k, v.sinceVersion) + sinceVersion := v.sinceVersion + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = v.sinceVersionRelaxed + } + + err := xRefTable.ValidateVersion(k, sinceVersion) if err != nil { return err } @@ -1671,14 +1677,9 @@ func validateAnnotationDictConcrete(xRefTable *model.XRefTable, d types.Dict, di } } - return errors.Errorf("validateAnnotationDictConcrete: unsupported annotation subtype:%s\n", subtype) -} - -func 
validateAnnotationDictSpecial(xRefTable *model.XRefTable, d types.Dict, dictName string) error { + xRefTable.CustomExtensions = true - // AAPL:AKExtras - // No documentation for this PDF-Extension - this is a speculative implementation. - return validateAAPLAKExtrasDictEntry(xRefTable, d, dictName, "AAPL:AKExtras", OPTIONAL, model.V10) + return nil } func validateAnnotationDict(xRefTable *model.XRefTable, d types.Dict) (isTrapNet bool, err error) { @@ -1690,48 +1691,54 @@ func validateAnnotationDict(xRefTable *model.XRefTable, d types.Dict) (isTrapNet return false, err } - err = validateAnnotationDictConcrete(xRefTable, d, dictName, *subtype) - if err != nil { - return false, err - } - - err = validateAnnotationDictSpecial(xRefTable, d, dictName) - if err != nil { + if err = validateAnnotationDictConcrete(xRefTable, d, dictName, *subtype); err != nil { return false, err } return *subtype == "TrapNet", nil } -func validatePageAnnotations(xRefTable *model.XRefTable, d types.Dict) error { - - a, err := validateArrayEntry(xRefTable, d, "pageDict", "Annots", OPTIONAL, model.V10, nil) - if err != nil || a == nil { - return err +func addAnnotation(ann model.AnnotationRenderer, pgAnnots model.PgAnnots, i int, hasIndRef bool, indRef types.IndirectRef) { + annots, ok := pgAnnots[ann.Type()] + if !ok { + annots = model.Annot{} + annots.IndRefs = &[]types.IndirectRef{} + annots.Map = model.AnnotMap{} + pgAnnots[ann.Type()] = annots } - if len(a) == 0 { - return nil + objNr := -i + if hasIndRef { + objNr = indRef.ObjectNumber.Value() + *(annots.IndRefs) = append(*(annots.IndRefs), indRef) } + annots.Map[objNr] = ann +} - // array of indrefs to annotation dicts. - var annotsDict types.Dict +func validateAnnotationsArray(xRefTable *model.XRefTable, a types.Array) error { - // an optional TrapNetAnnotation has to be the final entry in this list. - hasTrapNet := false + // a ... array of indrefs to annotation dicts. + + var annotDict types.Dict pgAnnots := model.PgAnnots{} xRefTable.PageAnnots[xRefTable.CurPage] = pgAnnots + // an optional TrapNetAnnotation has to be the final entry in this list. 
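// Editor's note: a minimal, self-contained sketch of the ordering rule enforced by the
// annotation loop below — if a page's Annots array contains a TrapNet annotation, it must
// be the last element. The subtypes []string input and the checkTrapNetIsLast name are
// illustrative only and not part of pdfcpu.
package main

import (
	"errors"
	"fmt"
)

func checkTrapNetIsLast(subtypes []string) error {
	for i, st := range subtypes {
		if st == "TrapNet" && i != len(subtypes)-1 {
			return errors.New("invalid page annotation list: \"TrapNet\" has to be the last entry")
		}
	}
	return nil
}

func main() {
	fmt.Println(checkTrapNetIsLast([]string{"Link", "Widget", "TrapNet"})) // <nil>
	fmt.Println(checkTrapNetIsLast([]string{"TrapNet", "Link"}))           // error
}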
+ hasTrapNet := false + for i, v := range a { if hasTrapNet { - return errors.New("pdfcpu: validatePageAnnotations: corrupted page annotation list, \"TrapNet\" has to be the last entry") + return errors.New("pdfcpu: validatePageAnnotations: invalid page annotation list, \"TrapNet\" has to be the last entry") } var ( - ok, hasIndRef bool - indRef types.IndirectRef + ok bool + hasIndRef bool + indRef types.IndirectRef + incr int + err error ) if indRef, ok = v.(types.IndirectRef); ok { @@ -1739,16 +1746,16 @@ func validatePageAnnotations(xRefTable *model.XRefTable, d types.Dict) error { if log.ValidateEnabled() { log.Validate.Printf("processing annotDict %d\n", indRef.ObjectNumber) } - annotsDict, err = xRefTable.DereferenceDict(indRef) + annotDict, incr, err = xRefTable.DereferenceDictWithIncr(indRef) if err != nil { return err } - if annotsDict == nil { + if len(annotDict) == 0 { continue } } else if xRefTable.ValidationMode != model.ValidationRelaxed { return errInvalidPageAnnotArray - } else if annotsDict, ok = v.(types.Dict); !ok { + } else if annotDict, ok = v.(types.Dict); !ok { return errInvalidPageAnnotArray } else { if log.ValidateEnabled() { @@ -1756,48 +1763,53 @@ func validatePageAnnotations(xRefTable *model.XRefTable, d types.Dict) error { } } - hasTrapNet, err = validateAnnotationDict(xRefTable, annotsDict) - if err != nil { - return err + if hasIndRef { + objNr := indRef.ObjectNumber.Value() + if objNr > 0 { + if err := cacheSig(xRefTable, annotDict, "formFieldDict", false, objNr, incr); err != nil { + return err + } + } } - // Collect annotations. - ann, err := pdfcpu.Annotation(xRefTable, annotsDict) + hasTrapNet, err = validateAnnotationDict(xRefTable, annotDict) if err != nil { return err } - annots, ok1 := pgAnnots[ann.Type()] - if !ok1 { - annots = model.Annot{} - annots.IndRefs = &[]types.IndirectRef{} - annots.Map = model.AnnotMap{} - pgAnnots[ann.Type()] = annots - } + // Collect annotation. - objNr := -i - if hasIndRef { - objNr = indRef.ObjectNumber.Value() - *(annots.IndRefs) = append(*(annots.IndRefs), indRef) + ann, err := pdfcpu.Annotation(xRefTable, annotDict) + if err != nil { + return err } - annots.Map[objNr] = ann + + addAnnotation(ann, pgAnnots, i, hasIndRef, indRef) } return nil } -func validatePagesAnnotations(xRefTable *model.XRefTable, d types.Dict, curPage int) (int, error) { - - // Get number of pages of this PDF file. - pageCount := d.IntEntry("Count") - if pageCount == nil { - return curPage, errors.New("pdfcpu: validatePagesAnnotations: missing \"Count\"") +func validatePageAnnotations(xRefTable *model.XRefTable, d types.Dict) error { + a, err := validateArrayEntry(xRefTable, d, "pageDict", "Annots", OPTIONAL, model.V10, nil) + if err != nil || a == nil { + return err } - if log.ValidateEnabled() { - log.Validate.Printf("validatePagesAnnotations: This page node has %d pages\n", *pageCount) + a = a.RemoveNulls() + + if len(a) == 0 { + delete(d, "Annots") + return nil } + d["Annots"] = a + + return validateAnnotationsArray(xRefTable, a) +} + +func validatePagesAnnotations(xRefTable *model.XRefTable, d types.Dict, curPage int) (int, error) { + // Iterate over page tree. 
kidsArray := d.ArrayEntry("Kids") @@ -1834,8 +1846,7 @@ func validatePagesAnnotations(xRefTable *model.XRefTable, d types.Dict, curPage case "Page": curPage++ xRefTable.CurPage = curPage - err = validatePageAnnotations(xRefTable, d) - if err != nil { + if err = validatePageAnnotations(xRefTable, d); err != nil { return curPage, err } diff --git a/pkg/pdfcpu/validate/colorspace.go b/pkg/pdfcpu/validate/colorspace.go index 88fe8a0f..baf17dce 100644 --- a/pkg/pdfcpu/validate/colorspace.go +++ b/pkg/pdfcpu/validate/colorspace.go @@ -17,6 +17,8 @@ limitations under the License. package validate import ( + "fmt" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -628,8 +630,10 @@ func validateColorSpace(xRefTable *model.XRefTable, o types.Object, excludePatte err = validateColorSpaceArray(xRefTable, o, excludePatternCS) default: - err = errors.New("pdfcpu: validateColorSpace: corrupt obj typ, must be Name or Array") - + if xRefTable.ValidationMode == model.ValidationStrict { + return errors.Errorf("pdfcpu: validateColorSpace: corrupt obj type(%T), must be Name or Array", o) + } + model.ShowSkipped(fmt.Sprintf("invalid color space type: %s", o)) } return err @@ -646,7 +650,10 @@ func validateColorSpaceEntry(xRefTable *model.XRefTable, d types.Dict, dictName case types.Name: if ok := validateDeviceColorSpaceName(o.Value()); !ok { - err = errors.Errorf("pdfcpu: validateColorSpaceEntry: Name:%s\n", o.Value()) + if xRefTable.ValidationMode == model.ValidationStrict { + return errors.Errorf("pdfcpu: invalid colorSpaceEntry: Name:%s\n", o.Value()) + } + model.ShowSkipped(fmt.Sprintf("invalid colorSpaceEntry: %s", o.Value())) } case types.Array: diff --git a/pkg/pdfcpu/validate/destination.go b/pkg/pdfcpu/validate/destination.go index f6c70d00..773c3562 100644 --- a/pkg/pdfcpu/validate/destination.go +++ b/pkg/pdfcpu/validate/destination.go @@ -29,89 +29,75 @@ func validateDestinationArrayFirstElement(xRefTable *model.XRefTable, a types.Ar return nil, err } - if o == nil { - return nil, errors.Errorf("destination array invalid: %s", a) - } - switch o := o.(type) { case types.Integer, types.Name: // no further processing case types.Dict: if o.Type() == nil || (o.Type() != nil && (*o.Type() != "Page" && *o.Type() != "Pages")) { - err = errors.Errorf("pdfcpu: validateDestinationArrayFirstElement: first element must be a pageDict indRef or an integer: %v (%T)", o, o) + err = errors.Errorf("pdfcpu: validateDestinationArrayFirstElement: must be a pageDict indRef or an integer: %v (%T)", o, o) } default: - err = errors.Errorf("pdfcpu: validateDestinationArrayFirstElement: first element must be a pageDict indRef or an integer: %v (%T)", o, o) + err = errors.Errorf("pdfcpu: validateDestinationArrayFirstElement: must be a pageDict indRef or an integer: %v (%T)", o, o) + if xRefTable.ValidationMode == model.ValidationRelaxed { + err = nil + } } return o, err } func validateDestinationArrayLength(a types.Array) bool { - l := len(a) - return l == 2 || l == 3 || l == 5 || l == 6 || l == 4 // 4 = hack! 
see below + return len(a) >= 2 && len(a) <= 6 +} + +func validateDestType(a types.Array, destType types.Name) error { + switch destType { + case "Fit": + case "FitB": + if len(a) > 2 { + return errors.Errorf("pdfcpu: validateDestinationArray: %s - invalid length: %d", destType, len(a)) + } + case "FitH": + case "FitV": + case "FitBH": + case "FitBV": + if len(a) > 3 { + return errors.Errorf("pdfcpu: validateDestinationArray: %s - invalid length: %d", destType, len(a)) + } + case "XYZ": + if len(a) > 5 { + return errors.Errorf("pdfcpu: validateDestinationArray: %s - invalid length: %d", destType, len(a)) + } + case "FitR": + if len(a) > 6 { + return errors.Errorf("pdfcpu: validateDestinationArray: %s - invalid length: %d", destType, len(a)) + } + default: + return errors.Errorf("pdfcpu: validateDestinationArray j- invalid mode: %s", destType) + } + + return nil } func validateDestinationArray(xRefTable *model.XRefTable, a types.Array) error { + if !validateDestinationArrayLength(a) { + return errors.Errorf("pdfcpu: validateDestinationArray: invalid length: %d", len(a)) + } // Validate first element: indRef of page dict or pageNumber(int) of remote doc for remote Go-to Action or nil. - o, err := validateDestinationArrayFirstElement(xRefTable, a) if err != nil || o == nil { return err } - if !validateDestinationArrayLength(a) { - return errors.Errorf("pdfcpu: validateDestinationArray: invalid length: %d", len(a)) - } - - // NOTE if len == 4 we possible have a missing first element, which should be an indRef to the dest page. - // TODO Investigate. - i := 1 - // if len(a) == 4 { - // i = 0 - // } - - // Validate rest of array elements. - - name, ok := a[i].(types.Name) + name, ok := a[1].(types.Name) if !ok { - return errors.Errorf("pdfcpu: validateDestinationArray: second element must be a name %v (%d)", a[i], i) - } - - var nameErr bool - - switch len(a) { - - case 2: - nameErr = !types.MemberOf(name.Value(), []string{"Fit", "FitB", "FitH", "FitV", "FitBH", "FitBV"}) - - case 3: - nameErr = !types.MemberOf(name.Value(), []string{"FitH", "FitV", "FitBH", "FitBV"}) - - case 4: - // TODO Cleanup - // hack for #381 - possibly zoom == null or 0 - // eg. 
[(886 0 R) XYZ 53 303] - nameErr = name.Value() != "XYZ" - - case 5: - nameErr = name.Value() != "XYZ" - - case 6: - nameErr = name.Value() != "FitR" - - default: - return errors.Errorf("validateDestinationArray: array length %d not allowed: %v", len(a), a) - } - - if nameErr { - return errors.New("pdfcpu: validateDestinationArray: arr[1] corrupt") + return errors.Errorf("pdfcpu: validateDestinationArray: second element must be a name %v", a[1]) } - return nil + return validateDestType(a, name) } func validateDestinationDict(xRefTable *model.XRefTable, d types.Dict) error { diff --git a/pkg/pdfcpu/validate/extGState.go b/pkg/pdfcpu/validate/extGState.go index 6e53c154..b87a35a3 100644 --- a/pkg/pdfcpu/validate/extGState.go +++ b/pkg/pdfcpu/validate/extGState.go @@ -28,7 +28,7 @@ func validateBlendMode(s string) bool { // see 11.3.5; table 136 - return types.MemberOf(s, []string{"None", "Normal", "Compatible", "Multiply", "Screen", "Overlay", "Darken", "Lighten", + return types.MemberOf(s, []string{"None", "Normal", "Compatible", "Multiply", "Mult", "Screen", "Overlay", "Darken", "Lighten", "ColorDodge", "ColorBurn", "HardLight", "SoftLight", "Difference", "Exclusion", "Hue", "Saturation", "Color", "Luminosity"}) } @@ -40,12 +40,14 @@ func validateLineDashPatternEntry(xRefTable *model.XRefTable, d types.Dict, dict return err } - _, err = validateIntegerArray(xRefTable, a[0]) + // We are dealing with integers which may be represented by Integer or Float objects. + + _, err = validateNumberArray(xRefTable, a[0]) if err != nil { return err } - _, err = validateInteger(xRefTable, a[1], nil) + _, err = validateNumber(xRefTable, a[1]) return err } @@ -297,7 +299,7 @@ func validateTREntry(xRefTable *model.XRefTable, d types.Dict, dictName string, return validateTR(xRefTable, o) } -func validateTR2Name(xRefTable *model.XRefTable, name types.Name) error { +func validateTR2Name(name types.Name) error { s := name.Value() if s != "Identity" && s != "Default" { return errors.Errorf("pdfcpu: validateTR2: corrupt name\n") @@ -310,7 +312,7 @@ func validateTR2(xRefTable *model.XRefTable, o types.Object) (err error) { switch o := o.(type) { case types.Name: - if err = validateTR2Name(xRefTable, o); err != nil { + if err = validateTR2Name(o); err != nil { return err } @@ -332,7 +334,7 @@ func validateTR2(xRefTable *model.XRefTable, o types.Object) (err error) { } if o, ok := o.(types.Name); ok { - if err = validateTR2Name(xRefTable, o); err != nil { + if err = validateTR2Name(o); err != nil { return err } continue @@ -874,7 +876,11 @@ func validateExtGStateDictPart2(xRefTable *model.XRefTable, d types.Dict, dictNa // HT, dict, stream or name, optional // half tone dictionary or stream or /Default, see 10.5 - err = validateHalfToneEntry(xRefTable, d, dictName, "HT", OPTIONAL, model.V12) + sinceVersion := model.V12 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V11 + } + err = validateHalfToneEntry(xRefTable, d, dictName, "HT", OPTIONAL, sinceVersion) if err != nil { return err } @@ -886,7 +892,11 @@ func validateExtGStateDictPart2(xRefTable *model.XRefTable, d types.Dict, dictNa } // SM, number, optional, since V1.3, smoothness tolerance - _, err = validateNumberEntry(xRefTable, d, dictName, "SM", OPTIONAL, model.V13, nil) + sinceVersion = model.V13 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V12 + } + _, err = validateNumberEntry(xRefTable, d, dictName, "SM", OPTIONAL, sinceVersion, nil) if err != nil { return err } @@ -932,7 +942,7 @@ 
func validateExtGStateDictPart3(xRefTable *model.XRefTable, d types.Dict, dictNa // ca, number, optional, since V1.4, same as CA but for nonstroking operations. sinceVersion = model.V14 if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V13 + sinceVersion = model.V11 } _, err = validateNumberEntry(xRefTable, d, dictName, "ca", OPTIONAL, sinceVersion, nil) if err != nil { @@ -986,7 +996,21 @@ func validateExtGStateDict(xRefTable *model.XRefTable, o types.Object) error { return err } - return validateExtGStateDictPart3(xRefTable, d, dictName) + err = validateExtGStateDictPart3(xRefTable, d, dictName) + if err != nil { + return err + } + + // Check for AAPL extensions. + o, _, err = d.Entry(dictName, "AAPL:AA", OPTIONAL) + if err != nil { + return err + } + if o != nil { + xRefTable.CustomExtensions = true + } + + return nil } func validateExtGStateResourceDict(xRefTable *model.XRefTable, o types.Object, sinceVersion model.Version) error { diff --git a/pkg/pdfcpu/validate/fileSpec.go b/pkg/pdfcpu/validate/fileSpec.go index a50d2143..012f8976 100644 --- a/pkg/pdfcpu/validate/fileSpec.go +++ b/pkg/pdfcpu/validate/fileSpec.go @@ -27,7 +27,6 @@ import ( // See 7.11.4 func validateFileSpecString(s string) bool { - // see 7.11.2 // The standard format for representing a simple file specification in string form divides the string into component substrings // separated by the SOLIDUS character (2Fh) (/). The SOLIDUS is a generic component separator that shall be mapped to the appropriate @@ -44,7 +43,6 @@ func validateFileSpecString(s string) bool { } func validateURLString(s string) bool { - // RFC1738 compliant URL, see 7.11.5 _, err := url.ParseRequestURI(s) @@ -53,32 +51,30 @@ func validateURLString(s string) bool { } func validateEmbeddedFileStreamMacParameterDict(xRefTable *model.XRefTable, d types.Dict) error { - dictName := "embeddedFileStreamMacParameterDict" // Subtype, optional integer // The embedded file's file type integer encoded according to Mac OS conventions. - _, err := validateIntegerEntry(xRefTable, d, dictName, "Subtype", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateIntegerEntry(xRefTable, d, dictName, "Subtype", OPTIONAL, model.V10, nil); err != nil { return err } // Creator, optional integer // The embedded file's creator signature integer encoded according to Mac OS conventions. - _, err = validateIntegerEntry(xRefTable, d, dictName, "Creator", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateIntegerEntry(xRefTable, d, dictName, "Creator", OPTIONAL, model.V10, nil); err != nil { return err } // ResFork, optional stream dict // The binary contents of the embedded file's resource fork. 
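// Editor's note: the URL form of a file specification is checked with the standard
// library's net/url.ParseRequestURI, as in validateURLString above. This is a small
// runnable demonstration of what that call accepts and rejects; the isValidURLSpec
// name is illustrative only, not a pdfcpu function.
package main

import (
	"fmt"
	"net/url"
)

func isValidURLSpec(s string) bool {
	_, err := url.ParseRequestURI(s)
	return err == nil
}

func main() {
	fmt.Println(isValidURLSpec("https://example.com/doc.pdf")) // true
	fmt.Println(isValidURLSpec("not a url"))                   // false: no scheme, not an absolute path
}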
- _, err = validateStreamDictEntry(xRefTable, d, dictName, "ResFork", OPTIONAL, model.V10, nil) + if _, err := validateStreamDictEntry(xRefTable, d, dictName, "ResFork", OPTIONAL, model.V10, nil); err != nil { + return err + } - return err + return nil } func validateEmbeddedFileStreamParameterDict(xRefTable *model.XRefTable, o types.Object) error { - d, err := xRefTable.DereferenceDict(o) if err != nil || d == nil { return err @@ -87,20 +83,17 @@ func validateEmbeddedFileStreamParameterDict(xRefTable *model.XRefTable, o types dictName := "embeddedFileStreamParmDict" // Size, optional integer - _, err = validateIntegerEntry(xRefTable, d, dictName, "Size", OPTIONAL, model.V10, nil) - if err != nil { + if _, err = validateIntegerEntry(xRefTable, d, dictName, "Size", OPTIONAL, model.V10, nil); err != nil { return err } // CreationDate, optional date - _, err = validateDateEntry(xRefTable, d, dictName, "CreationDate", OPTIONAL, model.V10) - if err != nil { + if _, err = validateDateEntry(xRefTable, d, dictName, "CreationDate", OPTIONAL, model.V10); err != nil { return err } // ModDate, optional date - _, err = validateDateEntry(xRefTable, d, dictName, "ModDate", OPTIONAL, model.V10) - if err != nil { + if _, err = validateDateEntry(xRefTable, d, dictName, "ModDate", OPTIONAL, model.V10); err != nil { return err } @@ -110,8 +103,7 @@ func validateEmbeddedFileStreamParameterDict(xRefTable *model.XRefTable, o types return err } if macDict != nil { - err = validateEmbeddedFileStreamMacParameterDict(xRefTable, macDict) - if err != nil { + if err = validateEmbeddedFileStreamMacParameterDict(xRefTable, macDict); err != nil { return err } } @@ -123,55 +115,54 @@ func validateEmbeddedFileStreamParameterDict(xRefTable *model.XRefTable, o types } func validateEmbeddedFileStreamDict(xRefTable *model.XRefTable, sd *types.StreamDict) error { - dictName := "embeddedFileStreamDict" // Type, optional, name - _, err := validateNameEntry(xRefTable, sd.Dict, dictName, "Type", OPTIONAL, model.V10, func(s string) bool { return s == "EmbeddedFile" }) - if err != nil { + if _, err := validateNameEntry(xRefTable, sd.Dict, dictName, "Type", OPTIONAL, model.V10, func(s string) bool { return s == "EmbeddedFile" }); err != nil { return err } // Subtype, optional, name - _, err = validateNameEntry(xRefTable, sd.Dict, dictName, "Subtype", OPTIONAL, model.V10, nil) - if err != nil { + if _, err := validateNameEntry(xRefTable, sd.Dict, dictName, "Subtype", OPTIONAL, model.V10, nil); err != nil { return err } // Params, optional, dict // parameter dict containing additional file-specific information. 
if o, found := sd.Dict.Find("Params"); found && o != nil { - err = validateEmbeddedFileStreamParameterDict(xRefTable, o) + if err := validateEmbeddedFileStreamParameterDict(xRefTable, o); err != nil { + return err + } } - return err + return nil } func validateFileSpecDictEntriesEFAndRFKeys(k string) bool { - return k == "F" || k == "UF" || k == "DOS" || k == "Mac" || k == "Unix" + return k == "F" || k == "UF" || k == "DOS" || k == "Mac" || k == "Unix" || k == "Subtype" } func validateFileSpecDictEntryEFDict(xRefTable *model.XRefTable, d types.Dict) error { - for k, obj := range d { if !validateFileSpecDictEntriesEFAndRFKeys(k) { return errors.Errorf("validateFileSpecEntriesEFAndRF: invalid key: %s", k) } - // value must be embedded file stream dict - // see 7.11.4 - sd, err := validateStreamDict(xRefTable, obj) - if err != nil { - return err - } - if sd == nil { - continue - } + if k == "F" || k == "UF" { + // value must be embedded file stream dict + // see 7.11.4 + sd, err := validateStreamDict(xRefTable, obj) + if err != nil { + return err + } + if sd == nil { + continue + } - err = validateEmbeddedFileStreamDict(xRefTable, sd) - if err != nil { - return err + if err = validateEmbeddedFileStreamDict(xRefTable, sd); err != nil { + return err + } } } @@ -180,7 +171,6 @@ func validateFileSpecDictEntryEFDict(xRefTable *model.XRefTable, d types.Dict) e } func validateRFDictFilesArray(xRefTable *model.XRefTable, a types.Array) error { - if len(a)%2 > 0 { return errors.New("pdfcpu: validateRFDictFilesArray: rfDict array corrupt") } @@ -216,8 +206,7 @@ func validateRFDictFilesArray(xRefTable *model.XRefTable, a types.Array) error { return err } - err = validateEmbeddedFileStreamDict(xRefTable, sd) - if err != nil { + if err = validateEmbeddedFileStreamDict(xRefTable, sd); err != nil { return err } @@ -228,15 +217,13 @@ func validateRFDictFilesArray(xRefTable *model.XRefTable, a types.Array) error { } func validateFileSpecDictEntriesEFAndRF(xRefTable *model.XRefTable, efDict, rfDict types.Dict) error { - // EF only or EF and RF if efDict == nil { return errors.Errorf("pdfcpu: validateFileSpecEntriesEFAndRF: missing required efDict.") } - err := validateFileSpecDictEntryEFDict(xRefTable, efDict) - if err != nil { + if err := validateFileSpecDictEntryEFDict(xRefTable, efDict); err != nil { return err } @@ -257,8 +244,7 @@ func validateFileSpecDictEntriesEFAndRF(xRefTable *model.XRefTable, efDict, rfDi continue } - err = validateRFDictFilesArray(xRefTable, a) - if err != nil { + if err = validateRFDictFilesArray(xRefTable, a); err != nil { return err } @@ -267,21 +253,11 @@ func validateFileSpecDictEntriesEFAndRF(xRefTable *model.XRefTable, efDict, rfDi return nil } -func validateFileSpecDictType(xRefTable *model.XRefTable, d types.Dict) error { - - if d.Type() == nil || (*d.Type() != "Filespec" && (xRefTable.ValidationMode == model.ValidationRelaxed && *d.Type() != "F")) { - return errors.New("pdfcpu: validateFileSpecDictType: missing type: FileSpec") - } - - return nil -} - func requiredF(dosFound, macFound, unixFound bool) bool { return !dosFound && !macFound && !unixFound } -func validateFileSpecDictEFAndRF(xRefTable *model.XRefTable, d types.Dict, dictName string) error { - +func validateFileSpecDictEFAndRF(xRefTable *model.XRefTable, d types.Dict, dictName string, hasEP bool) error { // RF, optional, dict of related files arrays, since V1.3 rfDict, err := validateDictEntry(xRefTable, d, dictName, "RF", OPTIONAL, model.V13, nil) if err != nil { @@ -289,45 +265,48 @@ func 
validateFileSpecDictEFAndRF(xRefTable *model.XRefTable, d types.Dict, dictN } // EF, required if RF present, dict of embedded file streams, since 1.3 - efDict, err := validateDictEntry(xRefTable, d, dictName, "EF", rfDict != nil, model.V13, nil) + sinceVersion := model.V13 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V11 + } + efDict, err := validateDictEntry(xRefTable, d, dictName, "EF", rfDict != nil, sinceVersion, nil) if err != nil { return err } - // Type, required if EF present, name + // Type, required if EF, EP or RF present, name validate := func(s string) bool { - return s == "Filespec" || (xRefTable.ValidationMode == model.ValidationRelaxed && s == "F") + return s == "Filespec" || s == "FileSpec" || (xRefTable.ValidationMode == model.ValidationRelaxed && s == "F") } - _, err = validateNameEntry(xRefTable, d, dictName, "Type", efDict != nil, model.V10, validate) - if err != nil { + required := rfDict != nil || efDict != nil || hasEP + if _, err = validateNameEntry(xRefTable, d, dictName, "Type", required, model.V10, validate); err != nil { return err } - // if EF present, Type "FileSpec" is required if efDict != nil { - - err = validateFileSpecDictType(xRefTable, d) - if err != nil { - return err - } - err = validateFileSpecDictEntriesEFAndRF(xRefTable, efDict, rfDict) - } return err } -func validateFileSpecDict(xRefTable *model.XRefTable, d types.Dict) error { - - dictName := "fileSpecDict" - +func validateFileSpecDictPart1(xRefTable *model.XRefTable, d types.Dict, dictName string) error { // FS, optional, name fsName, err := validateNameEntry(xRefTable, d, dictName, "FS", OPTIONAL, model.V10, nil) if err != nil { return err } + // UF, optional, text string + sinceVersion := model.V17 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V13 + } + uf, err := validateStringEntry(xRefTable, d, dictName, "UF", OPTIONAL, sinceVersion, validateFileSpecString) + if err != nil { + return err + } + // DOS, byte string, optional, obsolescent. 
_, dosFound := d.Find("DOS") @@ -343,57 +322,93 @@ func validateFileSpecDict(xRefTable *model.XRefTable, d types.Dict) error { validate = validateURLString } - _, err = validateStringEntry(xRefTable, d, dictName, "F", requiredF(dosFound, macFound, unixFound), model.V10, validate) - if err != nil { - return err + required := requiredF(dosFound, macFound, unixFound) + if xRefTable.ValidationMode == model.ValidationRelaxed && uf != nil { + required = OPTIONAL } - - // UF, optional, text string - sinceVersion := model.V17 - if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V14 - } - _, err = validateStringEntry(xRefTable, d, dictName, "UF", OPTIONAL, sinceVersion, validateFileSpecString) - if err != nil { + if _, err = validateStringEntry(xRefTable, d, dictName, "F", required, model.V10, validate); err != nil { return err } + return nil +} + +func validateFileSpecDictPart2(xRefTable *model.XRefTable, d types.Dict, dictName string) error { // ID, optional, array of strings - _, err = validateStringArrayEntry(xRefTable, d, dictName, "ID", OPTIONAL, model.V11, func(a types.Array) bool { return len(a) == 2 }) - if err != nil { + if _, err := validateStringArrayEntry(xRefTable, d, dictName, "ID", OPTIONAL, model.V11, func(a types.Array) bool { return len(a) == 2 }); err != nil { return err } // V, optional, boolean, since V1.2 - _, err = validateBooleanEntry(xRefTable, d, dictName, "V", OPTIONAL, model.V12, nil) - if err != nil { + if _, err := validateBooleanEntry(xRefTable, d, dictName, "V", OPTIONAL, model.V12, nil); err != nil { return err } - err = validateFileSpecDictEFAndRF(xRefTable, d, dictName) + // EP, optional, encrypted payload dict, since V2.0 + epDict, err := validateDictEntry(xRefTable, d, dictName, "EP", OPTIONAL, model.V20, nil) if err != nil { return err } + if err = validateFileSpecDictEFAndRF(xRefTable, d, dictName, len(epDict) > 0); err != nil { + return err + } // Desc, optional, text string, since V1.6 - sinceVersion = model.V16 + sinceVersion := model.V16 if xRefTable.ValidationMode == model.ValidationRelaxed { sinceVersion = model.V10 } - _, err = validateStringEntry(xRefTable, d, dictName, "Desc", OPTIONAL, sinceVersion, nil) - if err != nil { + if _, err = validateStringEntry(xRefTable, d, dictName, "Desc", OPTIONAL, sinceVersion, nil); err != nil { return err } // CI, optional, collection item dict, since V1.7 - _, err = validateDictEntry(xRefTable, d, dictName, "CI", OPTIONAL, model.V17, nil) + if _, err = validateDictEntry(xRefTable, d, dictName, "CI", OPTIONAL, model.V17, nil); err != nil { + return err + } - return err + // Thumb, optional, thumbnail image, since V2.0 + sinceVersion = model.V20 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V16 + } + if _, err := validateStreamDictEntry(xRefTable, d, dictName, "Thumb", OPTIONAL, sinceVersion, nil); err != nil { + return err + } + + // AFRelationship, optional, associated file semantics, since V2.0 + validateAFRelationship := func(s string) bool { + return types.MemberOf(s, []string{"Source", "Data", "Alternative", "Supplement", "EncryptedPayload", "FormData", "Schema", "Unspecified"}) + } + sinceVersion = model.V20 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 + } + if _, err := validateNameEntry(xRefTable, d, dictName, "AFRelationship", OPTIONAL, sinceVersion, validateAFRelationship); err != nil { + return err + } + + return nil } -func validateFileSpecification(xRefTable *model.XRefTable, o types.Object) 
(types.Object, error) { +func validateFileSpecDict(xRefTable *model.XRefTable, d types.Dict) error { + // See 7.11.3 - // See 7.11.4 + dictName := "fileSpecDict" + + if err := validateFileSpecDictPart1(xRefTable, d, dictName); err != nil { + return err + } + + if err := validateFileSpecDictPart2(xRefTable, d, dictName); err != nil { + return err + } + + return nil +} + +func validateFileSpecification(xRefTable *model.XRefTable, o types.Object) (types.Object, error) { + // See 7.11 o, err := xRefTable.Dereference(o) if err != nil { @@ -402,21 +417,14 @@ func validateFileSpecification(xRefTable *model.XRefTable, o types.Object) (type switch o := o.(type) { - case types.StringLiteral: - s := o.Value() - if !validateFileSpecString(s) { - return nil, errors.Errorf("pdfcpu: validateFileSpecification: invalid file spec string: %s", s) - } - - case types.HexLiteral: - s := o.Value() + case types.StringLiteral, types.HexLiteral: + s := o.(interface{ Value() string }).Value() if !validateFileSpecString(s) { return nil, errors.Errorf("pdfcpu: validateFileSpecification: invalid file spec string: %s", s) } case types.Dict: - err = validateFileSpecDict(xRefTable, o) - if err != nil { + if err = validateFileSpecDict(xRefTable, o); err != nil { return nil, err } @@ -429,7 +437,6 @@ func validateFileSpecification(xRefTable *model.XRefTable, o types.Object) (type } func validateURLSpecification(xRefTable *model.XRefTable, o types.Object) (types.Object, error) { - // See 7.11.4 d, err := xRefTable.DereferenceDict(o) @@ -444,8 +451,7 @@ func validateURLSpecification(xRefTable *model.XRefTable, o types.Object) (types dictName := "urlSpec" // FS, required, name - _, err = validateNameEntry(xRefTable, d, dictName, "FS", REQUIRED, model.V10, func(s string) bool { return s == "URL" }) - if err != nil { + if _, err = validateNameEntry(xRefTable, d, dictName, "FS", REQUIRED, model.V10, func(s string) bool { return s == "URL" }); err != nil { return nil, err } @@ -456,14 +462,12 @@ func validateURLSpecification(xRefTable *model.XRefTable, o types.Object) (types } func validateFileSpecEntry(xRefTable *model.XRefTable, d types.Dict, dictName string, entryName string, required bool, sinceVersion model.Version) (types.Object, error) { - o, err := validateEntry(xRefTable, d, dictName, entryName, required, sinceVersion) if err != nil || o == nil { return nil, err } - err = xRefTable.ValidateVersion("fileSpec", sinceVersion) - if err != nil { + if err = xRefTable.ValidateVersion("fileSpec", sinceVersion); err != nil { return nil, err } @@ -471,14 +475,12 @@ func validateFileSpecEntry(xRefTable *model.XRefTable, d types.Dict, dictName st } func validateURLSpecEntry(xRefTable *model.XRefTable, d types.Dict, dictName string, entryName string, required bool, sinceVersion model.Version) (types.Object, error) { - o, err := validateEntry(xRefTable, d, dictName, entryName, required, sinceVersion) if err != nil || o == nil { return nil, err } - err = xRefTable.ValidateVersion("URLSpec", sinceVersion) - if err != nil { + if err = xRefTable.ValidateVersion("URLSpec", sinceVersion); err != nil { return nil, err } @@ -486,7 +488,6 @@ func validateURLSpecEntry(xRefTable *model.XRefTable, d types.Dict, dictName str } func validateFileSpecificationOrFormObject(xRefTable *model.XRefTable, obj types.Object) error { - sd, ok := obj.(types.StreamDict) if ok { return validateFormStreamDict(xRefTable, &sd) diff --git a/pkg/pdfcpu/validate/font.go b/pkg/pdfcpu/validate/font.go index 08407997..07b02913 100644 --- a/pkg/pdfcpu/validate/font.go +++ 
b/pkg/pdfcpu/validate/font.go @@ -17,12 +17,18 @@ limitations under the License. package validate import ( + "fmt" + "strings" + "github.com/angel-one/pdfcpu/pkg/log" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/font" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" ) +var ErrMissingFont = errors.New("pdfcpu: missing font dict") + func validateStandardType1Font(s string) bool { return types.MemberOf(s, []string{"Times-Roman", "Times-Bold", "Times-Italic", "Times-BoldItalic", @@ -31,7 +37,7 @@ func validateStandardType1Font(s string) bool { "Symbol", "ZapfDingbats"}) } -func validateFontFile3SubType(sd *types.StreamDict, fontType string) error { +func validateFontFile3SubType(sd *types.StreamDict, fontType string, relaxed bool) error { // Hint about used font program. dictSubType := sd.Subtype() @@ -43,7 +49,10 @@ func validateFontFile3SubType(sd *types.StreamDict, fontType string) error { switch fontType { case "Type1": if *dictSubType != "Type1C" && *dictSubType != "OpenType" { - return errors.Errorf("pdfcpu: validateFontFile3SubType: Type1: unexpected Subtype %s", *dictSubType) + if !relaxed { + return errors.Errorf("pdfcpu: validateFontFile3SubType: Type1: unexpected Subtype %s", *dictSubType) + } + model.ShowSkipped(fmt.Sprintf("validateFontFile3SubType: Type1: unexpected Subtype %s", *dictSubType)) } case "MMType1": @@ -76,7 +85,7 @@ func validateFontFile(xRefTable *model.XRefTable, d types.Dict, dictName string, // SubType if entryName == "FontFile3" { - err = validateFontFile3SubType(sd, fontType) + err = validateFontFile3SubType(sd, fontType, xRefTable.ValidationMode == model.ValidationRelaxed) if err != nil { return err } @@ -128,82 +137,155 @@ func validateFontDescriptorType(xRefTable *model.XRefTable, d types.Dict) (err e return nil } -func validateFontDescriptorPart1(xRefTable *model.XRefTable, d types.Dict, dictName, fontDictType string) error { - - err := validateFontDescriptorType(xRefTable, d) +func validateFontDescriptorFontName(xRefTable *model.XRefTable, d types.Dict, dictName string) error { + required := true + if xRefTable.ValidationMode == model.ValidationRelaxed { + required = false + } + _, err := validateNameEntry(xRefTable, d, dictName, "FontName", required, model.V10, nil) if err != nil { - return err + if _, err = validateStringEntry(xRefTable, d, dictName, "FontName", required, model.V10, nil); err != nil { + if xRefTable.ValidationMode == model.ValidationRelaxed { + model.ShowDigestedSpecViolationError(xRefTable, err) + return nil + } + } } + return err +} +func validateFontDescriptorFontFamily(xRefTable *model.XRefTable, d types.Dict, dictName string) error { required := true if xRefTable.ValidationMode == model.ValidationRelaxed { required = false } - _, err = validateNameEntry(xRefTable, d, dictName, "FontName", required, model.V10, nil) + sinceVersion := model.V15 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V13 + } + _, err := validateNameEntry(xRefTable, d, dictName, "FontFamily", required, sinceVersion, nil) if err != nil { - _, err = validateStringEntry(xRefTable, d, dictName, "FontName", required, model.V10, nil) - if err != nil { - if xRefTable.ValidationMode != model.ValidationRelaxed { - return err + if _, err = validateStringEntry(xRefTable, d, dictName, "FontFamily", required, sinceVersion, nil); err != nil { + if xRefTable.ValidationMode == model.ValidationRelaxed { + model.ShowDigestedSpecViolationError(xRefTable, err) + return nil } - 
reportSpecViolation(xRefTable, err) } } + return err +} +func validateFontDescriptorFontStretch(xRefTable *model.XRefTable, d types.Dict, dictName string) error { sinceVersion := model.V15 if xRefTable.ValidationMode == model.ValidationRelaxed { sinceVersion = model.V13 } - _, err = validateStringEntry(xRefTable, d, dictName, "FontFamily", OPTIONAL, sinceVersion, nil) + _, err := validateNameEntry(xRefTable, d, dictName, "FontStretch", OPTIONAL, sinceVersion, nil) + return err +} + +func validateFontDescriptorFontWeight(xRefTable *model.XRefTable, d types.Dict, dictName string) error { + sinceVersion := model.V15 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V11 + } + _, err := validateNumberEntry(xRefTable, d, dictName, "FontWeight", OPTIONAL, sinceVersion, nil) if err != nil { - // Repair - _, err = validateNameEntry(xRefTable, d, dictName, "FontFamily", OPTIONAL, sinceVersion, nil) - return err + if xRefTable.ValidationMode == model.ValidationRelaxed { + validateFontWeight := func(s string) bool { + return types.MemberOf(s, []string{"Regular", "Bold", "Italic"}) + } + _, err = validateNameEntry(xRefTable, d, dictName, "FontWeight", OPTIONAL, sinceVersion, validateFontWeight) + } } + return err +} - sinceVersion = model.V15 - if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V13 +func validateFontDescriptorFontFlags(xRefTable *model.XRefTable, d types.Dict, dictName string) error { + _, err := validateIntegerEntry(xRefTable, d, dictName, "Flags", REQUIRED, model.V10, nil) + if err != nil { + if xRefTable.ValidationMode == model.ValidationRelaxed { + model.ShowSkipped("missing font descriptor \"Flags\"") + return nil + } } - _, err = validateNameEntry(xRefTable, d, dictName, "FontStretch", OPTIONAL, sinceVersion, nil) + return err +} + +func validateFontDescriptorFontBox(xRefTable *model.XRefTable, d types.Dict, dictName, fontDictType string) error { + _, err := validateRectangleEntry(xRefTable, d, dictName, "FontBBox", fontDictType != "Type3", model.V10, nil) if err != nil { - return err + if xRefTable.ValidationMode == model.ValidationRelaxed { + model.ShowSkipped("missing font descriptor \"FontBBox\"") + return nil + } } + return err +} - sinceVersion = model.V15 +func validateFontDescriptorItalicAngle(xRefTable *model.XRefTable, d types.Dict, dictName string) error { + required := true if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V13 + required = false } - _, err = validateNumberEntry(xRefTable, d, dictName, "FontWeight", OPTIONAL, sinceVersion, nil) - if err != nil { + _, err := validateNumberEntry(xRefTable, d, dictName, "ItalicAngle", required, model.V10, nil) + return err +} + +func validateFontDescriptorPart1(xRefTable *model.XRefTable, d types.Dict, dictName, fontDictType string) error { + if err := validateFontDescriptorType(xRefTable, d); err != nil { return err } - _, err = validateIntegerEntry(xRefTable, d, dictName, "Flags", REQUIRED, model.V10, nil) - if err != nil { + if err := validateFontDescriptorFontName(xRefTable, d, dictName); err != nil { return err } - _, err = validateRectangleEntry(xRefTable, d, dictName, "FontBBox", fontDictType != "Type3", model.V10, nil) - if err != nil { + if err := validateFontDescriptorFontFamily(xRefTable, d, dictName); err != nil { return err } - _, err = validateNumberEntry(xRefTable, d, dictName, "ItalicAngle", REQUIRED, model.V10, nil) + if err := validateFontDescriptorFontStretch(xRefTable, d, dictName); err != nil { + return err + } 
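// Editor's note: a standalone sketch of the relaxed-validation pattern used throughout
// these hunks — under strict validation an entry keeps the version threshold required by
// the PDF spec, while relaxed mode accepts it from an earlier version. The Version and
// Mode types and the effectiveSinceVersion helper are the editor's illustrative names,
// not pdfcpu's.
package main

import "fmt"

type Version int

const (
	V10 Version = iota
	V11
	V12
	V13
	V14
	V15
)

type Mode int

const (
	ValidationStrict Mode = iota
	ValidationRelaxed
)

// effectiveSinceVersion picks the version threshold an entry is checked against,
// mirroring the "if mode == relaxed { sinceVersion = ... }" idiom in the diff.
func effectiveSinceVersion(strict, relaxed Version, mode Mode) Version {
	if mode == ValidationRelaxed {
		return relaxed
	}
	return strict
}

func main() {
	// FontStretch is a PDF 1.5 entry; relaxed mode tolerates it from 1.3.
	fmt.Println(effectiveSinceVersion(V15, V13, ValidationStrict))  // prints 5 (V15)
	fmt.Println(effectiveSinceVersion(V15, V13, ValidationRelaxed)) // prints 3 (V13)
}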
- return err + if err := validateFontDescriptorFontWeight(xRefTable, d, dictName); err != nil { + return err + } + + if err := validateFontDescriptorFontFlags(xRefTable, d, dictName); err != nil { + return err + } + + if err := validateFontDescriptorFontBox(xRefTable, d, dictName, fontDictType); err != nil { + return err + } + + if err := validateFontDescriptorItalicAngle(xRefTable, d, dictName); err != nil { + return err + } + + return nil } func validateFontDescriptorPart2(xRefTable *model.XRefTable, d types.Dict, dictName, fontDictType string) error { _, err := validateNumberEntry(xRefTable, d, dictName, "Ascent", fontDictType != "Type3", model.V10, nil) if err != nil { - return err + if xRefTable.ValidationMode != model.ValidationRelaxed { + return err + } + err = nil + model.ShowSkipped("missing font descriptor \"Ascent\"") } _, err = validateNumberEntry(xRefTable, d, dictName, "Descent", fontDictType != "Type3", model.V10, nil) if err != nil { - return err + if xRefTable.ValidationMode != model.ValidationRelaxed { + return err + } + err = nil + model.ShowSkipped("missing font descriptor \"Descent\"") } _, err = validateNumberEntry(xRefTable, d, dictName, "Leading", OPTIONAL, model.V10, nil) @@ -221,13 +303,13 @@ func validateFontDescriptorPart2(xRefTable *model.XRefTable, d types.Dict, dictN return err } - required := fontDictType != "Type3" - if xRefTable.ValidationMode == model.ValidationRelaxed { - required = false - } - _, err = validateNumberEntry(xRefTable, d, dictName, "StemV", required, model.V10, nil) + _, err = validateNumberEntry(xRefTable, d, dictName, "StemV", fontDictType != "Type3", model.V10, nil) if err != nil { - return err + if xRefTable.ValidationMode != model.ValidationRelaxed { + return err + } + err = nil + model.ShowSkipped("missing font descriptor \"StemV\"") } _, err = validateNumberEntry(xRefTable, d, dictName, "StemH", OPTIONAL, model.V10, nil) @@ -369,7 +451,7 @@ func validateFontEncoding(xRefTable *model.XRefTable, d types.Dict, dictName str encodings := []string{"MacRomanEncoding", "MacExpertEncoding", "WinAnsiEncoding"} if xRefTable.ValidationMode == model.ValidationRelaxed { - encodings = append(encodings, "StandardEncoding", "SymbolSetEncoding") + encodings = append(encodings, "FontSpecific", "StandardEncoding", "SymbolSetEncoding", "PDFDocEncoding") } switch o := o.(type) { @@ -394,7 +476,7 @@ func validateFontEncoding(xRefTable *model.XRefTable, d types.Dict, dictName str return nil } -func validateTrueTypeFontDict(xRefTable *model.XRefTable, d types.Dict) error { +func validateTrueTypeFontDict(xRefTable *model.XRefTable, d types.Dict) (string, error) { // see 9.6.3 dictName := "trueTypeFontDict" @@ -402,9 +484,13 @@ func validateTrueTypeFontDict(xRefTable *model.XRefTable, d types.Dict) error { // Name, name, obsolet and should not be used. 
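Reviewer note, not part of the patch: the font descriptor validators above all follow the same recurring pattern, fail hard in strict mode but log and skip a missing required entry in relaxed mode. The sketch below illustrates only that pattern in isolation; `Mode`, `checkRequired` and the plain map used as a dict are hypothetical stand-ins for pdfcpu's internal validation API.

```go
package main

import (
	"errors"
	"fmt"
)

type Mode int

const (
	Strict Mode = iota
	Relaxed
)

// checkRequired stands in for a required-entry check such as "Flags" or "FontBBox".
func checkRequired(d map[string]interface{}, key string) error {
	if _, ok := d[key]; !ok {
		return errors.New("missing required entry " + key)
	}
	return nil
}

// validateEntry returns the error in strict mode but merely logs and skips in relaxed mode.
func validateEntry(mode Mode, d map[string]interface{}, key string) error {
	if err := checkRequired(d, key); err != nil {
		if mode != Relaxed {
			return err
		}
		fmt.Printf("skipped: missing font descriptor %q\n", key)
	}
	return nil
}

func main() {
	d := map[string]interface{}{"FontName": "Helvetica"} // no "Flags" entry
	fmt.Println(validateEntry(Relaxed, d, "Flags")) // <nil>, logged as skipped
	fmt.Println(validateEntry(Strict, d, "Flags"))  // error
}
```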
// BaseFont, required, name - _, err := validateNameEntry(xRefTable, d, dictName, "BaseFont", REQUIRED, model.V10, nil) + bf, err := validateNameEntry(xRefTable, d, dictName, "BaseFont", REQUIRED, model.V10, nil) if err != nil { - return err + return "", err + } + fontName := "" + if bf != nil { + fontName = bf.String() } // FirstChar, required, integer @@ -412,9 +498,8 @@ func validateTrueTypeFontDict(xRefTable *model.XRefTable, d types.Dict) error { if xRefTable.ValidationMode == model.ValidationRelaxed { required = OPTIONAL } - _, err = validateIntegerEntry(xRefTable, d, dictName, "FirstChar", required, model.V10, nil) - if err != nil { - return err + if _, err = validateIntegerEntry(xRefTable, d, dictName, "FirstChar", required, model.V10, nil); err != nil { + return "", err } // LastChar, required, integer @@ -422,9 +507,8 @@ func validateTrueTypeFontDict(xRefTable *model.XRefTable, d types.Dict) error { if xRefTable.ValidationMode == model.ValidationRelaxed { required = OPTIONAL } - _, err = validateIntegerEntry(xRefTable, d, dictName, "LastChar", required, model.V10, nil) - if err != nil { - return err + if _, err = validateIntegerEntry(xRefTable, d, dictName, "LastChar", required, model.V10, nil); err != nil { + return "", err } // Widths, array of numbers. @@ -432,9 +516,8 @@ func validateTrueTypeFontDict(xRefTable *model.XRefTable, d types.Dict) error { if xRefTable.ValidationMode == model.ValidationRelaxed { required = OPTIONAL } - _, err = validateNumberArrayEntry(xRefTable, d, dictName, "Widths", required, model.V10, nil) - if err != nil { - return err + if _, err = validateNumberArrayEntry(xRefTable, d, dictName, "Widths", required, model.V10, nil); err != nil { + return "", err } // FontDescriptor, required, dictionary @@ -442,21 +525,19 @@ func validateTrueTypeFontDict(xRefTable *model.XRefTable, d types.Dict) error { if xRefTable.ValidationMode == model.ValidationRelaxed { required = OPTIONAL } - err = validateFontDescriptor(xRefTable, d, dictName, "TrueType", required, model.V10) - if err != nil { - return err + if err = validateFontDescriptor(xRefTable, d, dictName, "TrueType", required, model.V10); err != nil { + return "", err } // Encoding, optional, name or dict - err = validateFontEncoding(xRefTable, d, dictName, OPTIONAL) - if err != nil { - return err + if err = validateFontEncoding(xRefTable, d, dictName, OPTIONAL); err != nil { + return "", err } // ToUnicode, optional, stream _, err = validateStreamDictEntry(xRefTable, d, dictName, "ToUnicode", OPTIONAL, model.V12, nil) - return err + return fontName, err } func validateCIDToGIDMap(xRefTable *model.XRefTable, o types.Object) error { @@ -636,6 +717,10 @@ func validateDescendantFonts(xRefTable *model.XRefTable, d types.Dict, fontDictN return err } + if len(a) != 1 { + return font.ErrCorruptFontDict + } + d1, err := xRefTable.DereferenceDict(a[0]) if err != nil { return err @@ -651,38 +736,48 @@ func validateDescendantFonts(xRefTable *model.XRefTable, d types.Dict, fontDictN return validateCIDFontDict(xRefTable, d1) } -func validateType0FontDict(xRefTable *model.XRefTable, d types.Dict) error { +func validateType0FontDict(xRefTable *model.XRefTable, d types.Dict) (string, error) { dictName := "type0FontDict" // BaseFont, required, name - _, err := validateNameEntry(xRefTable, d, dictName, "BaseFont", REQUIRED, model.V10, nil) + bf, err := validateNameEntry(xRefTable, d, dictName, "BaseFont", REQUIRED, model.V10, nil) if err != nil { - return err + return "", err + } + + fontName := "" + if bf != nil { + fontName = 
bf.String() } // Encoding, required, name or CMap stream dict - err = validateType0FontEncoding(xRefTable, d, dictName, REQUIRED) - if err != nil { - return err + if err = validateType0FontEncoding(xRefTable, d, dictName, REQUIRED); err != nil { + return "", err } // DescendantFonts: one-element array specifying the CIDFont dictionary that is the descendant of this Type 0 font, required. - err = validateDescendantFonts(xRefTable, d, dictName, REQUIRED) - if err != nil { - return err + if err = validateDescendantFonts(xRefTable, d, dictName, REQUIRED); err != nil { + if xRefTable.ValidationMode == model.ValidationRelaxed { + err = ErrMissingFont + } + return fontName, err } // ToUnicode, optional, CMap stream dict - _, err = validateStreamDictEntry(xRefTable, d, dictName, "ToUnicode", OPTIONAL, model.V12, nil) + sinceVersion := model.V12 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V11 + } + _, err = validateStreamDictEntry(xRefTable, d, dictName, "ToUnicode", OPTIONAL, sinceVersion, nil) if err != nil && xRefTable.ValidationMode == model.ValidationRelaxed { - _, err = validateNameEntry(xRefTable, d, dictName, "ToUnicode", REQUIRED, model.V12, func(s string) bool { return s == "Identity-H" }) + _, err = validateNameEntry(xRefTable, d, dictName, "ToUnicode", REQUIRED, sinceVersion, func(s string) bool { return s == "Identity-H" }) } - return err + return fontName, err } -func validateType1FontDict(xRefTable *model.XRefTable, d types.Dict) error { +func validateType1FontDict(xRefTable *model.XRefTable, d types.Dict) (string, error) { // see 9.6.2 @@ -691,20 +786,20 @@ func validateType1FontDict(xRefTable *model.XRefTable, d types.Dict) error { // Name, name, obsolet and should not be used. // BaseFont, required, name - fontName, err := validateNameEntry(xRefTable, d, dictName, "BaseFont", REQUIRED, model.V10, nil) + bf, err := validateNameEntry(xRefTable, d, dictName, "BaseFont", REQUIRED, model.V10, nil) if err != nil { - return err + return "", err } - fn := (*fontName).Value() - required := xRefTable.Version() >= model.V15 || !validateStandardType1Font(fn) + fontName := bf.String() + required := xRefTable.Version() >= model.V17 || !validateStandardType1Font(fontName) if xRefTable.ValidationMode == model.ValidationRelaxed { required = false } - // FirstChar, required except for standard 14 fonts. since 1.5 always required, integer + // FirstChar, required except for standard 14 fonts. since 2.0 always required, integer fc, err := validateIntegerEntry(xRefTable, d, dictName, "FirstChar", required, model.V10, nil) if err != nil { - return err + return "", err } if !required && fc != nil { @@ -714,41 +809,54 @@ func validateType1FontDict(xRefTable *model.XRefTable, d types.Dict) error { } } - // LastChar, required except for standard 14 fonts. since 1.5 always required, integer - _, err = validateIntegerEntry(xRefTable, d, dictName, "LastChar", required, model.V10, nil) - if err != nil { - return err + // LastChar, required except for standard 14 fonts. since 2.0 always required, integer + if _, err = validateIntegerEntry(xRefTable, d, dictName, "LastChar", required, model.V10, nil); err != nil { + return "", err } - // Widths, required except for standard 14 fonts. since 1.5 always required, array of numbers - _, err = validateNumberArrayEntry(xRefTable, d, dictName, "Widths", required, model.V10, nil) - if err != nil { - return err + // Widths, required except for standard 14 fonts. 
since 2.0 always required, array of numbers + if _, err = validateNumberArrayEntry(xRefTable, d, dictName, "Widths", required, model.V10, nil); err != nil { + return "", err } - // FontDescriptor, required since version 1.5; required unless standard font for version < 1.5, dict - err = validateFontDescriptor(xRefTable, d, dictName, "Type1", required, model.V10) - if err != nil { - return err + // FontDescriptor, required since version 2.0; required unless standard font for version <= 1.7, dict + if err = validateFontDescriptor(xRefTable, d, dictName, "Type1", required, model.V10); err != nil { + return "", err } // Encoding, optional, name or dict - err = validateFontEncoding(xRefTable, d, dictName, OPTIONAL) - if err != nil { - return err + if err = validateFontEncoding(xRefTable, d, dictName, OPTIONAL); err != nil { + return "", err } // ToUnicode, optional, stream - _, err = validateStreamDictEntry(xRefTable, d, dictName, "ToUnicode", OPTIONAL, model.V12, nil) + sinceVersion := model.V12 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V10 + } + _, err = validateStreamDictEntry(xRefTable, d, dictName, "ToUnicode", OPTIONAL, sinceVersion, nil) - return err + return fontName, err } func validateCharProcsDict(xRefTable *model.XRefTable, d types.Dict, dictName string, required bool, sinceVersion model.Version) error { + if xRefTable.ValidationMode == model.ValidationRelaxed { + required = false + } d1, err := validateDictEntry(xRefTable, d, dictName, "CharProcs", required, sinceVersion, nil) - if err != nil || d1 == nil { - return err + if d1 == nil { + return nil + } + if err != nil { + if xRefTable.ValidationMode != model.ValidationRelaxed { + return err + } + if !strings.Contains(err.Error(), "invalid type") { + return err + } + model.ShowDigestedSpecViolation("\"CharProcs\" with invalid type") + return nil } for _, v := range d1 { @@ -959,51 +1067,100 @@ func validateType3FontDict(xRefTable *model.XRefTable, d types.Dict) error { return err } -func validateFontDict(xRefTable *model.XRefTable, o types.Object) (err error) { - - d, err := xRefTable.DereferenceDict(o) - if err != nil || d == nil { - return err - } - - if xRefTable.ValidationMode == model.ValidationRelaxed { - if len(d) == 0 { - return nil - } - } - - if d.Type() == nil || *d.Type() != "Font" { - return errors.New("pdfcpu: validateFontDict: corrupt font dict") - } - +func _validateFontDict(xRefTable *model.XRefTable, d types.Dict, isIndRef bool, indRef types.IndirectRef) (fontName string, err error) { subtype := d.Subtype() if subtype == nil { - return errors.New("pdfcpu: validateFontDict: missing Subtype") + return "", errors.New("pdfcpu: validateFontDict: missing Subtype") } switch *subtype { case "TrueType": - err = validateTrueTypeFontDict(xRefTable, d) + fontName, err = validateTrueTypeFontDict(xRefTable, d) case "Type0": - err = validateType0FontDict(xRefTable, d) + fontName, err = validateType0FontDict(xRefTable, d) - case "Type1": - err = validateType1FontDict(xRefTable, d) + case "Type1", "Type1C": + fontName, err = validateType1FontDict(xRefTable, d) case "MMType1": - err = validateType1FontDict(xRefTable, d) + return validateType1FontDict(xRefTable, d) case "Type3": err = validateType3FontDict(xRefTable, d) default: - return errors.Errorf("pdfcpu: validateFontDict: unknown Subtype: %s\n", *subtype) + return "", errors.Errorf("pdfcpu: validateFontDict: unknown Subtype: %s", *subtype) } - return err + if isIndRef { + if err1 := xRefTable.SetValid(indRef); err1 != nil { + return "", 
err1 + } + } + + return fontName, err +} + +func validateFontDict(xRefTable *model.XRefTable, isIndRef bool, indRef types.IndirectRef) (string, error) { + + if isIndRef { + + ok, err := xRefTable.IsValid(indRef) + if err != nil { + return "", ErrMissingFont + } + if ok { + return "", nil + } + + if ok, err := xRefTable.IsBeingValidated(indRef); err != nil || ok { + return "", err + } + + if err := xRefTable.SetBeingValidated(indRef); err != nil { + return "", err + } + } + + d, err := xRefTable.DereferenceDict(indRef) + if err != nil || d == nil { + if xRefTable.ValidationMode == model.ValidationRelaxed { + err = ErrMissingFont + } + return "", err + } + + if xRefTable.ValidationMode == model.ValidationRelaxed { + if len(d) == 0 { + return "", nil + } + } + + if d.Type() == nil || *d.Type() != "Font" { + if xRefTable.ValidationMode == model.ValidationStrict { + return "", errors.New("pdfcpu: validateFontDict: corrupt font dict") + } + model.ShowDigestedSpecViolation("missing fontDict entry \"Type\"") + } + + return _validateFontDict(xRefTable, d, isIndRef, indRef) +} + +func fixFontObjNr(m1 map[string]string, m2 map[string]types.IndirectRef, d types.Dict) { + for k, v := range m1 { + if v != "" { + indRef, ok := m2[v] + if ok { + model.ShowRepaired(fmt.Sprintf("font %s mapped to objNr %d", k, indRef.ObjectNumber)) + d[k] = indRef + continue + } + } + d[k] = nil + } } func validateFontResourceDict(xRefTable *model.XRefTable, o types.Object, sinceVersion model.Version) error { @@ -1019,15 +1176,42 @@ func validateFontResourceDict(xRefTable *model.XRefTable, o types.Object, sinceV return err } + // fontid, fontname + m1 := map[string]string{} + + // fontname, objNr + m2 := map[string]types.IndirectRef{} + + var defFontName string + // Iterate over font resource dict - for _, obj := range d { + for id, obj := range d { + + indRef, indRefOk := obj.(types.IndirectRef) // Process fontDict - err = validateFontDict(xRefTable, obj) + fn, err := validateFontDict(xRefTable, indRefOk, indRef) if err != nil { + if err == ErrMissingFont { + if xRefTable.ValidationMode == model.ValidationRelaxed { + err = nil + model.ShowSkipped(fmt.Sprintf("missing font: %s %s", id, fn)) + m1[id] = fn + continue + } + } return err } + if xRefTable.ValidationMode == model.ValidationRelaxed && indRefOk { + m2[fn] = indRef + if defFontName == "" { + defFontName = fn + } + } + } + if len(m1) > 0 && xRefTable.ValidationMode == model.ValidationRelaxed { + fixFontObjNr(m1, m2, d) } return nil diff --git a/pkg/pdfcpu/validate/form.go b/pkg/pdfcpu/validate/form.go index a5fe15ee..914c43e0 100644 --- a/pkg/pdfcpu/validate/form.go +++ b/pkg/pdfcpu/validate/form.go @@ -17,6 +17,7 @@ limitations under the License. 
package validate import ( + "fmt" "strconv" "strings" @@ -201,13 +202,13 @@ func validateDARelaxed(s string) bool { if i < 3 { return false } - if _, err := strconv.ParseFloat(da[i-3], 32); err != nil { + if _, err := strconv.ParseFloat(strings.TrimPrefix(da[i-3], "["), 32); err != nil { return false } if _, err := strconv.ParseFloat(da[i-2], 32); err != nil { return false } - if _, err := strconv.ParseFloat(da[i-1], 32); err != nil { + if _, err := strconv.ParseFloat(strings.TrimSuffix(da[i-1], "]"), 32); err != nil { return false } } @@ -229,13 +230,18 @@ func validateFormFieldDA(xRefTable *model.XRefTable, d types.Dict, dictName stri if xRefTable.ValidationMode == model.ValidationRelaxed { validate = validateDARelaxed } - if terminalNode && (*outFieldType).Value() == "Tx" { - da, err := validateStringEntry(xRefTable, d, dictName, "DA", terminalNode && requiresDA, model.V10, validate) + + if outFieldType == nil || (*outFieldType).Value() == "Tx" { + //if (*outFieldType).Value() == "Tx" { + da, err := validateStringEntry(xRefTable, d, dictName, "DA", requiresDA, model.V10, validate) if err != nil { + if !terminalNode && requiresDA { + err = nil + } return false, err } if xRefTable.ValidationMode == model.ValidationRelaxed && da != nil { - // Repair + // Repair DA d["DA"] = types.StringLiteral(*da) } @@ -245,7 +251,84 @@ func validateFormFieldDA(xRefTable *model.XRefTable, d types.Dict, dictName stri return false, nil } -func validateFormFieldDictEntries(xRefTable *model.XRefTable, d types.Dict, terminalNode bool, inFieldType *types.Name, requiresDA bool) (outFieldType *types.Name, hasDA bool, err error) { +func cacheSig(xRefTable *model.XRefTable, d types.Dict, dictName string, form bool, objNr, incr int) error { + fieldType := d.NameEntry("FT") + if fieldType == nil || *fieldType != "Sig" { + return nil + } + + sig := &model.Signature{Type: model.SigTypePage, ObjNr: objNr, Signed: d["V"] != nil, PageNr: xRefTable.CurPage} + if form { + sig.Type = model.SigTypeForm + } + + var dts bool + + if indRef := d.IndirectRefEntry("V"); indRef != nil { + sigDict, err := xRefTable.DereferenceDict(*indRef) + if err != nil { + return nil + } + if typ := sigDict.Type(); typ != nil { + if *typ == "DocTimeStamp" { + sig.Type = model.SigTypeDTS + dts = true + } + } + } + + arr, err := validateRectangleEntry(xRefTable, d, dictName, "Rect", REQUIRED, model.V10, nil) + if err != nil { + return err + } + r := types.RectForArray(arr) + sig.Visible = r.Visible() && !dts + + if _, ok := xRefTable.Signatures[incr]; !ok { + xRefTable.Signatures[incr] = map[int]model.Signature{} + } + if sig1, ok := xRefTable.Signatures[incr][sig.ObjNr]; !ok { + xRefTable.Signatures[incr][sig.ObjNr] = *sig + } else { + sig1.PageNr = xRefTable.CurPage + xRefTable.Signatures[incr][sig.ObjNr] = sig1 + } + + return nil +} + +func isTextField(ft *types.Name) bool { + return ft != nil && *ft == "Tx" +} + +func validateV(xRefTable *model.XRefTable, objNr, incr int, d types.Dict, dictName string, terminalNode, textField, oneKid bool) error { + _, err := validateEntry(xRefTable, d, dictName, "V", OPTIONAL, model.V10) + if err != nil { + return err + } + // Ignore kids if V is present + // if textField && v != nil && !terminalNode && !oneKid { + // return errors.New("\"V\" not allowed in non terminal text fields with more than one kid") + // } + if err := cacheSig(xRefTable, d, dictName, true, objNr, incr); err != nil { + return err + } + return nil +} + +func validateDV(xRefTable *model.XRefTable, d types.Dict, dictName string, 
terminalNode, textField, oneKid bool) error { + _, err := validateEntry(xRefTable, d, dictName, "DV", OPTIONAL, model.V10) + if err != nil { + return err + } + // Ignore kids if DV is present. + // if textField && dv != nil && !terminalNode && !oneKid { + // return errors.New("\"DV\" not allowed in non terminal text fields with more than one kid") + // } + return nil +} + +func validateFormFieldDictEntries(xRefTable *model.XRefTable, objNr, incr int, d types.Dict, terminalNode, oneKid bool, inFieldType *types.Name, requiresDA bool) (outFieldType *types.Name, hasDA bool, err error) { dictName := "formFieldDict" @@ -261,6 +344,8 @@ func validateFormFieldDictEntries(xRefTable *model.XRefTable, d types.Dict, term outFieldType = fieldType } + textField := isTextField(outFieldType) + // Parent, required if this is a child in the field hierarchy. _, err = validateIndRefEntry(xRefTable, d, dictName, "Parent", OPTIONAL, model.V10) if err != nil { @@ -292,14 +377,12 @@ func validateFormFieldDictEntries(xRefTable *model.XRefTable, d types.Dict, term } // V, optional, various - _, err = validateEntry(xRefTable, d, dictName, "V", OPTIONAL, model.V10) - if err != nil { + if err := validateV(xRefTable, objNr, incr, d, dictName, terminalNode, textField, oneKid); err != nil { return nil, false, err } // DV, optional, various - _, err = validateEntry(xRefTable, d, dictName, "DV", OPTIONAL, model.V10) - if err != nil { + if err := validateDV(xRefTable, d, dictName, terminalNode, textField, oneKid); err != nil { return nil, false, err } @@ -316,14 +399,14 @@ func validateFormFieldDictEntries(xRefTable *model.XRefTable, d types.Dict, term return outFieldType, hasDA, err } -func validateFormFieldParts(xRefTable *model.XRefTable, d types.Dict, inFieldType *types.Name, requiresDA bool) error { +func validateFormFieldParts(xRefTable *model.XRefTable, objNr, incr int, d types.Dict, inFieldType *types.Name, requiresDA bool) error { // dict represents a terminal field and must have Subtype "Widget" if _, err := validateNameEntry(xRefTable, d, "formFieldDict", "Subtype", REQUIRED, model.V10, func(s string) bool { return s == "Widget" }); err != nil { - return err + d["Subtype"] = types.Name("Widget") } // Validate field dict entries. - if _, _, err := validateFormFieldDictEntries(xRefTable, d, true, inFieldType, requiresDA); err != nil { + if _, _, err := validateFormFieldDictEntries(xRefTable, objNr, incr, d, true, false, inFieldType, requiresDA); err != nil { return err } @@ -332,29 +415,35 @@ func validateFormFieldParts(xRefTable *model.XRefTable, d types.Dict, inFieldTyp return err } -func validateFormFieldKids(xRefTable *model.XRefTable, d types.Dict, o types.Object, inFieldType *types.Name, requiresDA bool) error { +func validateFormFieldKids(xRefTable *model.XRefTable, objNr, incr int, d types.Dict, o types.Object, inFieldType *types.Name, requiresDA bool) error { var err error // dict represents a non terminal field. if d.Subtype() != nil && *d.Subtype() == "Widget" { - return errors.New("pdfcpu: validateFormFieldKids: non terminal field can not be widget annotation") + if xRefTable.ValidationMode == model.ValidationStrict { + return errors.New("pdfcpu: validateFormFieldKids: non terminal field can not be widget annotation") + } + } + + a, err := xRefTable.DereferenceArray(o) + if err != nil { + return err } // Validate field entries. 
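Reviewer note, not part of the patch: cacheSig above records one model.Signature per file increment and field object number, and when the same field is seen again it only refreshes the page number and keeps the originally recorded data. A simplified sketch of that bookkeeping, with a hypothetical Signature type in place of model.Signature:

```go
package main

import "fmt"

type Signature struct {
	ObjNr   int
	PageNr  int
	Signed  bool
	Visible bool
}

// signatures[incr][objNr] mirrors the xRefTable.Signatures layout: one map per file increment.
var signatures = map[int]map[int]Signature{}

func cacheSig(incr int, sig Signature) {
	if _, ok := signatures[incr]; !ok {
		signatures[incr] = map[int]Signature{}
	}
	if cached, ok := signatures[incr][sig.ObjNr]; ok {
		// Already recorded: only refresh the page number, keep the rest.
		cached.PageNr = sig.PageNr
		signatures[incr][sig.ObjNr] = cached
		return
	}
	signatures[incr][sig.ObjNr] = sig
}

func main() {
	cacheSig(0, Signature{ObjNr: 12, PageNr: 1, Signed: true})
	cacheSig(0, Signature{ObjNr: 12, PageNr: 2})
	fmt.Printf("%+v\n", signatures[0][12]) // PageNr updated to 2, Signed stays true
}
```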
var xInFieldType *types.Name var hasDA bool - if xInFieldType, hasDA, err = validateFormFieldDictEntries(xRefTable, d, false, inFieldType, requiresDA); err != nil { + if xInFieldType, hasDA, err = validateFormFieldDictEntries(xRefTable, objNr, incr, d, false, len(a) == 1, inFieldType, requiresDA); err != nil { return err } if requiresDA && hasDA { requiresDA = false } - // Recurse over kids. - a, err := xRefTable.DereferenceArray(o) - if err != nil || a == nil { - return err + if len(a) == 0 { + return nil } + // Recurse over kids. for _, value := range a { ir, ok := value.(types.IndirectRef) if !ok { @@ -362,7 +451,11 @@ func validateFormFieldKids(xRefTable *model.XRefTable, d types.Dict, o types.Obj } valid, err := xRefTable.IsValid(ir) if err != nil { - return err + if xRefTable.ValidationMode == model.ValidationStrict { + return err + } + model.ShowSkipped(fmt.Sprintf("missing form field kid obj #%s", ir.ObjectNumber.String())) + valid = true } if !valid { @@ -376,7 +469,7 @@ func validateFormFieldKids(xRefTable *model.XRefTable, d types.Dict, o types.Obj } func validateFormFieldDict(xRefTable *model.XRefTable, ir types.IndirectRef, inFieldType *types.Name, requiresDA bool) error { - d, err := xRefTable.DereferenceDict(ir) + d, incr, err := xRefTable.DereferenceDictWithIncr(ir) if err != nil || d == nil { return err } @@ -391,21 +484,18 @@ func validateFormFieldDict(xRefTable *model.XRefTable, ir types.IndirectRef, inF return err } + objNr := ir.ObjectNumber.Value() + if o, ok := d.Find("Kids"); ok { - return validateFormFieldKids(xRefTable, d, o, inFieldType, requiresDA) + return validateFormFieldKids(xRefTable, objNr, incr, d, o, inFieldType, requiresDA) } - return validateFormFieldParts(xRefTable, d, inFieldType, requiresDA) + return validateFormFieldParts(xRefTable, objNr, incr, d, inFieldType, requiresDA) } -func validateFormFields(xRefTable *model.XRefTable, o types.Object, requiresDA bool) error { - - a, err := xRefTable.DereferenceArray(o) - if err != nil || len(a) == 0 { - return err - } +func validateFormFields(xRefTable *model.XRefTable, arr types.Array, requiresDA bool) error { - for _, value := range a { + for _, value := range arr { ir, ok := value.(types.IndirectRef) if !ok { @@ -414,7 +504,11 @@ func validateFormFields(xRefTable *model.XRefTable, o types.Object, requiresDA b valid, err := xRefTable.IsValid(ir) if err != nil { - return err + if xRefTable.ValidationMode == model.ValidationStrict { + return err + } + model.ShowSkipped(fmt.Sprintf("missing form field obj #%s", ir.ObjectNumber.String())) + valid = true } if !valid { @@ -428,7 +522,7 @@ func validateFormFields(xRefTable *model.XRefTable, o types.Object, requiresDA b return nil } -func validateFormCO(xRefTable *model.XRefTable, o types.Object, sinceVersion model.Version, requiresDA bool) error { +func validateFormCO(xRefTable *model.XRefTable, arr types.Array, sinceVersion model.Version, requiresDA bool) error { // see 12.6.3 Trigger Events // Array of indRefs to field dicts with calculation actions, since V1.3 @@ -439,7 +533,7 @@ func validateFormCO(xRefTable *model.XRefTable, o types.Object, sinceVersion mod return err } - return validateFormFields(xRefTable, o, requiresDA) + return validateFormFields(xRefTable, arr, requiresDA) } func validateFormXFA(xRefTable *model.XRefTable, d types.Dict, sinceVersion model.Version) error { @@ -513,7 +607,12 @@ func validateFormEntryCO(xRefTable *model.XRefTable, d types.Dict, sinceVersion return nil } - return validateFormCO(xRefTable, o, sinceVersion, requiresDA) + 
arr, err := xRefTable.DereferenceArray(o) + if err != nil || len(arr) == 0 { + return err + } + + return validateFormCO(xRefTable, arr, sinceVersion, requiresDA) } func validateFormEntryDR(xRefTable *model.XRefTable, d types.Dict) error { @@ -528,6 +627,50 @@ func validateFormEntryDR(xRefTable *model.XRefTable, d types.Dict) error { return err } +func validateFormEntries(xRefTable *model.XRefTable, d types.Dict, dictName string, requiresDA bool, sinceVersion model.Version) error { + // NeedAppearances: optional, boolean + _, err := validateBooleanEntry(xRefTable, d, dictName, "NeedAppearances", OPTIONAL, model.V10, nil) + if err != nil { + return err + } + + // SigFlags: optional, since 1.3, integer + sinceV := model.V13 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceV = model.V12 + } + sf, err := validateIntegerEntry(xRefTable, d, dictName, "SigFlags", OPTIONAL, sinceV, nil) + if err != nil { + return err + } + if sf != nil { + i := sf.Value() + xRefTable.SignatureExist = i&1 > 0 + xRefTable.AppendOnly = i&2 > 0 + } + + // CO: array + err = validateFormEntryCO(xRefTable, d, model.V13, requiresDA) + if err != nil { + return err + } + + // DR, optional, resource dict + err = validateFormEntryDR(xRefTable, d) + if err != nil { + return err + } + + // Q: optional, integer + _, err = validateIntegerEntry(xRefTable, d, dictName, "Q", OPTIONAL, model.V10, validateQ) + if err != nil { + return err + } + + // XFA: optional, since 1.5, stream or array + return validateFormXFA(xRefTable, d, sinceVersion) +} + func validateForm(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { // => 12.7.2 Interactive Form Dictionary @@ -550,6 +693,16 @@ func validateForm(xRefTable *model.XRefTable, rootDict types.Dict, required bool return nil } + arr, err := xRefTable.DereferenceArray(o) + if err != nil { + return err + } + if len(arr) == 0 { + // Fix empty AcroForm dict. + rootDict.Delete("AcroForm") + return nil + } + xRefTable.Form = d dictName := "acroFormDict" @@ -570,50 +723,136 @@ func validateForm(xRefTable *model.XRefTable, rootDict types.Dict, required bool requiresDA := da == nil || len(*da) == 0 - err = validateFormFields(xRefTable, o, requiresDA) + err = validateFormFields(xRefTable, arr, requiresDA) if err != nil { return err } - // NeedAppearances: optional, boolean - _, err = validateBooleanEntry(xRefTable, d, dictName, "NeedAppearances", OPTIONAL, model.V10, nil) - if err != nil { - return err + return validateFormEntries(xRefTable, d, dictName, requiresDA, sinceVersion) +} + +func locateAnnForAPAndRect(d types.Dict, r *types.Rectangle, pageAnnots map[int]model.PgAnnots) *types.IndirectRef { + if indRef1 := d.IndirectRefEntry("AP"); indRef1 != nil { + apObjNr := indRef1.ObjectNumber.Value() + for _, m := range pageAnnots { + annots, ok := m[model.AnnWidget] + if ok { + for objNr, annRend := range annots.Map { + if objNr > 0 { + if annRend.RectString() == r.ShortString() && annRend.APObjNrInt() == apObjNr { + return types.NewIndirectRef(objNr, 0) + } + } + } + } + } } + return nil +} - // SigFlags: optional, since 1.3, integer - sinceV := model.V13 - if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceV = model.V12 +func pageAnnotIndRefForAcroField(xRefTable *model.XRefTable, indRef types.IndirectRef) (*types.IndirectRef, error) { + + // indRef should be part of a page annotation dict. 
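Reviewer note, not part of the patch: validateFormEntries above decodes the AcroForm "SigFlags" integer into xRefTable.SignatureExist (bit 1) and xRefTable.AppendOnly (bit 2). The same bit test as a stand-alone sketch:

```go
package main

import "fmt"

// decodeSigFlags splits the AcroForm SigFlags value into its two defined bits.
func decodeSigFlags(sf int) (signatureExist, appendOnly bool) {
	signatureExist = sf&1 > 0
	appendOnly = sf&2 > 0
	return
}

func main() {
	for _, sf := range []int{0, 1, 2, 3} {
		sig, app := decodeSigFlags(sf)
		fmt.Printf("SigFlags=%d -> SignaturesExist=%t AppendOnly=%t\n", sf, sig, app)
	}
}
```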
+ + for _, m := range xRefTable.PageAnnots { + annots, ok := m[model.AnnWidget] + if ok { + for _, ir := range *annots.IndRefs { + if ir == indRef { + return &ir, nil + } + } + } } - sf, err := validateIntegerEntry(xRefTable, d, dictName, "SigFlags", OPTIONAL, sinceV, nil) + + // form field is duplicated, retrieve corresponding page annotation for Rect, AP + + d, err := xRefTable.DereferenceDict(indRef) if err != nil { - return err + return nil, err } - if sf != nil { - i := sf.Value() - xRefTable.SignatureExist = i&1 > 0 - xRefTable.AppendOnly = i&2 > 0 + + arr, err := xRefTable.DereferenceArray(d["Rect"]) + if err != nil { + return nil, err + } + if arr == nil { + // Assumption: There are kids and the kids are allright. + return &indRef, nil } - // CO: arra - err = validateFormEntryCO(xRefTable, d, model.V13, requiresDA) + r, err := xRefTable.RectForArray(arr) if err != nil { - return err + return nil, err } - // DR, optional, resource dict - err = validateFormEntryDR(xRefTable, d) + // Possible orphan sig field dicts. + if ft := d.NameEntry("FT"); ft != nil && *ft == "Sig" { + // Signature Field + if _, ok := d.Find("V"); !ok { + // without linked sig dict (unsigned) + return &indRef, nil + } + // signed but invisible + if !r.Visible() { + return &indRef, nil + } + } + + if indRef := locateAnnForAPAndRect(d, r, xRefTable.PageAnnots); indRef != nil { + return indRef, nil + } + + return &indRef, nil + //return nil, errors.Errorf("pdfcpu: can't repair form field: %d\n", indRef.ObjectNumber.Value()) +} + +func fixFormFieldsArray(xRefTable *model.XRefTable, arr types.Array) (types.Array, error) { + arr1 := types.Array{} + for _, obj := range arr { + indRef, err := pageAnnotIndRefForAcroField(xRefTable, obj.(types.IndirectRef)) + if err != nil { + return nil, err + } + arr1 = append(arr1, *indRef) + } + return arr1, nil +} + +func validateFormFieldsAgainstPageAnnotations(xRefTable *model.XRefTable) error { + o, found := xRefTable.Form.Find("Fields") + if !found { + return nil + } + + indRef, ok := o.(types.IndirectRef) + if !ok { + arr, ok := o.(types.Array) + if !ok { + return errors.New("pdfcpu: invalid array object") + } + arr, err := fixFormFieldsArray(xRefTable, arr) + if err != nil { + return err + } + indRef, err := xRefTable.IndRefForNewObject(arr) + if err != nil { + return err + } + xRefTable.Form["Fields"] = *indRef + return nil + } + + arr, err := xRefTable.DereferenceArray(o) if err != nil { return err } - - // Q: optional, integer - _, err = validateIntegerEntry(xRefTable, d, dictName, "Q", OPTIONAL, model.V10, validateQ) + arr, err = fixFormFieldsArray(xRefTable, arr) if err != nil { return err } + entry, _ := xRefTable.FindTableEntryForIndRef(&indRef) + entry.Object = arr - // XFA: optional, since 1.5, stream or array - return validateFormXFA(xRefTable, d, sinceVersion) + return nil } diff --git a/pkg/pdfcpu/validate/info.go b/pkg/pdfcpu/validate/info.go index 551ed64d..92557176 100644 --- a/pkg/pdfcpu/validate/info.go +++ b/pkg/pdfcpu/validate/info.go @@ -17,8 +17,8 @@ limitations under the License. package validate import ( + "fmt" "strings" - "unicode/utf8" "github.com/angel-one/pdfcpu/pkg/log" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" @@ -28,30 +28,19 @@ import ( // DocumentProperty ensures a property name that may be modified. 
func DocumentProperty(s string) bool { - return !types.MemberOf(s, []string{"Keywords", "Creator", "Producer", "CreationDate", "ModDate", "Trapped"}) + return !types.MemberOf(s, []string{"Keywords", "Producer", "CreationDate", "ModDate", "Trapped"}) } -func handleDefault(xRefTable *model.XRefTable, o types.Object) (string, error) { - - s, err := xRefTable.DereferenceStringOrHexLiteral(o, model.V10, nil) - if err == nil { - return s, nil +func validateInfoDictDate(xRefTable *model.XRefTable, name string, o types.Object) (string, error) { + s, err := validateDateObject(xRefTable, o, model.V10) + if err != nil && xRefTable.ValidationMode == model.ValidationRelaxed { + err = nil + model.ShowRepaired(fmt.Sprintf("info dict \"%s\"", name)) } - - if xRefTable.ValidationMode == model.ValidationStrict { - return "", err - } - - _, err = xRefTable.Dereference(o) - return "", err -} - -func validateInfoDictDate(xRefTable *model.XRefTable, o types.Object) (s string, err error) { - return validateDateObject(xRefTable, o, model.V10) + return s, err } func validateInfoDictTrapped(xRefTable *model.XRefTable, o types.Object) error { - sinceVersion := model.V13 validate := func(s string) bool { return types.MemberOf(s, []string{"True", "False", "Unknown"}) } @@ -75,16 +64,40 @@ func validateInfoDictTrapped(xRefTable *model.XRefTable, o types.Object) error { } func handleProperties(xRefTable *model.XRefTable, key string, val types.Object) error { - if !utf8.ValidString(key) { - key = types.CP1252ToUTF8(key) + v, err := xRefTable.DereferenceStringOrHexLiteral(val, model.V10, nil) + if err != nil { + if xRefTable.ValidationMode == model.ValidationStrict { + return err + } + _, err = xRefTable.Dereference(val) + return err + } + + if v != "" { + + k, err := types.DecodeName(key) + if err != nil { + return err + } + + xRefTable.Properties[k] = v } - s, err := handleDefault(xRefTable, val) + + return nil +} + +func validateKeywords(xRefTable *model.XRefTable, v types.Object) (err error) { + xRefTable.Keywords, err = xRefTable.DereferenceStringOrHexLiteral(v, model.V10, nil) if err != nil { return err } - if s != "" { - xRefTable.Properties[key] = s + + ss := strings.FieldsFunc(xRefTable.Keywords, func(c rune) bool { return c == ',' || c == ';' || c == '\r' }) + for _, s := range ss { + keyword := strings.TrimSpace(s) + xRefTable.KeywordList[keyword] = true } + return nil } @@ -98,7 +111,7 @@ func validateDocInfoDictEntry(xRefTable *model.XRefTable, k string, v types.Obje // text string, opt, since V1.1 case "Title": - xRefTable.Title, err = xRefTable.DereferenceStringOrHexLiteral(v, model.V11, nil) + xRefTable.Title, err = xRefTable.DereferenceStringOrHexLiteral(v, model.V10, nil) // text string, optional case "Author": @@ -106,11 +119,13 @@ func validateDocInfoDictEntry(xRefTable *model.XRefTable, k string, v types.Obje // text string, optional, since V1.1 case "Subject": - xRefTable.Subject, err = xRefTable.DereferenceStringOrHexLiteral(v, model.V11, nil) + xRefTable.Subject, err = xRefTable.DereferenceStringOrHexLiteral(v, model.V10, nil) // text string, optional, since V1.1 case "Keywords": - xRefTable.Keywords, err = xRefTable.DereferenceStringOrHexLiteral(v, model.V11, nil) + if err := validateKeywords(xRefTable, v); err != nil { + return hasModDate, err + } // text string, optional case "Creator": @@ -122,20 +137,20 @@ func validateDocInfoDictEntry(xRefTable *model.XRefTable, k string, v types.Obje // date, optional case "CreationDate": - xRefTable.CreationDate, err = validateInfoDictDate(xRefTable, v) - 
if err != nil && xRefTable.ValidationMode == model.ValidationRelaxed { - err = nil - } + xRefTable.CreationDate, err = validateInfoDictDate(xRefTable, "CreationDate", v) // date, required if PieceInfo is present in document catalog. case "ModDate": hasModDate = true - xRefTable.ModDate, err = validateInfoDictDate(xRefTable, v) + xRefTable.ModDate, err = validateInfoDictDate(xRefTable, "ModDate", v) // name, optional, since V1.3 case "Trapped": err = validateInfoDictTrapped(xRefTable, v) + case "AAPL:Keywords": + xRefTable.CustomExtensions = true + // text string, optional default: err = handleProperties(xRefTable, k, v) @@ -147,9 +162,13 @@ func validateDocInfoDictEntry(xRefTable *model.XRefTable, k string, v types.Obje func validateDocumentInfoDict(xRefTable *model.XRefTable, obj types.Object) (bool, error) { // Document info object is optional. d, err := xRefTable.DereferenceDict(obj) - if err != nil || d == nil { + if err != nil { return false, err } + if d == nil { + xRefTable.Info = nil + return false, nil + } hasModDate := false @@ -175,7 +194,6 @@ func validateDocumentInfoDict(xRefTable *model.XRefTable, obj types.Object) (boo } func validateDocumentInfoObject(xRefTable *model.XRefTable) error { - // Document info object is optional. if xRefTable.Info == nil { return nil @@ -187,7 +205,12 @@ func validateDocumentInfoObject(xRefTable *model.XRefTable) error { hasModDate, err := validateDocumentInfoDict(xRefTable, *xRefTable.Info) if err != nil { - return err + if xRefTable.ValidationMode != model.ValidationRelaxed || !strings.Contains(err.Error(), "wrong type") { + return err + } + xRefTable.Info = nil + model.ShowSkipped("invalid info dict") + return nil } hasPieceInfo, err := xRefTable.CatalogHasPieceInfo() @@ -196,7 +219,10 @@ func validateDocumentInfoObject(xRefTable *model.XRefTable) error { } if hasPieceInfo && !hasModDate { - return errors.Errorf("validateDocumentInfoObject: missing required entry \"ModDate\"") + if xRefTable.ValidationMode == model.ValidationStrict { + return errors.Errorf("validateDocumentInfoObject: missing required entry \"ModDate\"") + } + model.ShowDigestedSpecViolation("infoDict with \"PieceInfo\" but missing \"ModDate\"") } if log.ValidateEnabled() { diff --git a/pkg/pdfcpu/validate/metaData.go b/pkg/pdfcpu/validate/metaData.go index 2f4cd7a2..ff940b3a 100644 --- a/pkg/pdfcpu/validate/metaData.go +++ b/pkg/pdfcpu/validate/metaData.go @@ -18,7 +18,6 @@ package validate import ( "encoding/xml" - "fmt" "strings" "time" @@ -27,93 +26,19 @@ import ( "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" ) -const userDateFormatNoTimeZone = "2006-01-02T15:04:05Z" -const userDateFormatNegTimeZone = "2006-01-02T15:04:05-07:00" -const userDateFormatPosTimeZone = "2006-01-02T15:04:05+07:00" - -type userDate time.Time - -func (ud *userDate) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { - dateString := "" - err := d.DecodeElement(&dateString, &start) - if err != nil { - return err - } - dat, err := time.Parse(userDateFormatNoTimeZone, dateString) - if err == nil { - *ud = userDate(dat) - return nil - } - dat, err = time.Parse(userDateFormatPosTimeZone, dateString) - if err == nil { - *ud = userDate(dat) - return nil - } - dat, err = time.Parse(userDateFormatNegTimeZone, dateString) - if err == nil { - *ud = userDate(dat) - return nil - } - return err -} - -type Alt struct { - //XMLName xml.Name `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Alt"` - Entries []string `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# li"` -} - -type Seq struct { - //XMLName 
xml.Name `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Seq"` - Entries []string `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# li"` -} - -type Title struct { - //XMLName xml.Name `xml:"http://purl.org/dc/elements/1.1/ title"` - Alt Alt `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Alt"` -} - -type Desc struct { - //XMLName xml.Name `xml:"http://purl.org/dc/elements/1.1/ description"` - Alt Alt `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Alt"` -} - -type Creator struct { - //XMLName xml.Name `xml:"http://purl.org/dc/elements/1.1/ creator"` - Seq Seq `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Seq"` -} - -type Description struct { - //XMLName xml.Name `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# Description"` - Title Title `xml:"http://purl.org/dc/elements/1.1/ title"` - Author Creator `xml:"http://purl.org/dc/elements/1.1/ creator"` - Subject Desc `xml:"http://purl.org/dc/elements/1.1/ description"` - Creator string `xml:"http://ns.adobe.com/xap/1.0/ CreatorTool"` - CreationDate userDate `xml:"http://ns.adobe.com/xap/1.0/ CreateDate"` - ModDate userDate `xml:"http://ns.adobe.com/xap/1.0/ ModifyDate"` - Producer string `xml:"http://ns.adobe.com/pdf/1.3/ Producer"` - Trapped bool `xml:"http://ns.adobe.com/pdf/1.3/ Trapped"` - Keywords string `xml:"http://ns.adobe.com/pdf/1.3/ Keywords"` -} - -type RDF struct { - XMLName xml.Name `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# RDF"` - Description Description -} - -type XMPMeta struct { - XMLName xml.Name `xml:"adobe:ns:meta/ xmpmeta"` - RDF RDF -} - func validateMetadataStream(xRefTable *model.XRefTable, d types.Dict, required bool, sinceVersion model.Version) (*types.StreamDict, error) { if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V13 + sinceVersion = model.V10 } sd, err := validateStreamDictEntry(xRefTable, d, "dict", "Metadata", required, sinceVersion, nil) - if err != nil || sd == nil { + if err != nil { return nil, err } + if sd == nil { + delete(d, "Metadata") + return nil, nil + } dictName := "metaDataDict" @@ -139,31 +64,46 @@ func validateMetadata(xRefTable *model.XRefTable, d types.Dict, required bool, s return err } -func validateRootMetadata(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { +func catalogMetaData(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) (*model.XMPMeta, error) { sd, err := validateMetadataStream(xRefTable, rootDict, required, sinceVersion) if err != nil || sd == nil { - return err + return nil, err } - if xRefTable.Version() < model.V20 { - return nil - } + // if xRefTable.Version() < model.V20 { + // return nil + // } // Decode streamDict for supported filters only. 
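Reviewer note, not part of the patch: validateKeywords above (and the metadata keyword handling further down) split the Keywords text on ',', ';' and carriage returns, trim each piece and collect the result as a set. A stand-alone sketch of that splitting:

```go
package main

import (
	"fmt"
	"strings"
)

// splitKeywords cuts a Keywords string on the separators used in the patch,
// trims whitespace and collects the non-empty pieces into a set.
func splitKeywords(s string) map[string]bool {
	list := map[string]bool{}
	for _, f := range strings.FieldsFunc(s, func(c rune) bool { return c == ',' || c == ';' || c == '\r' }) {
		if kw := strings.TrimSpace(f); kw != "" {
			list[kw] = true
		}
	}
	return list
}

func main() {
	fmt.Println(splitKeywords("PDF, validation; relaxed mode\rrepair"))
	// map[PDF:true relaxed mode:true repair:true validation:true]
}
```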
-	if err := sd.Decode(); err == filter.ErrUnsupportedFilter {
-		return nil
+	err = sd.Decode()
+	if err == filter.ErrUnsupportedFilter {
+		return nil, nil
 	}
 	if err != nil {
-		return err
+		return nil, err
 	}
 
-	x := XMPMeta{}
+	x := model.XMPMeta{}
 	if err = xml.Unmarshal(sd.Content, &x); err != nil {
-		fmt.Printf("error: %v", err)
-		return err
+		if xRefTable.ValidationMode == model.ValidationStrict {
+			return nil, err
+		}
+		model.ShowSkipped("metadata parse error")
+		return nil, nil
+	}
+
+	return &x, nil
+}
+
+func validateRootMetadata(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error {
+
+	if xRefTable.CatalogXMPMeta == nil {
+		return nil
 	}
 
+	x := xRefTable.CatalogXMPMeta
+
 	// fmt.Printf(" Title: %v\n", x.RDF.Description.Title.Alt.Entries)
 	// fmt.Printf(" Author: %v\n", x.RDF.Description.Author.Seq.Entries)
 	// fmt.Printf(" Subject: %v\n", x.RDF.Description.Subject.Alt.Entries)
@@ -183,7 +123,12 @@ func validateRootMetadata(xRefTable *model.XRefTable, rootDict types.Dict, requi
 	xRefTable.ModDate = time.Time(d.ModDate).Format(time.RFC3339Nano)
 	xRefTable.Producer = d.Producer
 	//xRefTable.Trapped = d.Trapped
-	xRefTable.Keywords = d.Keywords
+
+	ss := strings.FieldsFunc(d.Keywords, func(c rune) bool { return c == ',' || c == ';' || c == '\r' })
+	for _, s := range ss {
+		keyword := strings.TrimSpace(s)
+		xRefTable.KeywordList[keyword] = true
+	}
 
 	return nil
 }
diff --git a/pkg/pdfcpu/validate/nameTree.go b/pkg/pdfcpu/validate/nameTree.go
index 5cf4d26f..a2cc2223 100644
--- a/pkg/pdfcpu/validate/nameTree.go
+++ b/pkg/pdfcpu/validate/nameTree.go
@@ -430,7 +430,7 @@ func validateEmbeddedFilesNameTreeValue(xRefTable *model.XRefTable, o types.Obje
 
 	// Version check
 	if xRefTable.ValidationMode == model.ValidationRelaxed {
-		sinceVersion = model.V13
+		sinceVersion = model.V11
 	}
 	err := xRefTable.ValidateVersion("EmbeddedFilesNameTreeValue", sinceVersion)
 	if err != nil {
@@ -559,23 +559,28 @@ func validateNameTreeValue(name string, xRefTable *model.XRefTable, o types.Obje
 
 	// Other PDF objects (nulls, numbers, booleans, and names) should be specified as direct objects.
for k, v := range map[string]struct { - validate func(xRefTable *model.XRefTable, o types.Object, sinceVersion model.Version) error - sinceVersion model.Version + validate func(xRefTable *model.XRefTable, o types.Object, sinceVersion model.Version) error + sinceVersion model.Version + sinceVersionRelaxed model.Version }{ - "Dests": {validateDestsNameTreeValue, model.V12}, - "AP": {validateAPNameTreeValue, model.V13}, - "JavaScript": {validateJavaScriptNameTreeValue, model.V13}, - "Pages": {validatePagesNameTreeValue, model.V13}, - "Templates": {validateTemplatesNameTreeValue, model.V13}, - "IDS": {validateIDSNameTreeValue, model.V13}, - "URLS": {validateURLSNameTreeValue, model.V13}, - "EmbeddedFiles": {validateEmbeddedFilesNameTreeValue, model.V14}, - "AlternatePresentations": {validateAlternatePresentationsNameTreeValue, model.V14}, - "Renditions": {validateRenditionsNameTreeValue, model.V15}, - "IDTree": {validateIDTreeValue, model.V13}, + "Dests": {validateDestsNameTreeValue, model.V12, model.V12}, + "AP": {validateAPNameTreeValue, model.V13, model.V13}, + "JavaScript": {validateJavaScriptNameTreeValue, model.V13, model.V13}, + "Pages": {validatePagesNameTreeValue, model.V13, model.V13}, + "Templates": {validateTemplatesNameTreeValue, model.V13, model.V13}, + "IDS": {validateIDSNameTreeValue, model.V13, model.V13}, + "URLS": {validateURLSNameTreeValue, model.V13, model.V13}, + "EmbeddedFiles": {validateEmbeddedFilesNameTreeValue, model.V14, model.V11}, + "AlternatePresentations": {validateAlternatePresentationsNameTreeValue, model.V14, model.V14}, + "Renditions": {validateRenditionsNameTreeValue, model.V15, model.V15}, + "IDTree": {validateIDTreeValue, model.V13, model.V13}, } { if name == k { - return v.validate(xRefTable, o, v.sinceVersion) + sinceVersion := v.sinceVersion + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = v.sinceVersionRelaxed + } + return v.validate(xRefTable, o, sinceVersion) } } @@ -653,31 +658,42 @@ func validateNameTreeDictLimitsEntry(xRefTable *model.XRefTable, d types.Dict, f return err } - var fkv, lkv string - o, err := xRefTable.Dereference(a[0]) if err != nil { return err } - s, err := types.StringOrHexLiteral(o) if err != nil { return err } - fkv = *s + fkv := *s - if o, err = xRefTable.Dereference(a[1]); err != nil { + o, err = xRefTable.Dereference(a[1]) + if err != nil { return err } - s, err = types.StringOrHexLiteral(o) if err != nil { return err } - lkv = *s + lkv := *s + + if xRefTable.ValidationMode == model.ValidationRelaxed { + + if fkv != firstKey && xRefTable.ValidationMode == model.ValidationRelaxed { + fkv = firstKey + a[0] = types.StringLiteral(fkv) + } + + if lkv != lastKey && xRefTable.ValidationMode == model.ValidationRelaxed { + lkv = lastKey + a[1] = types.StringLiteral(lkv) + } + + } - if firstKey < fkv || lastKey > lkv { - return errors.Errorf("pdfcpu: validateNameTreeDictLimitsEntry: leaf node corrupted (firstKey: %s vs %s) (lastKey: %s vs %s)\n", firstKey, fkv, lastKey, lkv) + if firstKey != fkv || lastKey != lkv { + return errors.Errorf("pdfcpu: validateNameTreeDictLimitsEntry: invalid leaf node (firstKey: %s vs %s) (lastKey: %s vs %s)\n", firstKey, fkv, lastKey, lkv) } return nil @@ -708,8 +724,11 @@ func validateNameTree(xRefTable *model.XRefTable, name string, d types.Dict, roo return "", "", nil, err } - if a == nil { - return "", "", nil, errors.New("pdfcpu: validateNameTree: missing \"Kids\" array") + if len(a) == 0 { + if xRefTable.ValidationMode == model.ValidationStrict { + return "", "", nil, 
errors.New("pdfcpu: validateNameTree: missing \"Kids\" array") + } + return "", "", nil, nil } for _, o := range a { @@ -723,7 +742,10 @@ func validateNameTree(xRefTable *model.XRefTable, name string, d types.Dict, roo var kidNode *model.Node kminKid, kmax, kidNode, err = validateNameTree(xRefTable, name, d, false) if err != nil { - return "", "", nil, err + if xRefTable.ValidationMode == model.ValidationStrict { + return "", "", nil, err + } + continue } if kmin == "" { kmin = kminKid diff --git a/pkg/pdfcpu/validate/numberTree.go b/pkg/pdfcpu/validate/numberTree.go index 4e2e96e1..b821937f 100644 --- a/pkg/pdfcpu/validate/numberTree.go +++ b/pkg/pdfcpu/validate/numberTree.go @@ -17,6 +17,8 @@ limitations under the License. package validate import ( + "fmt" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -79,7 +81,12 @@ func validateNumberTreeDictNumsEntry(xRefTable *model.XRefTable, d types.Dict, n // arr length needs to be even because of contained key value pairs. if len(a)%2 == 1 { - return 0, 0, errors.Errorf("pdfcpu: validateNumberTreeDictNumsEntry: Nums array entry length needs to be even, length=%d\n", len(a)) + if xRefTable.ValidationMode == model.ValidationStrict { + return 0, 0, errors.Errorf("pdfcpu: validateNumberTreeDictNumsEntry: Nums array entry length needs to be even, length=%d\n", len(a)) + } + model.ShowDigestedSpecViolation("number tree \"Num\" entry array length needs to be even") + model.ShowSkipped("invalid number tree") + return 0, 0, nil } // every other entry is a value @@ -137,11 +144,22 @@ func validateNumberTreeDictLimitsEntry(xRefTable *model.XRefTable, d types.Dict, return err } - fk, _ := a[0].(types.Integer) - lk, _ := a[1].(types.Integer) + fk := 0 + if a[0] != nil { + fk = a[0].(types.Integer).Value() + } + + lk := 0 + if a[1] != nil { + lk = a[1].(types.Integer).Value() + } - if firstKey < fk.Value() || lastKey > lk.Value() { - return errors.Errorf("pdfcpu: validateNumberTreeDictLimitsEntry: leaf node corrupted: firstKey(%d vs. %d) lastKey(%d vs. %d)\n", firstKey, fk.Value(), lastKey, lk.Value()) + if firstKey < fk || lastKey > lk { + msg := fmt.Sprintf("validateNumberTreeDictLimitsEntry: invalid leaf node: firstKey(%d vs. %d) lastKey(%d vs. 
%d)", firstKey, fk, lastKey, lk) + if xRefTable.ValidationMode == model.ValidationStrict { + return errors.Errorf("pdfcpu: %s\n", msg) + } + model.ShowDigestedSpecViolation(msg) } return nil diff --git a/pkg/pdfcpu/validate/object.go b/pkg/pdfcpu/validate/object.go index 7f8e74b4..1ec52fc0 100644 --- a/pkg/pdfcpu/validate/object.go +++ b/pkg/pdfcpu/validate/object.go @@ -70,7 +70,7 @@ func validateArrayEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entr log.Validate.Printf("validateArrayEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return nil, err } @@ -116,7 +116,7 @@ func validateBooleanEntry(xRefTable *model.XRefTable, d types.Dict, dictName, en log.Validate.Printf("validateBooleanEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return nil, err } @@ -213,6 +213,24 @@ func validateBooleanArrayEntry(xRefTable *model.XRefTable, d types.Dict, dictNam return a, nil } +func timeOfDateObject(xRefTable *model.XRefTable, o types.Object, sinceVersion model.Version) (*time.Time, error) { + s, err := xRefTable.DereferenceStringOrHexLiteral(o, sinceVersion, nil) + if err != nil { + return nil, err + } + + if s == "" { + return nil, nil + } + + t, ok := types.DateTime(s, xRefTable.ValidationMode == model.ValidationRelaxed) + if !ok { + return nil, errors.Errorf("pdfcpu: validateDateObject: <%s> invalid date", s) + } + + return &t, nil +} + func validateDateObject(xRefTable *model.XRefTable, o types.Object, sinceVersion model.Version) (string, error) { s, err := xRefTable.DereferenceStringOrHexLiteral(o, sinceVersion, nil) if err != nil { @@ -236,7 +254,7 @@ func validateDateEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entry log.Validate.Printf("validateDateEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return nil, err } @@ -273,7 +291,7 @@ func validateDictEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entry log.Validate.Printf("validateDictEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return nil, err } @@ -303,7 +321,7 @@ func validateDictEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entry } // Validation - if validate != nil && !validate(d) { + if validate != nil && len(d) > 0 && !validate(d) { return nil, errors.Errorf("validateDictEntry: dict=%s entry=%s invalid dict entry", dictName, entryName) } @@ -373,7 +391,7 @@ func validateFunctionOrArrayOfFunctionsEntry(xRefTable *model.XRefTable, d types log.Validate.Printf("validateFunctionOrArrayOfFunctionsEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return err } @@ -431,7 +449,7 @@ func validateIndRefEntry(xRefTable *model.XRefTable, d types.Dict, dictName, ent log.Validate.Printf("validateIndRefEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return nil, err } @@ -464,6 +482,9 @@ func validateIndRefArrayEntry(xRefTable *model.XRefTable, d types.Dict, dictName } for i, 
o := range a { + if o == nil { + continue + } if _, ok := o.(types.IndirectRef); !ok { return nil, errors.Errorf("pdfcpu: validateIndRefArrayEntry: invalid type at index %d\n", i) } @@ -512,7 +533,7 @@ func validateIntegerEntry(xRefTable *model.XRefTable, d types.Dict, dictName, en log.Validate.Printf("validateIntegerEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return nil, err } @@ -662,7 +683,7 @@ func validateNameEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entry log.Validate.Printf("validateNameEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return nil, err } @@ -694,7 +715,7 @@ func validateNameEntry(xRefTable *model.XRefTable, d types.Dict, dictName, entry // Validation v := name.Value() if validate != nil && (required || len(v) > 0) && !validate(v) { - return nil, errors.Errorf("pdfcpu: validateNameEntry: dict=%s entry=%s invalid dict entry: %s", dictName, entryName, v) + return &name, errors.Errorf("pdfcpu: validateNameEntry: dict=%s entry=%s invalid dict entry: %s", dictName, entryName, v) } if log.ValidateEnabled() { @@ -811,7 +832,7 @@ func validateNumberEntry(xRefTable *model.XRefTable, d types.Dict, dictName, ent log.Validate.Printf("validateNumberEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return nil, err } @@ -984,10 +1005,20 @@ func validateStreamDictEntry(xRefTable *model.XRefTable, d types.Dict, dictName, log.Validate.Printf("validateStreamDictEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) - if err != nil || o == nil { + o, found, err := d.Entry(dictName, entryName, required) + if err != nil { return nil, err } + if o == nil { + if !found { + return nil, nil + } + if xRefTable.ValidationMode == model.ValidationStrict { + return nil, errors.Errorf("pdfcpu: validateStreamDictEntry: dict=%s optional entry=%s is corrupt", dictName, entryName) + } + delete(d, entryName) + model.ShowRepaired("root dict \"Metadata\"") + } sd, valid, err := xRefTable.DereferenceStreamDict(o) if valid { @@ -1042,7 +1073,7 @@ func validateStringEntry(xRefTable *model.XRefTable, d types.Dict, dictName, ent log.Validate.Printf("validateStringEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return nil, err } @@ -1169,7 +1200,7 @@ func validateStringOrStreamEntry(xRefTable *model.XRefTable, d types.Dict, dictN log.Validate.Printf("validateStringOrStreamEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return err } @@ -1214,7 +1245,7 @@ func validateNameOrStringEntry(xRefTable *model.XRefTable, d types.Dict, dictNam log.Validate.Printf("validateNameOrStringEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return err } @@ -1259,7 +1290,7 @@ func validateIntOrStringEntry(xRefTable *model.XRefTable, d types.Dict, dictName log.Validate.Printf("validateIntOrStringEntry begin: entry=%s\n", entryName) } - o, err := 
d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return err } @@ -1304,7 +1335,7 @@ func validateBooleanOrStreamEntry(xRefTable *model.XRefTable, d types.Dict, dict log.Validate.Printf("validateBooleanOrStreamEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return err } @@ -1349,7 +1380,7 @@ func validateStreamDictOrDictEntry(xRefTable *model.XRefTable, d types.Dict, dic log.Validate.Printf("validateStreamDictOrDictEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return err } @@ -1429,7 +1460,7 @@ func validateIntegerOrArrayOfIntegerEntry(xRefTable *model.XRefTable, d types.Di log.Validate.Printf("validateIntegerOrArrayOfIntegerEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return err } @@ -1502,7 +1533,7 @@ func validateNameOrArrayOfNameEntry(xRefTable *model.XRefTable, d types.Dict, di log.Validate.Printf("validateNameOrArrayOfNameEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return err } @@ -1574,7 +1605,7 @@ func validateBooleanOrArrayOfBooleanEntry(xRefTable *model.XRefTable, d types.Di log.Validate.Printf("validateBooleanOrArrayOfBooleanEntry begin: entry=%s\n", entryName) } - o, err := d.Entry(dictName, entryName, required) + o, _, err := d.Entry(dictName, entryName, required) if err != nil || o == nil { return err } diff --git a/pkg/pdfcpu/validate/optionalContent.go b/pkg/pdfcpu/validate/optionalContent.go index 8fe9ef47..c21ee382 100644 --- a/pkg/pdfcpu/validate/optionalContent.go +++ b/pkg/pdfcpu/validate/optionalContent.go @@ -190,7 +190,7 @@ func validateOCGs(xRefTable *model.XRefTable, d types.Dict, dictName, entryName // see 8.11.2.2 - o, err := d.Entry(dictName, entryName, OPTIONAL) + o, _, err := d.Entry(dictName, entryName, OPTIONAL) if err != nil || o == nil { return err } diff --git a/pkg/pdfcpu/validate/outlineTree.go b/pkg/pdfcpu/validate/outlineTree.go index 52de75b1..34bbcbfd 100644 --- a/pkg/pdfcpu/validate/outlineTree.go +++ b/pkg/pdfcpu/validate/outlineTree.go @@ -22,35 +22,53 @@ import ( "github.com/pkg/errors" ) -func validateOutlineItemDict(xRefTable *model.XRefTable, d types.Dict) error { - dictName := "outlineItemDict" +var ErrBookmarksRepair = errors.New("pdfcpu: bookmarks repair failed") - // Title, required, text string +func validateOutlineItemDictTitle(xRefTable *model.XRefTable, d types.Dict, dictName string) error { _, err := validateStringEntry(xRefTable, d, dictName, "Title", REQUIRED, model.V10, nil) if err != nil { - return err + if xRefTable.ValidationMode == model.ValidationStrict { + return err + } + if _, err := validateNameEntry(xRefTable, d, dictName, "Title", REQUIRED, model.V10, nil); err != nil { + return err + } } + return nil +} - // fmt.Printf("Title: %s\n", *title) - - // Parent, required, dict indRef - ir, err := validateIndRefEntry(xRefTable, d, dictName, "Parent", REQUIRED, model.V10) +func validateOutlineItemDictParent(xRefTable *model.XRefTable, d types.Dict, dictName string) error { + required := REQUIRED + if xRefTable.ValidationMode == model.ValidationRelaxed { + 
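// Side note on the recurring call-site change above: Dict.Entry now reports three
// results (object, found, error), so callers can tell a missing key apart from a
// key that is present but resolves to nothing usable. A minimal standalone sketch
// of the distinction, using a plain map rather than pdfcpu's types (names here are
// illustrative only):

func entry(d map[string]interface{}, key string) (obj interface{}, found bool) {
	obj, found = d[key]
	// found == true with obj == nil means "present but empty or corrupt",
	// which is the case relaxed validation may repair, e.g. by deleting the entry.
	return obj, found
}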
required = OPTIONAL + } + ir, err := validateIndRefEntry(xRefTable, d, dictName, "Parent", required, model.V10) if err != nil { return err } - _, err = xRefTable.DereferenceDict(*ir) - if err != nil { + if ir != nil { + if _, err = xRefTable.DereferenceDict(*ir); err != nil { + return err + } + } + return nil +} + +func validateOutlineItemDict(xRefTable *model.XRefTable, d types.Dict) error { + dictName := "outlineItemDict" + + // Title, required, text string + if err := validateOutlineItemDictTitle(xRefTable, d, dictName); err != nil { return err } - // // Count, optional, int - // _, err = validateIntegerEntry(xRefTable, d, dictName, "Count", OPTIONAL, model.V10, nil) - // if err != nil { - // return err - // } + // Parent, required, dict indRef + if err := validateOutlineItemDictParent(xRefTable, d, dictName); err != nil { + return err + } // SE, optional, dict indRef, since V1.3 - ir, err = validateIndRefEntry(xRefTable, d, dictName, "SE", OPTIONAL, model.V13) + ir, err := validateIndRefEntry(xRefTable, d, dictName, "SE", OPTIONAL, model.V13) if err != nil { return err } @@ -62,19 +80,32 @@ func validateOutlineItemDict(xRefTable *model.XRefTable, d types.Dict) error { } // C, optional, array of 3 numbers, since V1.4 - _, err = validateNumberArrayEntry(xRefTable, d, dictName, "C", OPTIONAL, model.V14, func(a types.Array) bool { return len(a) == 3 }) - if err != nil { + version := model.V14 + if xRefTable.ValidationMode == model.ValidationRelaxed { + version = model.V13 + } + if _, err = validateNumberArrayEntry(xRefTable, d, dictName, "C", OPTIONAL, version, func(a types.Array) bool { return len(a) == 3 }); err != nil { return err } // F, optional integer, since V1.4 - _, err = validateIntegerEntry(xRefTable, d, dictName, "F", OPTIONAL, model.V14, nil) - if err != nil { + if _, err = validateIntegerEntry(xRefTable, d, dictName, "F", OPTIONAL, model.V14, nil); err != nil { return err } // Optional A or Dest, since V1.1 - return validateActionOrDestination(xRefTable, d, dictName, model.V11) + destName, err := validateActionOrDestination(xRefTable, d, dictName, model.V11) + if err != nil { + return err + } + if destName != "" { + if _, err = xRefTable.DereferenceDestArray(destName); err != nil && xRefTable.ValidationMode == model.ValidationRelaxed { + model.ShowDigestedSpecViolation("outlineDict with unresolved destination") + return nil + } + } + + return err } func handleOutlineItemDict(xRefTable *model.XRefTable, ir types.IndirectRef, objNumber int) (types.Dict, error) { @@ -110,7 +141,7 @@ func leaf(firstChild, lastChild *types.IndirectRef, objNumber, validationMode in lastChild != nil && lastChild.ObjectNumber.Value() == objNumber { // Degenerated leaf = node pointing to itself. if validationMode == model.ValidationStrict { - return false, errors.Errorf("pdfcpu: validateOutlineTree: corrupted at obj#%d", objNumber) + return false, errors.Errorf("pdfcpu: validateOutlineTree: invalid at obj#%d", objNumber) } return true, nil } @@ -147,7 +178,7 @@ func evalOutlineCount(xRefTable *model.XRefTable, c, visc int, count int, total, return nil } -func validateOutlineTree(xRefTable *model.XRefTable, first, last *types.IndirectRef) (int, int, error) { +func validateOutlineTree(xRefTable *model.XRefTable, first, last *types.IndirectRef, m map[int]bool, fixed *bool) (int, int, error) { var ( d types.Dict objNr int @@ -156,17 +187,9 @@ func validateOutlineTree(xRefTable *model.XRefTable, first, last *types.Indirect err error ) - m := map[int]bool{} - // Process linked list of outline items. 
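// Outline items hang off the root's First/Last references and are chained via
// Next/Prev indirect references, so validation at each level is a linked-list
// walk rather than a tree descent. A stripped-down sketch of that traversal
// (illustrative types, not pdfcpu's):

type outlineItem struct {
	objNr int
	next  *outlineItem // stands in for the "Next" indirect reference
}

func countItems(first *outlineItem) (total int) {
	for item := first; item != nil; item = item.next {
		total++
	}
	return total
}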
for ir := first; ir != nil; ir = d.IndirectRefEntry("Next") { - objNr = ir.ObjectNumber.Value() - if m[objNr] { - return 0, 0, errors.New("pdfcpu: validateOutlineTree: circular outline items") - } - m[objNr] = true - total++ d, err = handleOutlineItemDict(xRefTable, *ir, objNr) @@ -188,12 +211,18 @@ func validateOutlineTree(xRefTable *model.XRefTable, first, last *types.Indirect } if ok { if count != 0 { - return 0, 0, errors.New("pdfcpu: validateOutlineTree: empty outline item dict \"Count\" must be 0") + if xRefTable.ValidationMode == model.ValidationStrict { + return 0, 0, errors.New("pdfcpu: validateOutlineTree: empty outline item dict \"Count\" must be 0") + } } continue } - c, visc, err := validateOutlineTree(xRefTable, firstChild, lastChild) + if err := scanAndFixOutlineItems(xRefTable, firstChild, lastChild, m, fixed); err != nil { + return 0, 0, err + } + + c, visc, err := validateOutlineTree(xRefTable, firstChild, lastChild, m, fixed) if err != nil { return 0, 0, err } @@ -205,7 +234,7 @@ func validateOutlineTree(xRefTable *model.XRefTable, first, last *types.Indirect } if xRefTable.ValidationMode == model.ValidationStrict && objNr != last.ObjectNumber.Value() { - return 0, 0, errors.Errorf("pdfcpu: validateOutlineTree: corrupted child list %d <> %d\n", objNr, last.ObjectNumber) + return 0, 0, errors.Errorf("pdfcpu: validateOutlineTree: invalid child list %d <> %d\n", objNr, last.ObjectNumber) } return total, visible, nil @@ -213,25 +242,25 @@ func validateOutlineTree(xRefTable *model.XRefTable, first, last *types.Indirect func validateVisibleOutlineCount(xRefTable *model.XRefTable, total, visible int, count *int) error { if count == nil { - return errors.Errorf("pdfcpu: validateOutlines: corrupted, root \"Count\" is nil, expected to be %d", total+visible) + return errors.Errorf("pdfcpu: validateOutlines: invalid, root \"Count\" is nil, expected to be %d", total+visible) } if xRefTable.ValidationMode == model.ValidationStrict && *count != total+visible { - return errors.Errorf("pdfcpu: validateOutlines: corrupted, root \"Count\" = %d, expected to be %d", *count, total+visible) + return errors.Errorf("pdfcpu: validateOutlines: invalid, root \"Count\" = %d, expected to be %d", *count, total+visible) } if xRefTable.ValidationMode == model.ValidationRelaxed && *count != total+visible && *count != -total-visible { - return errors.Errorf("pdfcpu: validateOutlines: corrupted, root \"Count\" = %d, expected to be %d", *count, total+visible) + return errors.Errorf("pdfcpu: validateOutlines: invalid, root \"Count\" = %d, expected to be %d", *count, total+visible) } return nil } -func validateInvisibleOutlineCount(xRefTable *model.XRefTable, total, visible int, count *int) error { +func validateInvisibleOutlineCount(xRefTable *model.XRefTable, total int, count *int) error { if count != nil { if xRefTable.ValidationMode == model.ValidationStrict && *count == 0 { - return errors.New("pdfcpu: validateOutlines: corrupted, root \"Count\" shall be omitted if there are no open outline items") + return errors.New("pdfcpu: validateOutlines: invalid, root \"Count\" shall be omitted if there are no open outline items") } if xRefTable.ValidationMode == model.ValidationStrict && *count != total && *count != -total { - return errors.Errorf("pdfcpu: validateOutlines: corrupted, root \"Count\" = %d, expected to be %d", *count, total) + return errors.Errorf("pdfcpu: validateOutlines: invalid, root \"Count\" = %d, expected to be %d", *count, total) } } @@ -240,7 +269,7 @@ func 
validateInvisibleOutlineCount(xRefTable *model.XRefTable, total, visible in func validateOutlineCount(xRefTable *model.XRefTable, total, visible int, count *int) error { if visible == 0 { - return validateInvisibleOutlineCount(xRefTable, total, visible, count) + return validateInvisibleOutlineCount(xRefTable, total, count) } if visible > 0 { @@ -250,25 +279,155 @@ func validateOutlineCount(xRefTable *model.XRefTable, total, visible int, count return nil } -func validateOutlines(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { - // => 12.3.3 Document Outline +func firstOfRemainder(xRefTable *model.XRefTable, last *types.IndirectRef, duplObjNr, oneBeforeDuplObj int) (int, types.Dict, error) { + // Starting with the last node, go back until we hit duplObjNr or oneBeforeDuplObj + for ir := last; ir != nil; { + objNr := ir.ObjectNumber.Value() + d, err := xRefTable.DereferenceDict(*ir) + if err != nil { + return 0, nil, err + } + if len(d) == 0 { + if xRefTable.ValidationMode == model.ValidationStrict { + return 0, nil, errors.New("pdfcpu: validateOutlines: corrupt outline items detected") + } + } + irPrev := d.IndirectRefEntry("Prev") + if irPrev == nil { + break + } + prevObjNr := irPrev.ObjectNumber.Value() + if prevObjNr == duplObjNr { + d["Prev"] = *types.NewIndirectRef(oneBeforeDuplObj, 0) + return objNr, d, nil + } + if prevObjNr == oneBeforeDuplObj { + return objNr, d, nil + } + ir = irPrev + } - ir, err := validateIndRefEntry(xRefTable, rootDict, "rootDict", "Outlines", required, sinceVersion) - if err != nil || ir == nil { + return 0, nil, nil +} + +func removeDuplFirst(xRefTable *model.XRefTable, first, last *types.IndirectRef, duplObjNr, oneBeforeDuplObj int) error { + nextObjNr, nextDict, err := firstOfRemainder(xRefTable, last, duplObjNr, oneBeforeDuplObj) + if err != nil { return err } + if nextObjNr == 0 { + return ErrBookmarksRepair + } + delete(nextDict, "Prev") + first.ObjectNumber = types.Integer(oneBeforeDuplObj) + return nil +} - d, err := xRefTable.DereferenceDict(*ir) - if err != nil || d == nil { - return err +func handleCircular(xRefTable *model.XRefTable, dict types.Dict, first *types.IndirectRef, fixed *bool) error { + if xRefTable.ValidationMode == model.ValidationStrict { + return errors.New("pdfcpu: validateOutlines: circular outline items detected") } + dict["Prev"] = *first + delete(dict, "Next") + *fixed = true + return nil +} - xRefTable.Outlines = d +func handleCorruptDict(xRefTable *model.XRefTable) error { + if xRefTable.ValidationMode == model.ValidationStrict { + return errors.New("pdfcpu: validateOutlines: corrupt outline items detected") + } + return ErrBookmarksRepair +} + +func handleDuplicate( + xRefTable *model.XRefTable, + ir, first, last *types.IndirectRef, + prevDict types.Dict, + objNr, prevObjNr int) error { + + if ir == first { + return removeDuplFirst(xRefTable, first, last, objNr, prevObjNr) + } + + if ir == last { + delete(prevDict, "Next") + last.ObjectNumber = types.Integer(prevObjNr) + return nil + } + + nextObjNr, _, _ := firstOfRemainder(xRefTable, last, objNr, prevObjNr) + if nextObjNr == 0 { + return ErrBookmarksRepair + } + + nextRef := prevDict.IndirectRefEntry("Next") + if nextRef == nil { + return ErrBookmarksRepair + } + + prevDict["Next"] = *types.NewIndirectRef(nextObjNr, 0) + + return nil +} + +func scanAndFixOutlineItems(xRefTable *model.XRefTable, first, last *types.IndirectRef, seen map[int]bool, fixed *bool) error { + visited := map[int]bool{} + var prevDict types.Dict + 
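// The scan below keeps two maps: "visited" protects the current sibling chain
// against cycles, while "seen" is shared across recursion levels and catches items
// referenced from more than one chain (duplicates). A generic sketch of the
// map-based cycle check, reusing the illustrative outlineItem type from the sketch
// above:

func hasCycle(first *outlineItem) bool {
	visited := map[int]bool{}
	for item := first; item != nil; item = item.next {
		if visited[item.objNr] {
			return true // relaxed mode rewires Prev/Next here instead of failing
		}
		visited[item.objNr] = true
	}
	return false
}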
var prevObjNr int + + for ir := first; ir != nil; { + objNr := ir.ObjectNumber.Value() + + if visited[objNr] { + return handleCircular(xRefTable, prevDict, first, fixed) + } + visited[objNr] = true + + dict, err := xRefTable.DereferenceDict(*ir) + if err != nil { + return err + } + if len(dict) == 0 { + return handleCorruptDict(xRefTable) + } + + if ir == first && dict["Prev"] != nil { + *fixed = true + if xRefTable.ValidationMode == model.ValidationStrict { + return errors.New("pdfcpu: validateOutlines: corrupt outline items detected") + } + delete(dict, "Prev") + } + + if seen[objNr] { + *fixed = true + return handleDuplicate(xRefTable, ir, first, last, prevDict, objNr, prevObjNr) + } + + seen[objNr] = true + prevDict = dict + prevObjNr = objNr + ir = dict.IndirectRefEntry("Next") + } + + return nil +} + +func removeOutlines(xRefTable *model.XRefTable, rootDict types.Dict) { + xRefTable.Outlines = nil + delete(rootDict, "Outlines") +} + +func validateOutlinesGeneral(xRefTable *model.XRefTable, rootDict types.Dict) (*types.IndirectRef, *types.IndirectRef, *int, error) { + d := xRefTable.Outlines // Type, optional, name - _, err = validateNameEntry(xRefTable, d, "outlineDict", "Type", OPTIONAL, model.V10, func(s string) bool { return s == "Outlines" || s == "Outline" }) + _, err := validateNameEntry(xRefTable, d, "outlineDict", "Type", OPTIONAL, model.V10, func(s string) bool { + return s == "Outlines" || (xRefTable.ValidationMode == model.ValidationRelaxed && (s == "Outline" || s == "BMoutlines")) + }) if err != nil { - return err + return nil, nil, nil, err } first := d.IndirectRefEntry("First") @@ -276,24 +435,49 @@ func validateOutlines(xRefTable *model.XRefTable, rootDict types.Dict, required if first == nil { if last != nil { - return errors.New("pdfcpu: validateOutlines: corrupted, root missing \"First\"") + return nil, nil, nil, errors.New("pdfcpu: validateOutlines: invalid, root missing \"First\"") } - // empty outlines - xRefTable.Outlines = nil - rootDict.Delete("Outlines") - return nil + removeOutlines(xRefTable, rootDict) + return nil, nil, nil, nil } - if last == nil { - return errors.New("pdfcpu: validateOutlines: corrupted, root missing \"Last\"") + if last == nil && xRefTable.ValidationMode == model.ValidationStrict { + return nil, nil, nil, errors.New("pdfcpu: validateOutlines: invalid, root missing \"Last\"") } count := d.IntEntry("Count") if xRefTable.ValidationMode == model.ValidationStrict && count != nil && *count < 0 { - return errors.New("pdfcpu: validateOutlines: corrupted, root \"Count\" can't be negativ") + return nil, nil, nil, errors.New("pdfcpu: validateOutlines: invalid, root \"Count\" can't be negative") + } + + return first, last, count, nil +} + +func handleCorruptOutlineItems(xRefTable *model.XRefTable, rootDict types.Dict) { + model.ShowMsg("validateOutlines: corrupt outline items detected") + removeOutlines(xRefTable, rootDict) + model.ShowSkipped("bookmarks") +} + +func scanAndFixOutlines(xRefTable *model.XRefTable, rootDict types.Dict, first, last *types.IndirectRef, count *int) error { + + m := map[int]bool{} + var fixed bool + + err := scanAndFixOutlineItems(xRefTable, first, last, m, &fixed) + if err != nil { + if err == ErrBookmarksRepair && xRefTable.ValidationMode == model.ValidationRelaxed { + handleCorruptOutlineItems(xRefTable, rootDict) + return nil + } + return err } - total, visible, err := validateOutlineTree(xRefTable, first, last) + total, visible, err := validateOutlineTree(xRefTable, first, last, m, &fixed) if err != nil { + if err 
== ErrBookmarksRepair && xRefTable.ValidationMode == model.ValidationRelaxed { + handleCorruptOutlineItems(xRefTable, rootDict) + return nil + } return err } @@ -301,5 +485,40 @@ func validateOutlines(xRefTable *model.XRefTable, rootDict types.Dict, required return err } + if fixed { + model.ShowRepaired("bookmarks") + } + return nil } + +func validateOutlines(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { + // => 12.3.3 Document Outline + + ir, err := validateIndRefEntry(xRefTable, rootDict, "rootDict", "Outlines", required, sinceVersion) + if err != nil || ir == nil { + return err + } + + d, err := xRefTable.DereferenceDict(*ir) + if err != nil { + return err + } + + if d == nil { + removeOutlines(xRefTable, rootDict) + return nil + } + + xRefTable.Outlines = d + + first, last, count, err := validateOutlinesGeneral(xRefTable, rootDict) + if err != nil { + return err + } + if first == nil && last == nil { + return nil + } + + return scanAndFixOutlines(xRefTable, rootDict, first, last, count) +} diff --git a/pkg/pdfcpu/validate/page.go b/pkg/pdfcpu/validate/page.go index 873982bb..cf6d5deb 100644 --- a/pkg/pdfcpu/validate/page.go +++ b/pkg/pdfcpu/validate/page.go @@ -17,6 +17,9 @@ limitations under the License. package validate import ( + "fmt" + "strings" + "github.com/angel-one/pdfcpu/pkg/log" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" @@ -67,19 +70,8 @@ func validateResourceDict(xRefTable *model.XRefTable, o types.Object) (hasResour return true, nil } -func validatePageContents(xRefTable *model.XRefTable, d types.Dict) (hasContents bool, err error) { - - o, found := d.Find("Contents") - if !found { - return false, err - } - - o, err = xRefTable.Dereference(o) - if err != nil || o == nil { - return false, err - } - - switch o := o.(type) { +func validateContents(obj types.Object, xRefTable *model.XRefTable, d types.Dict) (hasContents bool, err error) { + switch obj := obj.(type) { case types.StreamDict: // no further processing. @@ -88,8 +80,8 @@ func validatePageContents(xRefTable *model.XRefTable, d types.Dict) (hasContents case types.Array: // process array of content stream dicts. - for _, o := range o { - o1, _, err := xRefTable.DereferenceStreamDict(o) + for _, obj := range obj { + o1, _, err := xRefTable.DereferenceStreamDict(obj) if err != nil { return false, err } @@ -102,47 +94,72 @@ func validatePageContents(xRefTable *model.XRefTable, d types.Dict) (hasContents } - if !hasContents { - err := errors.Errorf("validatePageContents: empty page content array detected") - if xRefTable.ValidationMode == model.ValidationStrict { - return false, err - } - reportSpecViolation(xRefTable, err) + if hasContents { + break + } + + if xRefTable.ValidationMode == model.ValidationStrict { + return false, errors.Errorf("validatePageContents: empty page content array detected") + } + + // Digest empty array. + d.Delete("Contents") + model.ShowRepaired("page dict \"Contents\"") + + case types.StringLiteral: + + s := strings.TrimSpace(obj.Value()) + + if len(s) > 0 || xRefTable.ValidationMode == model.ValidationStrict { + return false, errors.Errorf("validatePageContents: page content must be stream dict or array, got: %T", obj) + } + + // Digest empty string literal. 
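// In relaxed mode validateContents tolerates out-of-spec "Contents" values: an
// array without a single usable content stream, an empty string literal or an
// empty dict is dropped and reported as repaired, while any non-empty value of the
// wrong type still fails. A condensed sketch of that decision, detached from
// pdfcpu's types (whitespace trimming of string literals elided):

func contentsRepairable(v interface{}) bool {
	switch x := v.(type) {
	case []interface{}:
		return len(x) == 0
	case string:
		return x == ""
	case map[string]interface{}:
		return len(x) == 0
	default:
		return false
	}
}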
+ d.Delete("Contents") + model.ShowRepaired("page dict \"Contents\"") + + case types.Dict: + + if len(obj) > 0 || xRefTable.ValidationMode == model.ValidationStrict { + return false, errors.Errorf("validatePageContents: page content must be stream dict or array, got: %T", obj) } + // Digest empty dict. + d.Delete("Contents") + model.ShowRepaired("page dict \"Contents\"") + default: - return false, errors.Errorf("validatePageContents: page content must be stream dict or array") + return false, errors.Errorf("validatePageContents: page content must be stream dict or array, got: %T", obj) } return hasContents, nil } -func validatePageResources(xRefTable *model.XRefTable, d types.Dict, hasResources, hasContents bool) error { +func validatePageContents(xRefTable *model.XRefTable, d types.Dict) (hasContents bool, err error) { + o, found := d.Find("Contents") + if !found { + return false, err + } + + o, err = xRefTable.Dereference(o) + if err != nil || o == nil { + return false, err + } + + return validateContents(o, xRefTable, d) +} +func validatePageResources(xRefTable *model.XRefTable, d types.Dict) error { if o, found := d.Find("Resources"); found { _, err := validateResourceDict(xRefTable, o) return err } - // TODO Check if contents need resources (#169) - // if !hasResources && hasContents { - // return errors.New("pdfcpu: validatePageResources: missing required entry \"Resources\" - should be inherited") - // } - return nil } -func validatePageEntryMediaBox(xRefTable *model.XRefTable, d types.Dict, required bool, sinceVersion model.Version) (hasMediaBox bool, err error) { - - o, err := validateRectangleEntry(xRefTable, d, "pageDict", "MediaBox", required, sinceVersion, nil) - if err != nil { - return false, err - } - if o != nil { - hasMediaBox = true - } - - return hasMediaBox, nil +func validatePageEntryMediaBox(xRefTable *model.XRefTable, d types.Dict, required bool, sinceVersion model.Version) (types.Array, error) { + return validateRectangleEntry(xRefTable, d, "pageDict", "MediaBox", required, sinceVersion, nil) } func validatePageEntryCropBox(xRefTable *model.XRefTable, d types.Dict, required bool, sinceVersion model.Version) error { @@ -299,7 +316,7 @@ func validatePageEntryDur(xRefTable *model.XRefTable, d types.Dict, required boo return err } -func validateTransitionDictEntryDi(xRefTable *model.XRefTable, d types.Dict) error { +func validateTransitionDictEntryDi(d types.Dict) error { o, found := d.Find("Di") if !found { @@ -389,7 +406,7 @@ func validateTransitionDict(xRefTable *model.XRefTable, d types.Dict) error { } // Di, optional, number or name - err = validateTransitionDictEntryDi(xRefTable, d) + err = validateTransitionDictEntryDi(d) if err != nil { return err } @@ -479,7 +496,7 @@ func validatePageEntryTabs(xRefTable *model.XRefTable, d types.Dict, required bo validateTabs := func(s string) bool { return types.MemberOf(s, []string{"R", "C", "S", "A", "W"}) } if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V14 + sinceVersion = model.V13 } _, err := validateNameEntry(xRefTable, d, "pagesDict", "Tabs", required, sinceVersion, validateTabs) @@ -771,132 +788,139 @@ func validatePageEntryVP(xRefTable *model.XRefTable, d types.Dict, required bool return nil } -func validatePageDict(xRefTable *model.XRefTable, d types.Dict, objNumber int, hasResources, hasMediaBox bool) error { +func handlePieceInfo(xRefTable *model.XRefTable, d types.Dict, dictName string) error { + sinceVersion := model.V13 + if xRefTable.ValidationMode == model.ValidationRelaxed 
{ + sinceVersion = model.V10 + } + + hasPieceInfo, err := validatePieceInfo(xRefTable, d, dictName, "PieceInfo", OPTIONAL, sinceVersion) + if err != nil { + return err + } + + // LastModified + lm, err := validateDateEntry(xRefTable, d, dictName, "LastModified", OPTIONAL, model.V13) + if err != nil { + return err + } + + if hasPieceInfo && lm == nil && xRefTable.ValidationMode == model.ValidationStrict { + return errors.New("pdfcpu: validatePageDict: missing \"LastModified\" (required by \"PieceInfo\")") + } + + return nil +} + +func validatePageDict(xRefTable *model.XRefTable, d types.Dict, hasMediaBox bool) (types.Array, error) { dictName := "pageDict" if ir := d.IndirectRefEntry("Parent"); ir == nil { - return errors.New("pdfcpu: validatePageDict: missing parent") + return nil, errors.New("pdfcpu: validatePageDict: missing parent") } // Contents - hasContents, err := validatePageContents(xRefTable, d) + _, err := validatePageContents(xRefTable, d) if err != nil { - return err + return nil, err } // Resources - err = validatePageResources(xRefTable, d, hasResources, hasContents) + err = validatePageResources(xRefTable, d) if err != nil { - return err + return nil, err } // MediaBox - _, err = validatePageEntryMediaBox(xRefTable, d, !hasMediaBox, model.V10) + mediaBoxArr, err := validatePageEntryMediaBox(xRefTable, d, !hasMediaBox, model.V10) if err != nil { - return err + return nil, err } // PieceInfo - if xRefTable.ValidationMode != model.ValidationRelaxed { - sinceVersion := model.V13 - if xRefTable.ValidationMode == model.ValidationRelaxed { - sinceVersion = model.V10 - } - - hasPieceInfo, err := validatePieceInfo(xRefTable, d, dictName, "PieceInfo", OPTIONAL, sinceVersion) - if err != nil { - return err - } - - // LastModified - lm, err := validateDateEntry(xRefTable, d, dictName, "LastModified", OPTIONAL, model.V13) - if err != nil { - return err - } - - if hasPieceInfo && lm == nil && xRefTable.ValidationMode == model.ValidationStrict { - return errors.New("pdfcpu: validatePageDict: missing \"LastModified\" (required by \"PieceInfo\")") - } + if err := handlePieceInfo(xRefTable, d, dictName); err != nil { + return nil, err } // AA - err = validateAdditionalActions(xRefTable, d, dictName, "AA", OPTIONAL, model.V14, "page") + sinceVersion := model.V14 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V11 + } + err = validateAdditionalActions(xRefTable, d, dictName, "AA", OPTIONAL, sinceVersion, "page") if err != nil { - return err + return nil, err } type v struct { - validate func(xRefTable *model.XRefTable, d types.Dict, required bool, sinceVersion model.Version) (err error) - required bool - sinceVersion model.Version + validate func(xRefTable *model.XRefTable, d types.Dict, required bool, sinceVersion model.Version) (err error) + required bool + sinceVersion model.Version + sinceVersionRelaxed model.Version } for _, f := range []v{ - {validatePageEntryCropBox, OPTIONAL, model.V10}, - {validatePageEntryBleedBox, OPTIONAL, model.V13}, - {validatePageEntryTrimBox, OPTIONAL, model.V13}, - {validatePageEntryArtBox, OPTIONAL, model.V13}, - {validatePageBoxColorInfo, OPTIONAL, model.V14}, - {validatePageEntryRotate, OPTIONAL, model.V10}, - {validatePageEntryGroup, OPTIONAL, model.V14}, - {validatePageEntryThumb, OPTIONAL, model.V10}, - {validatePageEntryB, OPTIONAL, model.V11}, - {validatePageEntryDur, OPTIONAL, model.V11}, - {validatePageEntryTrans, OPTIONAL, model.V11}, - {validateMetadata, OPTIONAL, model.V14}, - {validatePageEntryStructParents, 
OPTIONAL, model.V10}, - {validatePageEntryID, OPTIONAL, model.V13}, - {validatePageEntryPZ, OPTIONAL, model.V13}, - {validatePageEntrySeparationInfo, OPTIONAL, model.V13}, - {validatePageEntryTabs, OPTIONAL, model.V15}, - {validatePageEntryTemplateInstantiated, OPTIONAL, model.V15}, - {validatePageEntryPresSteps, OPTIONAL, model.V15}, - {validatePageEntryUserUnit, OPTIONAL, model.V16}, - {validatePageEntryVP, OPTIONAL, model.V16}, + {validatePageEntryCropBox, OPTIONAL, model.V10, model.V10}, + {validatePageEntryBleedBox, OPTIONAL, model.V13, model.V12}, + {validatePageEntryTrimBox, OPTIONAL, model.V13, model.V10}, + {validatePageEntryArtBox, OPTIONAL, model.V13, model.V12}, + {validatePageBoxColorInfo, OPTIONAL, model.V14, model.V14}, + {validatePageEntryRotate, OPTIONAL, model.V10, model.V10}, + {validatePageEntryGroup, OPTIONAL, model.V14, model.V14}, + {validatePageEntryThumb, OPTIONAL, model.V10, model.V10}, + {validatePageEntryB, OPTIONAL, model.V11, model.V11}, + {validatePageEntryDur, OPTIONAL, model.V11, model.V11}, + {validatePageEntryTrans, OPTIONAL, model.V11, model.V11}, + {validateMetadata, OPTIONAL, model.V14, model.V14}, + {validatePageEntryStructParents, OPTIONAL, model.V10, model.V10}, + {validatePageEntryID, OPTIONAL, model.V13, model.V13}, + {validatePageEntryPZ, OPTIONAL, model.V13, model.V13}, + {validatePageEntrySeparationInfo, OPTIONAL, model.V13, model.V13}, + {validatePageEntryTabs, OPTIONAL, model.V15, model.V15}, + {validatePageEntryTemplateInstantiated, OPTIONAL, model.V15, model.V15}, + {validatePageEntryPresSteps, OPTIONAL, model.V15, model.V15}, + {validatePageEntryUserUnit, OPTIONAL, model.V16, model.V16}, + {validatePageEntryVP, OPTIONAL, model.V16, model.V16}, } { - err = f.validate(xRefTable, d, f.required, f.sinceVersion) + sinceVersion := f.sinceVersion + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = f.sinceVersionRelaxed + } + err = f.validate(xRefTable, d, f.required, sinceVersion) if err != nil { - return err + return nil, err } } - return nil + return mediaBoxArr, nil } -func validatePagesDictGeneralEntries(xRefTable *model.XRefTable, d types.Dict) (pageCount int, hasResources, hasMediaBox bool, err error) { - - // PageCount of this sub page tree - i := d.IntEntry("Count") - if i == nil { - return 0, false, false, errors.New("pdfcpu: validatePagesDictGeneralEntries: missing \"Count\" in page tree") - } - pageCount = *i - +func validatePagesDictGeneralEntries(xRefTable *model.XRefTable, d types.Dict) (hasResources bool, mediaBoxArr types.Array, err error) { hasResources, err = validateResources(xRefTable, d) if err != nil { - return 0, false, false, err + return false, nil, err } // MediaBox: optional, rectangle - hasMediaBox, err = validatePageEntryMediaBox(xRefTable, d, OPTIONAL, model.V10) + mediaBoxArr, err = validatePageEntryMediaBox(xRefTable, d, OPTIONAL, model.V10) if err != nil { - return 0, false, false, err + return false, nil, err } // CropBox: optional, rectangle err = validatePageEntryCropBox(xRefTable, d, OPTIONAL, model.V10) if err != nil { - return 0, false, false, err + return false, nil, err } // Rotate: optional, integer err = validatePageEntryRotate(xRefTable, d, OPTIONAL, model.V10) if err != nil { - return 0, false, false, err + return false, nil, err } - return pageCount, hasResources, hasMediaBox, nil + return hasResources, mediaBoxArr, nil } func dictTypeForPageNodeDict(d types.Dict) (string, error) { @@ -914,22 +938,6 @@ func dictTypeForPageNodeDict(d types.Dict) (string, error) { } func 
validateResources(xRefTable *model.XRefTable, d types.Dict) (hasResources bool, err error) { - - // Get number of pages of this PDF file. - pageCount := d.IntEntry("Count") - if pageCount == nil { - return false, errors.New("pdfcpu: validateResources: missing \"Count\"") - } - - // TODO not ideal - overall pageCount is only set during validation! - if xRefTable.PageCount == 0 { - xRefTable.PageCount = *pageCount - } - - if log.ValidateEnabled() { - log.Validate.Printf("validateResources: This page node has %d pages\n", *pageCount) - } - // Resources: optional, dict o, ok := d.Find("Resources") if !ok { @@ -954,7 +962,49 @@ func pagesDictKids(xRefTable *model.XRefTable, d types.Dict) types.Array { return kids } -func processPagesKids(xRefTable *model.XRefTable, kids types.Array, objNr int, hasResources, hasMediaBox bool, curPage *int) (types.Array, error) { +func validateParent(pageNodeDict types.Dict, objNr int) error { + parentIndRef := pageNodeDict.IndirectRefEntry("Parent") + if parentIndRef == nil { + return errors.New("pdfcpu: validatePagesDict: missing parent node") + } + if parentIndRef.ObjectNumber.Value() != objNr { + return errors.New("pdfcpu: validatePagesDict: corrupt parent node") + } + return nil +} + +func detectPageNodeDict(xRefTable *model.XRefTable, indRef types.IndirectRef, objNr, parentObjNr int, mediaBoxArr types.Array, pageNr int) (types.Dict, error) { + pageNodeDict, err := xRefTable.DereferenceDict(indRef) + if err != nil { + return nil, err + } + + if len(pageNodeDict) > 0 { + return pageNodeDict, nil + } + + if xRefTable.ValidationMode == model.ValidationStrict { + return nil, errors.Errorf("pdfcpu: validatePagesDict: corrupt page %d (obj#%d)", pageNr, objNr) + } + + var mediaBox *types.Rectangle + if len(mediaBoxArr) > 0 { + mediaBox, err = xRefTable.RectForArray(mediaBoxArr) + if err != nil { + return nil, err + } + } + + if _, err := xRefTable.EmptyPage(types.NewIndirectRef(parentObjNr, 0), mediaBox, objNr); err != nil { + return nil, err + } + + model.ShowRepaired(fmt.Sprintf("currupt page %d with blank page", pageNr)) + + return xRefTable.DereferenceDict(indRef) +} + +func processPagesKids(xRefTable *model.XRefTable, kids types.Array, parentObjNr int, hasResources bool, mediaBoxArr types.Array, curPage *int) (types.Array, error) { var a types.Array for _, o := range kids { @@ -968,31 +1018,20 @@ func processPagesKids(xRefTable *model.XRefTable, kids types.Array, objNr int, h return nil, errors.New("pdfcpu: validatePagesDict: missing indirect reference for kid") } - if log.ValidateEnabled() { - log.Validate.Printf("validatePagesDict: PageNode: %s\n", ir) - } - - objNumber := ir.ObjectNumber.Value() - if objNumber == 0 { + objNr := ir.ObjectNumber.Value() + if objNr == 0 { continue } - a = append(a, ir) - - pageNodeDict, err := xRefTable.DereferenceDict(ir) + pageNodeDict, err := detectPageNodeDict(xRefTable, ir, objNr, parentObjNr, mediaBoxArr, *curPage+1) if err != nil { return nil, err } - if pageNodeDict == nil { - return nil, errors.New("pdfcpu: validatePagesDict: corrupt page node") - } - parentIndRef := pageNodeDict.IndirectRefEntry("Parent") - if parentIndRef == nil { - return nil, errors.New("pdfcpu: validatePagesDict: missing parent node") - } - if parentIndRef.ObjectNumber.Value() != objNr { - return nil, errors.New("pdfcpu: validatePagesDict: corrupt parent node") + a = append(a, ir) + + if err := validateParent(pageNodeDict, parentObjNr); err != nil { + return nil, err } dictType, err := dictTypeForPageNodeDict(pageNodeDict) @@ -1003,16 +1042,20 @@ 
func processPagesKids(xRefTable *model.XRefTable, kids types.Array, objNr int, h switch dictType { case "Pages": - if err = validatePagesDict(xRefTable, pageNodeDict, objNumber, hasResources, hasMediaBox, curPage); err != nil { + if err = validatePagesDict(xRefTable, pageNodeDict, objNr, hasResources, mediaBoxArr, curPage); err != nil { return nil, err } case "Page": *curPage++ xRefTable.CurPage = *curPage - if err = validatePageDict(xRefTable, pageNodeDict, objNumber, hasResources, hasMediaBox); err != nil { + dMediaBoxArr, err := validatePageDict(xRefTable, pageNodeDict, len(mediaBoxArr) > 0) + if err != nil { return nil, err } + if len(mediaBoxArr) == 0 { + mediaBoxArr = dMediaBoxArr + } if err := xRefTable.SetValid(ir); err != nil { return nil, err } @@ -1026,22 +1069,18 @@ func processPagesKids(xRefTable *model.XRefTable, kids types.Array, objNr int, h return a, nil } -func validatePagesDict(xRefTable *model.XRefTable, d types.Dict, objNr int, hasResources, hasMediaBox bool, curPage *int) error { - pageCount, dHasResources, dHasMediaBox, err := validatePagesDictGeneralEntries(xRefTable, d) +func validatePagesDict(xRefTable *model.XRefTable, d types.Dict, objNr int, hasResources bool, mediaBoxArr types.Array, curPage *int) error { + dHasResources, dMediaBoxArr, err := validatePagesDictGeneralEntries(xRefTable, d) if err != nil { return err } - if pageCount == 0 { - return nil - } - if dHasResources { hasResources = true } - if dHasMediaBox { - hasMediaBox = true + if len(dMediaBoxArr) > 0 { + mediaBoxArr = dMediaBoxArr } kids := pagesDictKids(xRefTable, d) @@ -1049,7 +1088,7 @@ func validatePagesDict(xRefTable *model.XRefTable, d types.Dict, objNr int, hasR return errors.New("pdfcpu: validatePagesDict: corrupt \"Kids\" entry") } - d["Kids"], err = processPagesKids(xRefTable, kids, objNr, hasResources, hasMediaBox, curPage) + d["Kids"], err = processPagesKids(xRefTable, kids, objNr, hasResources, mediaBoxArr, curPage) return err } @@ -1137,13 +1176,7 @@ func validatePages(xRefTable *model.XRefTable, rootDict types.Dict) (types.Dict, if err != nil { return nil, err } - msg := "repaired: missing \"Pages\" indirect reference" - if log.DebugEnabled() { - log.Debug.Println("pdfcpu " + msg) - } - if log.CLIEnabled() { - log.CLI.Println(msg) - } + model.ShowRepaired("missing \"Pages\" indirect reference") } if ok { @@ -1159,19 +1192,26 @@ func validatePages(xRefTable *model.XRefTable, rootDict types.Dict) (types.Dict, } } - pageCount := pageRoot.IntEntry("Count") - if pageCount == nil { + obj, found = pageRoot.Find("Count") + if !found { return nil, errors.New("pdfcpu: validatePages: missing \"Count\" in page root dict") } - i := 0 - err = validatePagesDict(xRefTable, pageRoot, objNr, false, false, &i) + i, err := xRefTable.DereferenceInteger(obj) + if err != nil || i == nil { + return nil, errors.New("pdfcpu: validatePages: corrupt \"Count\" in page root dict") + } + + xRefTable.PageCount = i.Value() + + pc := 0 + err = validatePagesDict(xRefTable, pageRoot, objNr, false, nil, &pc) if err != nil { return nil, err } - if i != *pageCount { - return nil, errors.New("pdfcpu: validatePages: page tree corrupted") + if pc != xRefTable.PageCount { + return nil, errors.New("pdfcpu: validatePages: page tree invalid") } return pageRoot, err diff --git a/pkg/pdfcpu/validate/pattern.go b/pkg/pdfcpu/validate/pattern.go index 95afd99e..951c950e 100644 --- a/pkg/pdfcpu/validate/pattern.go +++ b/pkg/pdfcpu/validate/pattern.go @@ -23,16 +23,13 @@ import ( ) func validateTilingPatternDict(xRefTable 
*model.XRefTable, sd *types.StreamDict, sinceVersion model.Version) error { - dictName := "tilingPatternDict" - // Version check - err := xRefTable.ValidateVersion(dictName, sinceVersion) - if err != nil { + if err := xRefTable.ValidateVersion(dictName, sinceVersion); err != nil { return err } - _, err = validateNameEntry(xRefTable, sd.Dict, dictName, "Type", OPTIONAL, sinceVersion, func(s string) bool { return s == "Pattern" }) + _, err := validateNameEntry(xRefTable, sd.Dict, dictName, "Type", OPTIONAL, sinceVersion, func(s string) bool { return s == "Pattern" }) if err != nil { return err } @@ -83,15 +80,13 @@ func validateTilingPatternDict(xRefTable *model.XRefTable, sd *types.StreamDict, } func validateShadingPatternDict(xRefTable *model.XRefTable, d types.Dict, sinceVersion model.Version) error { - dictName := "shadingPatternDict" - err := xRefTable.ValidateVersion(dictName, sinceVersion) - if err != nil { + if err := xRefTable.ValidateVersion(dictName, sinceVersion); err != nil { return err } - _, err = validateNameEntry(xRefTable, d, dictName, "Type", OPTIONAL, sinceVersion, func(s string) bool { return s == "Pattern" }) + _, err := validateNameEntry(xRefTable, d, dictName, "Type", OPTIONAL, sinceVersion, func(s string) bool { return s == "Pattern" }) if err != nil { return err } @@ -136,12 +131,12 @@ func validatePattern(xRefTable *model.XRefTable, o types.Object) error { switch o := o.(type) { - case types.Dict: - err = validateShadingPatternDict(xRefTable, o, model.V13) - case types.StreamDict: err = validateTilingPatternDict(xRefTable, &o, model.V10) + case types.Dict: + err = validateShadingPatternDict(xRefTable, o, model.V13) + default: err = errors.New("pdfcpu: validatePattern: corrupt obj typ, must be dict or stream dict") @@ -155,8 +150,7 @@ func validatePatternResourceDict(xRefTable *model.XRefTable, o types.Object, sin // see 8.7 Patterns // Version check - err := xRefTable.ValidateVersion("PatternResourceDict", sinceVersion) - if err != nil { + if err := xRefTable.ValidateVersion("PatternResourceDict", sinceVersion); err != nil { return err } @@ -169,8 +163,7 @@ func validatePatternResourceDict(xRefTable *model.XRefTable, o types.Object, sin for _, o := range d { // Process pattern - err = validatePattern(xRefTable, o) - if err != nil { + if err = validatePattern(xRefTable, o); err != nil { return err } diff --git a/pkg/pdfcpu/validate/shading.go b/pkg/pdfcpu/validate/shading.go index 0912fb5c..9d0f6448 100644 --- a/pkg/pdfcpu/validate/shading.go +++ b/pkg/pdfcpu/validate/shading.go @@ -264,7 +264,7 @@ func validateTensorProductPatchMeshesDict(xRefTable *model.XRefTable, dict types func validateShadingStreamDict(xRefTable *model.XRefTable, sd *types.StreamDict) error { - // Shading 4-7 + // Shading 2, 4-7 dict := sd.Dict @@ -275,6 +275,9 @@ func validateShadingStreamDict(xRefTable *model.XRefTable, sd *types.StreamDict) switch shadingType { + case 2: + err = validateAxialShadingDict(xRefTable, dict) + case 4: err = validateFreeFormGouroudShadedTriangleMeshesDict(xRefTable, dict) diff --git a/pkg/pdfcpu/validate/structTree.go b/pkg/pdfcpu/validate/structTree.go index 183e50f6..c17765fe 100644 --- a/pkg/pdfcpu/validate/structTree.go +++ b/pkg/pdfcpu/validate/structTree.go @@ -17,6 +17,7 @@ limitations under the License. 
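// On the shading change above: the spec defines shading types 4-7 as stream
// objects, whereas type 2 (axial) is ordinarily a plain dictionary. Routing type 2
// through validateShadingStreamDict as well presumably tolerates writers that emit
// axial shadings as streams. For orientation, the stream-borne types handled there
// (descriptions per the spec, listed here for illustration only):

var shadingStreamTypes = map[int]string{
	2: "axial (a dictionary per spec, tolerated here as a stream)",
	4: "free-form Gouraud-shaded triangle mesh",
	5: "lattice-form Gouraud-shaded triangle mesh",
	6: "Coons patch mesh",
	7: "tensor-product patch mesh",
}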
package validate import ( + "fmt" "strconv" "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" @@ -243,11 +244,19 @@ func processStructElementDictPgEntry(xRefTable *model.XRefTable, ir types.Indire pageDict, ok := o.(types.Dict) if !ok { - return errors.Errorf("pdfcpu: processStructElementDictPgEntry: Pg object corrupt dict: %s\n", o) + if xRefTable.ValidationMode == model.ValidationRelaxed { + model.ShowSkipped(fmt.Sprintf("invalid structElementDict Pg entry, objNr: %d ", ir.ObjectNumber)) + return nil + } + return errors.Errorf("pdfcpu: processStructElementDictPgEntry: Pg object corrupt dict: %s objNr:%d\n", o, ir.ObjectNumber) } if t := pageDict.Type(); t == nil || *t != "Page" { - return errors.Errorf("pdfcpu: processStructElementDictPgEntry: Pg object no pageDict: %s\n", pageDict) + if xRefTable.ValidationMode == model.ValidationRelaxed { + model.ShowSkipped(fmt.Sprintf("invalid structElementDict Pg entry, objNr: %d ", ir.ObjectNumber)) + return nil + } + return errors.Errorf("pdfcpu: processStructElementDictPgEntry: Pg object no pageDict: %s objNr:%d\n", pageDict, ir.ObjectNumber) } return nil @@ -363,7 +372,7 @@ func validateStructElementDictPart1(xRefTable *model.XRefTable, d types.Dict, di return err } if i != nil { - // Repair + // "Repair" d["S"] = types.Name(strconv.Itoa((*i).Value())) } } @@ -450,14 +459,22 @@ func validateStructElementDictPart2(xRefTable *model.XRefTable, d types.Dict, di return err } - // E: optional, text sttring, since 1.5 - _, err = validateStringEntry(xRefTable, d, dictName, "E", OPTIONAL, model.V15, nil) + // E: optional, text string, since 1.5 + sinceVersion = model.V15 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V14 + } + _, err = validateStringEntry(xRefTable, d, dictName, "E", OPTIONAL, sinceVersion, nil) if err != nil { return err } // ActualText: optional, text string, since 1.4 - _, err = validateStringEntry(xRefTable, d, dictName, "ActualText", OPTIONAL, model.V14, nil) + sinceVersion = model.V14 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V13 + } + _, err = validateStringEntry(xRefTable, d, dictName, "ActualText", OPTIONAL, sinceVersion, nil) return err } diff --git a/pkg/pdfcpu/validate/viewerPreferences.go b/pkg/pdfcpu/validate/viewerPreferences.go index da8b6504..dc122d82 100644 --- a/pkg/pdfcpu/validate/viewerPreferences.go +++ b/pkg/pdfcpu/validate/viewerPreferences.go @@ -120,7 +120,10 @@ func validatePrinterPreferences(xRefTable *model.XRefTable, d types.Dict, dictNa } n, err := validateNameEntry(xRefTable, d, dictName, "PrintScaling", OPTIONAL, sinceVersion, validate) if err != nil { - return err + if xRefTable.ValidationMode == model.ValidationStrict { + return err + } + // Ignore in relaxed mode. 
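	// The spec only defines "None" and "AppDefault" for PrintScaling; an unknown
	// name is digested here and the preference simply keeps its default instead
	// of failing validation of the whole file.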
} if n != nil { vp.PrintScaling = model.PrintScalingFor(n.String()) @@ -137,12 +140,20 @@ func validatePrinterPreferences(xRefTable *model.XRefTable, d types.Dict, dictNa vp.Duplex = model.PaperHandlingFor(n.String()) } - vp.PickTrayByPDFSize, err = validateFlexBooleanEntry(xRefTable, d, dictName, "PickTrayByPDFSize", OPTIONAL, model.V17) + sinceVersion = model.V17 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V15 + } + vp.PickTrayByPDFSize, err = validateFlexBooleanEntry(xRefTable, d, dictName, "PickTrayByPDFSize", OPTIONAL, sinceVersion) if err != nil { return err } - vp.NumCopies, err = validateIntegerEntry(xRefTable, d, dictName, "NumCopies", OPTIONAL, model.V17, func(i int) bool { return i >= 1 }) + sinceVersion = model.V17 + if xRefTable.ValidationMode == model.ValidationRelaxed { + sinceVersion = model.V15 + } + vp.NumCopies, err = validateIntegerEntry(xRefTable, d, dictName, "NumCopies", OPTIONAL, sinceVersion, func(i int) bool { return i >= 1 }) if err != nil { return err } @@ -212,8 +223,12 @@ func validateViewerPreferences(xRefTable *model.XRefTable, rootDict types.Dict, return err } + vv := []string{"UseNone", "UseOutlines", "UseThumbs", "UseOC"} + if xRefTable.ValidationMode == model.ValidationRelaxed { + vv = append(vv, "PageOnly") + } validate := func(s string) bool { - return types.MemberOf(s, []string{"UseNone", "UseOutlines", "UseThumbs", "UseOC"}) + return types.MemberOf(s, vv) } n, err := validateNameEntry(xRefTable, d, dictName, "NonFullScreenPageMode", OPTIONAL, model.V10, validate) if err != nil { @@ -226,9 +241,15 @@ func validateViewerPreferences(xRefTable *model.XRefTable, rootDict types.Dict, validate = func(s string) bool { return types.MemberOf(s, []string{"L2R", "R2L"}) } n, err = validateNameEntry(xRefTable, d, dictName, "Direction", OPTIONAL, model.V13, validate) if err != nil { - return err + s, err := validateStringEntry(xRefTable, d, dictName, "Direction", OPTIONAL, model.V13, validate) + if err != nil { + return err + } + if s != nil { + vp.Direction = model.DirectionFor(*s) + } } - if n != nil { + if vp.Direction == nil && n != nil { vp.Direction = model.DirectionFor(n.String()) } diff --git a/pkg/pdfcpu/validate/xObject.go b/pkg/pdfcpu/validate/xObject.go index f2245524..d5bb2f89 100644 --- a/pkg/pdfcpu/validate/xObject.go +++ b/pkg/pdfcpu/validate/xObject.go @@ -413,13 +413,21 @@ func validateAlternateImageStreamDicts(xRefTable *model.XRefTable, d types.Dict, func validateImageStreamDictPart1(xRefTable *model.XRefTable, sd *types.StreamDict, dictName string) (isImageMask bool, err error) { // Width, integer, required - _, err = validateIntegerEntry(xRefTable, sd.Dict, dictName, "Width", REQUIRED, model.V10, nil) + required := true + if xRefTable.ValidationMode == model.ValidationRelaxed { + required = false + } + _, err = validateIntegerEntry(xRefTable, sd.Dict, dictName, "Width", required, model.V10, nil) if err != nil { return false, err } // Height, integer, required - _, err = validateIntegerEntry(xRefTable, sd.Dict, dictName, "Height", REQUIRED, model.V10, nil) + required = true + if xRefTable.ValidationMode == model.ValidationRelaxed { + required = false + } + _, err = validateIntegerEntry(xRefTable, sd.Dict, dictName, "Height", required, model.V10, nil) if err != nil { return false, err } @@ -430,12 +438,15 @@ func validateImageStreamDictPart1(xRefTable *model.XRefTable, sd *types.StreamDi return false, err } - isImageMask = (imageMask != nil) && *imageMask == true + isImageMask = (imageMask != nil) && 
*imageMask // ColorSpace, name or array, required unless used filter is JPXDecode; not allowed for imagemasks. if !isImageMask { - required := REQUIRED + required = REQUIRED + if xRefTable.ValidationMode == model.ValidationRelaxed { + required = OPTIONAL + } if sd.HasSoleFilterNamed(filter.JPX) { required = OPTIONAL @@ -459,9 +470,10 @@ func validateImageStreamDictPart2(xRefTable *model.XRefTable, sd *types.StreamDi // BitsPerComponent, integer required := REQUIRED - if sd.HasSoleFilterNamed(filter.JPX) || isImageMask { + if sd.HasSoleFilterNamed(filter.JPX) || isImageMask || xRefTable.ValidationMode == model.ValidationRelaxed { required = OPTIONAL } + // For imageMasks BitsPerComponent must be 1. var validateBPC func(i int) bool if isImageMask { @@ -474,11 +486,8 @@ func validateImageStreamDictPart2(xRefTable *model.XRefTable, sd *types.StreamDi return err } - // Intent, name, optional, since V1.0 - validate := func(s string) bool { - return types.MemberOf(s, []string{"AbsoluteColorimetric", "RelativeColorimetric", "Saturation", "Perceptual"}) - } - _, err = validateNameEntry(xRefTable, sd.Dict, dictName, "Intent", OPTIONAL, model.V11, validate) + // Note 8.6.5.8: If a PDF processor does not recognise the specified name, it shall use the RelativeColorimetric intent by default. + _, err = validateNameEntry(xRefTable, sd.Dict, dictName, "Intent", OPTIONAL, model.V11, nil) if err != nil { return err } @@ -759,6 +768,10 @@ func validateXObjectStreamDict(xRefTable *model.XRefTable, o types.Object) error // see 8.8 External Objects + if o == nil { + return nil + } + // Dereference stream dict and ensure it is validated exactly once in order // to handle XObjects(forms) with recursive structures like produced by Microsoft. sd, valid, err := xRefTable.DereferenceStreamDict(o) @@ -784,7 +797,7 @@ func validateXObjectStreamDict(xRefTable *model.XRefTable, o types.Object) error return err } - if subtype == nil { + if subtype == nil || len(*subtype) == 0 { // relaxed _, found := sd.Find("BBox") if found { diff --git a/pkg/pdfcpu/validate/xReftable.go b/pkg/pdfcpu/validate/xReftable.go index d808926e..7d51c6c8 100644 --- a/pkg/pdfcpu/validate/xReftable.go +++ b/pkg/pdfcpu/validate/xReftable.go @@ -19,6 +19,7 @@ package validate import ( "fmt" + "net" "net/http" "net/url" "sort" @@ -31,19 +32,8 @@ import ( "github.com/pkg/errors" ) -func reportSpecViolation(xRefTable *model.XRefTable, err error) { - // TODO Apply across code base. - pre := fmt.Sprintf("digesting spec violation around obj#(%d)", xRefTable.CurObj) - if log.ValidateEnabled() { - log.CLI.Printf("%s: %v\n", pre, err) - } - if log.CLIEnabled() { - log.Validate.Printf("%s: %v\n", pre, err) - } -} - // XRefTable validates a PDF cross reference table obeying the validation mode. -func XRefTable(xRefTable *model.XRefTable) error { +func XRefTable(ctx *model.Context) error { if log.InfoEnabled() { log.Info.Println("validating") } @@ -51,26 +41,48 @@ func XRefTable(xRefTable *model.XRefTable) error { log.Validate.Println("*** validateXRefTable begin ***") } - // Validate root object(aka the document catalog) and page tree. - err := validateRootObject(xRefTable) + xRefTable := ctx.XRefTable + + metaDataAuthoritative, err := metaDataModifiedAfterInfoDict(xRefTable) if err != nil { return err } - // Validate document information dictionary. 
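// The reordering hinges on which modification date wins: when the catalog's XMP
// metadata was modified after the Info dict, the Info dict is validated before the
// catalog, otherwise after it. A minimal sketch of the comparison behind
// metaDataModifiedAfterInfoDict, using stdlib time.Time (values are illustrative):

func metaDataNewer(infoModDate, xmpModDate time.Time) bool {
	if xmpModDate.IsZero() {
		// No usable XMP timestamp: treat the Info dict as authoritative.
		return false
	}
	return infoModDate.Before(xmpModDate)
}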
- err = validateDocumentInfoObject(xRefTable) - if err != nil { - return err + if metaDataAuthoritative { + // if both info dict and catalog metadata present and metadata modification date after infodict modification date + // validate document information dictionary before catalog metadata. + err := validateDocumentInfoObject(xRefTable) + if err != nil { + return err + } } - // Validate offspec additional streams as declared in pdf trailer. - err = validateAdditionalStreams(xRefTable) + // Validate root object(aka the document catalog) and page tree. + err = validateRootObject(ctx) if err != nil { return err } + if !metaDataAuthoritative { + // Validate document information dictionary after catalog metadata. + err = validateDocumentInfoObject(xRefTable) + if err != nil { + return err + } + } + + // Validate offspec additional streams as declared in pdf trailer. + // err = validateAdditionalStreams(xRefTable) + // if err != nil { + // return err + // } + xRefTable.Valid = true + if xRefTable.CustomExtensions && log.CLIEnabled() { + log.CLI.Println("Note: custom extensions will not be validated.") + } + if log.ValidateEnabled() { log.Validate.Println("*** validateXRefTable end ***") } @@ -78,6 +90,82 @@ func XRefTable(xRefTable *model.XRefTable) error { return nil } +func fixInfoDict(xRefTable *model.XRefTable, rootDict types.Dict) error { + indRef := rootDict.IndirectRefEntry("Metadata") + ok, err := model.EqualObjects(*indRef, *xRefTable.Info, xRefTable) + if err != nil { + return err + } + if ok { + // infoDict indRef falsely points to meta data. + xRefTable.Info = nil + } + return nil +} + +func metaDataModifiedAfterInfoDict(xRefTable *model.XRefTable) (bool, error) { + rootDict, err := xRefTable.Catalog() + if err != nil { + return false, err + } + + xmpMeta, err := catalogMetaData(xRefTable, rootDict, OPTIONAL, model.V14) + if err != nil { + return false, err + } + + if xmpMeta != nil { + xRefTable.CatalogXMPMeta = xmpMeta + if xRefTable.Info != nil { + if err := fixInfoDict(xRefTable, rootDict); err != nil { + return false, err + } + } + } + + if !(xmpMeta != nil && xRefTable.Info != nil) { + return false, nil + } + + d, err := xRefTable.DereferenceDict(*xRefTable.Info) + if err != nil { + return false, err + } + if d == nil { + return true, nil + } + + modDate, ok := d["ModDate"] + if !ok { + return true, nil + } + + modTimestampInfoDict, err := timeOfDateObject(xRefTable, modDate, model.V10) + if err != nil { + return false, err + } + if modTimestampInfoDict == nil { + return true, nil + } + + modTimestampMetaData := time.Time(xmpMeta.RDF.Description.ModDate) + if modTimestampMetaData.IsZero() { + // xmlns:xap='http://ns.adobe.com/xap/1.0/ ...xap:ModifyDate='2006-06-05T21:58:13-05:00'> + //fmt.Println("metadata modificationDate is zero -> older than infodict") + return false, nil + } + + //fmt.Printf("infoDict: %s metaData: %s\n", modTimestampInfoDict, modTimestampMetaData) + + if *modTimestampInfoDict == modTimestampMetaData { + return false, nil + } + + infoDictOlderThanMetaDict := (*modTimestampInfoDict).Before(modTimestampMetaData) + + return infoDictOlderThanMetaDict, nil +} + func validateRootVersion(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { _, err := validateNameEntry(xRefTable, rootDict, "rootDict", "Version", OPTIONAL, sinceVersion, nil) return err @@ -138,6 +226,8 @@ func validateNames(xRefTable *model.XRefTable, rootDict types.Dict, required boo "URLS", "EmbeddedFiles", "AlternatePresentations", "Renditions"}) } + 
d1 := types.Dict{} + for treeName, value := range d { if ok := validateNameTreeName(treeName); !ok { @@ -147,6 +237,11 @@ func validateNames(xRefTable *model.XRefTable, rootDict types.Dict, required boo continue } + if xRefTable.Names[treeName] != nil { + // Already internalized. + continue + } + d, err := xRefTable.DereferenceDict(value) if err != nil { return err @@ -160,29 +255,33 @@ func validateNames(xRefTable *model.XRefTable, rootDict types.Dict, required boo return err } - // Internalize this name tree. - // If no validation takes place, name trees have to be internalized via xRefTable.LocateNameTree - // TODO Move this out of validation into Read. - if tree != nil { + if tree != nil && tree.Kmin != "" && tree.Kmax != "" { + // Internalize. xRefTable.Names[treeName] = tree + d1.Insert(treeName, value) } } + delete(rootDict, "Names") + if len(d1) > 0 { + rootDict["Names"] = d1 + } + return nil } -func validateNamedDestinations(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { +func validateNamedDestinations(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) (err error) { // => 12.3.2.3 Named Destinations // indRef or dict with destination array values. - d, err := validateDictEntry(xRefTable, rootDict, "rootDict", "Dests", required, sinceVersion, nil) - if err != nil || d == nil { + xRefTable.Dests, err = validateDictEntry(xRefTable, rootDict, "rootDict", "Dests", required, sinceVersion, nil) + if err != nil || xRefTable.Dests == nil { return err } - for _, o := range d { + for _, o := range xRefTable.Dests { if _, err = validateDestination(xRefTable, o, false); err != nil { return err } @@ -192,7 +291,8 @@ func validateNamedDestinations(xRefTable *model.XRefTable, rootDict types.Dict, } func pageLayoutValidator(v model.Version) func(s string) bool { - layouts := []string{"SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight"} + // "UseNone" is out of spec. + layouts := []string{"SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "UseNone"} if v >= model.V15 { layouts = append(layouts, "TwoPageLeft", "TwoPageRight") } @@ -216,23 +316,27 @@ func validatePageLayout(xRefTable *model.XRefTable, rootDict types.Dict, require } func pageModeValidator(v model.Version) func(s string) bool { - modes := []string{"UseNone", "UseOutlines", "UseThumbs", "FullScreen"} - if v >= model.V15 { + // "None" and "none" are out of spec. + modes := []string{"UseNone", "UseOutlines", "UseThumbs", "FullScreen", "None", "none"} + if v >= model.V14 { modes = append(modes, "UseOC") } if v >= model.V16 { modes = append(modes, "UseAttachments") } - validate := func(s string) bool { - return types.MemberOf(s, modes) - } - return validate + return func(s string) bool { return types.MemberOf(s, modes) } } func validatePageMode(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { n, err := validateNameEntry(xRefTable, rootDict, "rootDict", "PageMode", required, sinceVersion, pageModeValidator(xRefTable.Version())) if err != nil { - return err + if xRefTable.ValidationMode == model.ValidationStrict || n == nil { + return err + } + // Relax validation of "UseAttachments" before PDF v1.6. 
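	// validateNameEntry now hands back the offending name together with the error
	// (see object.go above), which is what makes this targeted exception possible:
	// only "UseAttachments" used before PDF 1.6 is forgiven in relaxed mode, any
	// other invalid PageMode value still fails.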
+ if *n != "UseAttachments" { + return err + } } if n != nil { @@ -414,7 +518,11 @@ func validateOutputIntentDict(xRefTable *model.XRefTable, d types.Dict) error { } // OutputConditionIdentifier, required, text string - _, err = validateStringEntry(xRefTable, d, dictName, "OutputConditionIdentifier", REQUIRED, model.V10, nil) + required := REQUIRED + if xRefTable.ValidationMode == model.ValidationRelaxed { + required = OPTIONAL + } + _, err = validateStringEntry(xRefTable, d, dictName, "OutputConditionIdentifier", required, model.V10, nil) if err != nil { return err } @@ -525,16 +633,46 @@ func validatePieceInfo(xRefTable *model.XRefTable, d types.Dict, dictName, entry return hasPieceInfo, err } -// TODO implement func validatePermissions(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { // => 12.8.4 Permissions - d, err := validateDictEntry(xRefTable, rootDict, "rootDict", "Permissions", required, sinceVersion, nil) - if err != nil || d == nil { + d, err := validateDictEntry(xRefTable, rootDict, "rootDict", "Perms", required, sinceVersion, nil) + if err != nil { + return err + } + if len(d) == 0 { + return nil + } + + i := 0 + + if indRef := d.IndirectRefEntry("DocMDP"); indRef != nil { + d1, err := xRefTable.DereferenceDict(*indRef) + if err != nil { + return err + } + if len(d1) > 0 { + xRefTable.CertifiedSigObjNr = indRef.ObjectNumber.Value() + i++ + } + } + + d1, err := validateDictEntry(xRefTable, d, "permDict", "UR3", OPTIONAL, sinceVersion, nil) + if err != nil { return err } + if len(d1) == 0 { + return nil + } + + xRefTable.URSignature = d1 + i++ + + if i == 0 { + return errors.New("pdfcpu: validatePermissions: unsupported permissions detected") + } - return errors.New("pdfcpu: validatePermissions: not supported") + return nil } // TODO implement @@ -542,11 +680,11 @@ func validateLegal(xRefTable *model.XRefTable, rootDict types.Dict, required boo // => 12.8.5 Legal Content Attestations d, err := validateDictEntry(xRefTable, rootDict, "rootDict", "Legal", required, sinceVersion, nil) - if err != nil || d == nil { + if err != nil || len(d) == 0 { return err } - return errors.New("pdfcpu: validateLegal: not supported") + return errors.New("pdfcpu: \"Legal\" not supported") } func validateRequirementDict(xRefTable *model.XRefTable, d types.Dict, sinceVersion model.Version) error { @@ -699,7 +837,7 @@ func validateCollectionSortDict(xRefTable *model.XRefTable, d types.Dict) error return err } -func validateInitialView(s string) bool { return s == "D" || s == "T" || s == "H" } +func validateInitialView(s string) bool { return s == "D" || s == "T" || s == "H" || s == "C" } func validateCollection(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { // => 12.3.5 Collections @@ -760,8 +898,45 @@ func validateNeedsRendering(xRefTable *model.XRefTable, rootDict types.Dict, req return err } +func validateDSS(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { + // => 12.8.4.3 Document Security Store + + d, err := validateDictEntry(xRefTable, rootDict, "rootDict", "DSS", required, sinceVersion, nil) + if err != nil || d == nil { + return err + } + + xRefTable.DSS = d + + return nil +} + +func validateAF(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { + // => 14.13 Associated Files + + a, err := validateArrayEntry(xRefTable, rootDict, "rootDict", "AF", required, sinceVersion, nil) + if err != nil || 
len(a) == 0 { + return err + } + + return errors.New("pdfcpu: PDF2.0 \"AF\" not supported") +} + +func validateDPartRoot(xRefTable *model.XRefTable, rootDict types.Dict, required bool, sinceVersion model.Version) error { + // => 14.12 Document Parts + + d, err := validateDictEntry(xRefTable, rootDict, "rootDict", "DPartRoot", required, sinceVersion, nil) + if err != nil || len(d) == 0 { + return err + } + + return errors.New("pdfcpu: PDF2.0 \"DPartRoot\" not supported") +} + func logURIError(xRefTable *model.XRefTable, pages []int) { - fmt.Println() + if log.CLIEnabled() { + log.CLI.Println() + } for _, page := range pages { for uri, resp := range xRefTable.URIs[page] { if resp != "" { @@ -771,33 +946,21 @@ func logURIError(xRefTable *model.XRefTable, pages []int) { s = "invalid url" case "s": s = "severe error" + case "t": + s = "timeout" default: s = fmt.Sprintf("status=%s", resp) } if log.CLIEnabled() { - log.CLI.Printf("Page %d: %s %s\n", page, uri, s) + log.CLI.Printf("Page %d: %s - %s\n", page, uri, s) } } } } } -func checkForBrokenLinks(xRefTable *model.XRefTable) error { +func checkLinks(xRefTable *model.XRefTable, client http.Client, pages []int) bool { var httpErr bool - if log.CLIEnabled() { - log.CLI.Println("validating URIs..") - } - - pages := []int{} - for i := range xRefTable.URIs { - pages = append(pages, i) - } - sort.Ints(pages) - - client := http.Client{ - Timeout: 5 * time.Second, - } - for _, page := range pages { for uri := range xRefTable.URIs[page] { if log.CLIEnabled() { @@ -811,18 +974,55 @@ func checkForBrokenLinks(xRefTable *model.XRefTable) error { } res, err := client.Get(uri) if err != nil { + if e, ok := err.(net.Error); ok && e.Timeout() { + xRefTable.URIs[page][uri] = "t" + } else { + xRefTable.URIs[page][uri] = "s" + } httpErr = true - xRefTable.URIs[page][uri] = "s" continue } defer res.Body.Close() - if res.StatusCode != 200 { + if res.StatusCode != http.StatusOK { httpErr = true xRefTable.URIs[page][uri] = strconv.Itoa(res.StatusCode) continue } } } + return httpErr +} + +func checkForBrokenLinks(ctx *model.Context) error { + if !ctx.XRefTable.ValidateLinks { + return nil + } + if len(ctx.URIs) > 0 { + if ctx.Offline { + if log.CLIEnabled() { + log.CLI.Printf("pdfcpu is offline, can't validate Links") + } + return nil + } + } + + if log.CLIEnabled() { + log.CLI.Println("validating URIs..") + } + + xRefTable := ctx.XRefTable + + pages := []int{} + for i := range xRefTable.URIs { + pages = append(pages, i) + } + sort.Ints(pages) + + client := http.Client{ + Timeout: time.Duration(ctx.Timeout) * time.Second, + } + + httpErr := checkLinks(xRefTable, client, pages) if log.CLIEnabled() { logURIError(xRefTable, pages) @@ -835,7 +1035,7 @@ func checkForBrokenLinks(xRefTable *model.XRefTable) error { return nil } -func validateRootObject(xRefTable *model.XRefTable) error { +func validateRootObject(ctx *model.Context) error { if log.ValidateEnabled() { log.Validate.Println("*** validateRootObject begin ***") } @@ -878,13 +1078,19 @@ func validateRootObject(xRefTable *model.XRefTable) error { // AF y 2.0 array of dicts => 14.3 Associated Files TODO // DPartRoot y 2.0 dict => 14.12 Document parts TODO + xRefTable := ctx.XRefTable + d, err := xRefTable.Catalog() if err != nil { return err } // Type - _, err = validateNameEntry(xRefTable, d, "rootDict", "Type", REQUIRED, model.V10, func(s string) bool { return s == "Catalog" }) + required := true + if ctx.XRefTable.ValidationMode == model.ValidationRelaxed { + required = false + } + _, err = 
validateNameEntry(xRefTable, d, "rootDict", "Type", required, model.V10, func(s string) bool { return s == "Catalog" }) if err != nil { return err } @@ -903,7 +1109,7 @@ func validateRootObject(xRefTable *model.XRefTable) error { {validateRootVersion, OPTIONAL, model.V14}, {validateExtensions, OPTIONAL, model.V10}, {validatePageLabels, OPTIONAL, model.V13}, - {validateNames, OPTIONAL, model.V12}, + {validateNames, OPTIONAL, model.V11}, //model.V12}, {validateNamedDestinations, OPTIONAL, model.V11}, {validateViewerPreferences, OPTIONAL, model.V12}, {validatePageLayout, OPTIONAL, model.V10}, @@ -927,6 +1133,9 @@ func validateRootObject(xRefTable *model.XRefTable) error { {validateRequirements, OPTIONAL, model.V17}, {validateCollection, OPTIONAL, model.V17}, {validateNeedsRendering, OPTIONAL, model.V17}, + {validateDSS, OPTIONAL, model.V17}, + {validateAF, OPTIONAL, model.V20}, + {validateDPartRoot, OPTIONAL, model.V20}, } { if !f.required && xRefTable.Version() < f.sinceVersion { // Ignore optional fields if currentVersion < sinceVersion @@ -940,13 +1149,19 @@ func validateRootObject(xRefTable *model.XRefTable) error { } // Validate remainder of annotations after AcroForm validation only. - _, err = validatePagesAnnotations(xRefTable, rootPageNodeDict, 0) + if _, err = validatePagesAnnotations(xRefTable, rootPageNodeDict, 0); err != nil { + return err + } - if xRefTable.ValidateLinks && len(xRefTable.URIs) > 0 { - err = checkForBrokenLinks(xRefTable) + // Validate form fields against page annotations. + if xRefTable.Form != nil { + if err := validateFormFieldsAgainstPageAnnotations(xRefTable); err != nil { + return err + } } - if err == nil { + // Validate links. + if err = checkForBrokenLinks(ctx); err == nil { if log.ValidateEnabled() { log.Validate.Println("*** validateRootObject end ***") } @@ -954,8 +1169,3 @@ func validateRootObject(xRefTable *model.XRefTable) error { return err } - -func validateAdditionalStreams(xRefTable *model.XRefTable) error { - // Out of spec scope. - return nil -} diff --git a/pkg/pdfcpu/write.go b/pkg/pdfcpu/write.go index 73b09cbf..7925d755 100644 --- a/pkg/pdfcpu/write.go +++ b/pkg/pdfcpu/write.go @@ -60,8 +60,8 @@ func writeObjects(ctx *model.Context) error { return writeEncryptDict(ctx) } -// Write generates a PDF file for the cross reference table contained in Context. -func Write(ctx *model.Context) (err error) { +// WriteContext generates a PDF file for the cross reference table contained in Context. +func WriteContext(ctx *model.Context) (err error) { // Create a writer for dirname and filename if not already supplied. if ctx.Write.Writer == nil { @@ -98,11 +98,14 @@ func Write(ctx *model.Context) (err error) { return err } - // Since we support PDF Collections (since V1.7) for file attachments - // we need to generate V1.7 PDF files. 
+ // if exists metadata, update from info dict + // else if v2 create from scratch + // else nothing just write info dict + + // We support PDF Collections (since V1.7) for file attachments v := model.V17 - if ctx.Version() == model.V20 { + if ctx.XRefTable.Version() == model.V20 { v = model.V20 } @@ -209,7 +212,7 @@ func ensureFileID(ctx *model.Context) error { } func ensureInfoDictAndFileID(ctx *model.Context) error { - if ctx.Version() < model.V20 { + if ctx.XRefTable.Version() < model.V20 { if err := ensureInfoDict(ctx); err != nil { return err } @@ -264,6 +267,11 @@ func writePages(ctx *model.Context, rootDict types.Dict) error { } func writeRootAttrsBatch1(ctx *model.Context, d types.Dict, dictName string) error { + + if err := writeAcroFormRootEntry(ctx, d, dictName); err != nil { + return err + } + for _, e := range []struct { entryName string statsAttr int @@ -280,7 +288,7 @@ func writeRootAttrsBatch1(ctx *model.Context, d types.Dict, dictName string) err {"OpenAction", model.RootOpenAction}, {"AA", model.RootAA}, {"URI", model.RootURI}, - {"AcroForm", model.RootAcroForm}, + //{"AcroForm", model.RootAcroForm}, {"Metadata", model.RootMetadata}, } { if err := writeRootEntry(ctx, d, dictName, e.entryName, e.statsAttr); err != nil { @@ -495,7 +503,7 @@ func deleteRedundantObject(ctx *model.Context, objNr int) { } if ctx.IsLinearizationObject(objNr) || ctx.Optimize.IsDuplicateInfoObject(objNr) || - ctx.Read.IsObjectStreamObject(objNr) || ctx.Read.IsXRefStreamObject(objNr) { + ctx.Read.IsObjectStreamObject(objNr) { ctx.FreeObject(objNr) } @@ -507,7 +515,7 @@ func detectLinearizationObjs(xRefTable *model.XRefTable, entry *model.XRefTableE if *entry.Offset == *xRefTable.OffsetPrimaryHintTable { xRefTable.LinearizationObjs[i] = true if log.WriteEnabled() { - log.Write.Printf("deleteRedundantObjects: primaryHintTable at obj #%d\n", i) + log.Write.Printf("detectLinearizationObjs: primaryHintTable at obj #%d\n", i) } } @@ -515,7 +523,7 @@ func detectLinearizationObjs(xRefTable *model.XRefTable, entry *model.XRefTableE *entry.Offset == *xRefTable.OffsetOverflowHintTable { xRefTable.LinearizationObjs[i] = true if log.WriteEnabled() { - log.Write.Printf("deleteRedundantObjects: overflowHintTable at obj #%d\n", i) + log.Write.Printf("detectLinearizationObjs: overflowHintTable at obj #%d\n", i) } } @@ -909,10 +917,11 @@ func setupEncryption(ctx *model.Context) error { var err error if ok := validateAlgorithm(ctx); !ok { - return errors.New("pdfcpu: unsupported encryption algorithm") + return errors.New("pdfcpu: unsupported encryption algorithm (PDF 2.0 assumes AES/256)") } d := newEncryptDict( + ctx.XRefTable.Version(), ctx.EncryptUsingAES, ctx.EncryptKeyLength, int16(ctx.Permissions), @@ -980,12 +989,13 @@ func updateEncryption(ctx *model.Context) error { ctx.OwnerPW = *ctx.OwnerPWNew } - if ctx.E.R == 5 { + if ctx.E.R == 5 || ctx.E.R == 6 { if err = calcOAndU(ctx, d); err != nil { return err } + // Calc Perms for rev 5, 6. 
return writePermissions(ctx, d) } @@ -1009,16 +1019,6 @@ func updateEncryption(ctx *model.Context) error { func handleEncryption(ctx *model.Context) error { - if ctx.Version() == model.V20 { - if ctx.Cmd == model.ENCRYPT || - ctx.Cmd == model.DECRYPT || - ctx.Cmd == model.CHANGEUPW || - ctx.Cmd == model.CHANGEOPW || - ctx.Cmd == model.SETPERMISSIONS { - return ErrUnsupportedVersion - } - } - if ctx.Cmd == model.ENCRYPT || ctx.Cmd == model.DECRYPT { if ctx.Cmd == model.DECRYPT { diff --git a/pkg/pdfcpu/writeImage.go b/pkg/pdfcpu/writeImage.go index 6deba119..8c2f4c29 100644 --- a/pkg/pdfcpu/writeImage.go +++ b/pkg/pdfcpu/writeImage.go @@ -91,8 +91,25 @@ func pdfImage(xRefTable *model.XRefTable, sd *types.StreamDict, thumb bool, objN bpc := *sd.IntEntry("BitsPerComponent") - w := *sd.IntEntry("Width") - h := *sd.IntEntry("Height") + obj, ok := sd.Find("Width") + if !ok { + return nil, errors.Errorf("pdfcpu: missing image width obj#%d", objNr) + } + i, err := xRefTable.DereferenceInteger(obj) + if err != nil { + return nil, err + } + w := i.Value() + + obj, ok = sd.Find("Height") + if !ok { + return nil, errors.Errorf("pdfcpu: missing image height obj#%d", objNr) + } + i, err = xRefTable.DereferenceInteger(obj) + if err != nil { + return nil, err + } + h := i.Value() decode := decodeArr(sd.ArrayEntry("Decode")) @@ -287,7 +304,7 @@ func imageForCMYKWithSoftMask(im *PDFImage) image.Image { return img } -func renderDeviceCMYKToTIFF(im *PDFImage, resourceName string) (io.Reader, string, error) { +func renderDeviceCMYKToTIFF(im *PDFImage) (io.Reader, string, error) { b := im.sd.Content if log.DebugEnabled() { log.Debug.Printf("renderDeviceCMYKToTIFF: CMYK objNr=%d w=%d h=%d bpc=%d buflen=%d\n", im.objNr, im.w, im.h, im.bpc, len(b)) @@ -312,7 +329,7 @@ func scaleToBPC8(v uint8, bpc int) uint8 { return uint8(float64(v) * 255.0 / float64(maxValForBits(bpc))) } -func renderDeviceGrayToPNG(im *PDFImage, resourceName string) (io.Reader, string, error) { +func renderDeviceGrayToPNG(im *PDFImage) (io.Reader, string, error) { b := im.sd.Content if log.DebugEnabled() { log.Debug.Printf("renderDeviceGrayToPNG: objNr=%d w=%d h=%d bpc=%d buflen=%d\n", im.objNr, im.w, im.h, im.bpc, len(b)) @@ -335,7 +352,7 @@ func renderDeviceGrayToPNG(im *PDFImage, resourceName string) (io.Reader, string for y := 0; y < im.h; y++ { for x := 0; x < im.w; { p := b[i] - for j := 0; j < 8/im.bpc; j++ { + for j := 0; j < 8/im.bpc && x < im.w; j++ { pix := p >> (8 - uint8(im.bpc)) v := decodePixelValue(pix, im.bpc, cvr) if im.bpc < 8 { @@ -361,7 +378,7 @@ func renderDeviceGrayToPNG(im *PDFImage, resourceName string) (io.Reader, string return &buf, "png", nil } -func renderDeviceRGBToPNG(im *PDFImage, resourceName string) (io.Reader, string, error) { +func renderDeviceRGBToPNG(im *PDFImage) (io.Reader, string, error) { b := im.sd.Content if log.DebugEnabled() { log.Debug.Printf("renderDeviceRGBToPNG: objNr=%d w=%d h=%d bpc=%d buflen=%d\n", im.objNr, im.w, im.h, im.bpc, len(b)) @@ -396,7 +413,7 @@ func renderDeviceRGBToPNG(im *PDFImage, resourceName string) (io.Reader, string, return &buf, "png", nil } -func renderCalRGBToPNG(im *PDFImage, resourceName string) (io.Reader, string, error) { +func renderCalRGBToPNG(im *PDFImage) (io.Reader, string, error) { b := im.sd.Content if log.DebugEnabled() { log.Debug.Printf("renderCalRGBToPNG: objNr=%d w=%d h=%d bpc=%d buflen=%d\n", im.objNr, im.w, im.h, im.bpc, len(b)) @@ -428,7 +445,7 @@ func renderCalRGBToPNG(im *PDFImage, resourceName string) (io.Reader, string, er return &buf, "png", nil } 
-func renderICCBased(xRefTable *model.XRefTable, im *PDFImage, resourceName string, cs types.Array) (io.Reader, string, error) { +func renderICCBased(xRefTable *model.XRefTable, im *PDFImage, cs types.Array) (io.Reader, string, error) { // Any ICC profile >= ICC.1:2004:10 is sufficient for any PDF version <= 1.7 // If the embedded ICC profile version is newer than the one used by the Reader, substitute with Alternate color space. @@ -460,21 +477,21 @@ func renderICCBased(xRefTable *model.XRefTable, im *PDFImage, resourceName strin switch n { case 1: // Gray - return renderDeviceGrayToPNG(im, resourceName) + return renderDeviceGrayToPNG(im) case 3: // RGB - return renderDeviceRGBToPNG(im, resourceName) + return renderDeviceRGBToPNG(im) case 4: // CMYK - return renderDeviceCMYKToTIFF(im, resourceName) + return renderDeviceCMYKToTIFF(im) } return nil, "", nil } -func renderIndexedGrayToPNG(im *PDFImage, resourceName string, lookup []byte) (io.Reader, string, error) { +func renderIndexedGrayToPNG(im *PDFImage, lookup []byte) (io.Reader, string, error) { b := im.sd.Content if log.DebugEnabled() { log.Debug.Printf("renderIndexedGrayToPNG: objNr=%d w=%d h=%d bpc=%d buflen=%d\n", im.objNr, im.w, im.h, im.bpc, len(b)) @@ -498,7 +515,7 @@ func renderIndexedGrayToPNG(im *PDFImage, resourceName string, lookup []byte) (i for y := 0; y < im.h; y++ { for x := 0; x < im.w; { p := b[i] - for j := 0; j < 8/im.bpc; j++ { + for j := 0; j < 8/im.bpc && x < im.w; j++ { ind := p >> (8 - uint8(im.bpc)) v := decodePixelValue(lookup[ind], im.bpc, cvr) if im.bpc < 8 { @@ -521,7 +538,7 @@ func renderIndexedGrayToPNG(im *PDFImage, resourceName string, lookup []byte) (i return &buf, "png", nil } -func renderIndexedRGBToPNG(im *PDFImage, resourceName string, lookup []byte) (io.Reader, string, error) { +func renderIndexedRGBToPNG(im *PDFImage, lookup []byte) (io.Reader, string, error) { b := im.sd.Content img := image.NewNRGBA(image.Rect(0, 0, im.w, im.h)) @@ -531,7 +548,7 @@ func renderIndexedRGBToPNG(im *PDFImage, resourceName string, lookup []byte) (io for y := 0; y < im.h; y++ { for x := 0; x < im.w; { p := b[i] - for j := 0; j < 8/im.bpc; j++ { + for j := 0; j < 8/im.bpc && x < im.w; j++ { ind := p >> (8 - uint8(im.bpc)) //fmt.Printf("x=%d y=%d i=%d j=%d p=#%02x ind=#%02x\n", x, y, i, j, p, ind) alpha := uint8(255) @@ -568,7 +585,7 @@ func imageForIndexedCMYKWithoutSoftMask(im *PDFImage, lookup []byte) image.Image for y := 0; y < im.h; y++ { for x := 0; x < im.w; { p := b[i] - for j := 0; j < 8/im.bpc; j++ { + for j := 0; j < 8/im.bpc && x < im.w; j++ { ind := p >> (8 - uint8(im.bpc)) //fmt.Printf("x=%d y=%d i=%d j=%d p=#%02x ind=#%02x\n", x, y, i, j, p, ind) l := 4 * int(ind) @@ -594,7 +611,7 @@ func imageForIndexedCMYKWithSoftMask(im *PDFImage, lookup []byte) image.Image { for y := 0; y < im.h; y++ { for x := 0; x < im.w; { p := b[i] - for j := 0; j < 8/im.bpc; j++ { + for j := 0; j < 8/im.bpc && x < im.w; j++ { ind := p >> (8 - uint8(im.bpc)) //fmt.Printf("x=%d y=%d i=%d j=%d p=#%02x ind=#%02x\n", x, y, i, j, p, ind) l := 4 * int(ind) @@ -611,7 +628,7 @@ func imageForIndexedCMYKWithSoftMask(im *PDFImage, lookup []byte) image.Image { return img } -func renderIndexedCMYKToTIFF(im *PDFImage, resourceName string, lookup []byte) (io.Reader, string, error) { +func renderIndexedCMYKToTIFF(im *PDFImage, lookup []byte) (io.Reader, string, error) { var img image.Image if im.softMask != nil { @@ -628,26 +645,26 @@ func renderIndexedCMYKToTIFF(im *PDFImage, resourceName string, lookup []byte) ( return &buf, "tif", nil } 
-func renderIndexedNameCS(im *PDFImage, resourceName string, cs types.Name, maxInd int, lookup []byte) (io.Reader, string, error) { +func renderIndexedNameCS(im *PDFImage, cs types.Name, maxInd int, lookup []byte) (io.Reader, string, error) { switch cs { case model.DeviceGrayCS: if len(lookup) < 1*(maxInd+1) { return nil, "", errors.Errorf("pdfcpu: renderIndexedNameCS: objNr=%d, corrupt DeviceGray lookup table\n", im.objNr) } - return renderIndexedGrayToPNG(im, resourceName, lookup) + return renderIndexedGrayToPNG(im, lookup) case model.DeviceRGBCS: if len(lookup) < 3*(maxInd+1) { return nil, "", errors.Errorf("pdfcpu: renderIndexedNameCS: objNr=%d, corrupt DeviceRGB lookup table\n", im.objNr) } - return renderIndexedRGBToPNG(im, resourceName, lookup) + return renderIndexedRGBToPNG(im, lookup) case model.DeviceCMYKCS: if len(lookup) < 4*(maxInd+1) { return nil, "", errors.Errorf("pdfcpu: renderIndexedNameCS: objNr=%d, corrupt DeviceCMYK lookup table\n", im.objNr) } - return renderIndexedCMYKToTIFF(im, resourceName, lookup) + return renderIndexedCMYKToTIFF(im, lookup) } if log.InfoEnabled() { @@ -657,7 +674,7 @@ func renderIndexedNameCS(im *PDFImage, resourceName string, cs types.Name, maxIn return nil, "", nil } -func renderIndexedArrayCS(xRefTable *model.XRefTable, im *PDFImage, resourceName string, csa types.Array, maxInd int, lookup []byte) (io.Reader, string, error) { +func renderIndexedArrayCS(xRefTable *model.XRefTable, im *PDFImage, csa types.Array, maxInd int, lookup []byte) (io.Reader, string, error) { b := im.sd.Content cs, _ := csa[0].(types.Name) @@ -667,7 +684,7 @@ func renderIndexedArrayCS(xRefTable *model.XRefTable, im *PDFImage, resourceName //case CalGrayCS: case model.CalRGBCS: - return renderIndexedRGBToPNG(im, resourceName, lookup) + return renderIndexedRGBToPNG(im, lookup) //case LabCS: // return renderIndexedRGBToPNG(im, resourceName, lookup) @@ -712,14 +729,14 @@ func renderIndexedArrayCS(xRefTable *model.XRefTable, im *PDFImage, resourceName case 3: // RGB - return renderIndexedRGBToPNG(im, resourceName, lookup) + return renderIndexedRGBToPNG(im, lookup) case 4: // CMYK if log.DebugEnabled() { log.Debug.Printf("renderIndexedArrayCS: CMYK objNr=%d w=%d h=%d bpc=%d buflen=%d\n", im.objNr, im.w, im.h, im.bpc, len(b)) } - return renderIndexedCMYKToTIFF(im, resourceName, lookup) + return renderIndexedCMYKToTIFF(im, lookup) } } @@ -730,7 +747,7 @@ func renderIndexedArrayCS(xRefTable *model.XRefTable, im *PDFImage, resourceName return nil, "", nil } -func renderIndexed(xRefTable *model.XRefTable, im *PDFImage, resourceName string, cs types.Array) (io.Reader, string, error) { +func renderIndexed(xRefTable *model.XRefTable, im *PDFImage, cs types.Array) (io.Reader, string, error) { // Identify the base color space. 
baseCS, _ := xRefTable.Dereference(cs[1]) @@ -763,29 +780,29 @@ func renderIndexed(xRefTable *model.XRefTable, im *PDFImage, resourceName string switch cs := baseCS.(type) { case types.Name: - return renderIndexedNameCS(im, resourceName, cs, maxInd.Value(), lookup) + return renderIndexedNameCS(im, cs, maxInd.Value(), lookup) case types.Array: - return renderIndexedArrayCS(xRefTable, im, resourceName, cs, maxInd.Value(), lookup) + return renderIndexedArrayCS(xRefTable, im, cs, maxInd.Value(), lookup) } return nil, "", nil } -func renderDeviceN(xRefTable *model.XRefTable, im *PDFImage, resourceName string, cs types.Array) (io.Reader, string, error) { +func renderDeviceN(im *PDFImage, cs types.Array) (io.Reader, string, error) { if im.comp <= 4 { switch im.comp { case 1: // Gray - return renderDeviceGrayToPNG(im, resourceName) + return renderDeviceGrayToPNG(im) case 3: // RGB - return renderDeviceRGBToPNG(im, resourceName) + return renderDeviceRGBToPNG(im) case 4: // CMYK - return renderDeviceCMYKToTIFF(im, resourceName) + return renderDeviceCMYKToTIFF(im) } } @@ -797,21 +814,21 @@ func renderDeviceN(xRefTable *model.XRefTable, im *PDFImage, resourceName string switch alternateCS { case model.DeviceGrayCS: // Gray - return renderDeviceGrayToPNG(im, resourceName) + return renderDeviceGrayToPNG(im) case model.DeviceRGBCS: // RGB - return renderDeviceRGBToPNG(im, resourceName) + return renderDeviceRGBToPNG(im) case model.DeviceCMYKCS: // CMYK - return renderDeviceCMYKToTIFF(im, resourceName) + return renderDeviceCMYKToTIFF(im) } return nil, "", nil } -func renderImage(xRefTable *model.XRefTable, sd *types.StreamDict, thumb bool, resourceName string, objNr int) (io.Reader, string, error) { +func renderImage(xRefTable *model.XRefTable, sd *types.StreamDict, thumb bool, objNr int) (io.Reader, string, error) { // If color space is CMYK then write .tif else write .png pdfImage, err := pdfImage(xRefTable, sd, thumb, objNr) @@ -830,13 +847,13 @@ func renderImage(xRefTable *model.XRefTable, sd *types.StreamDict, thumb bool, r switch cs { case model.DeviceGrayCS: - return renderDeviceGrayToPNG(pdfImage, resourceName) + return renderDeviceGrayToPNG(pdfImage) case model.DeviceRGBCS: - return renderDeviceRGBToPNG(pdfImage, resourceName) + return renderDeviceRGBToPNG(pdfImage) case model.DeviceCMYKCS: - return renderDeviceCMYKToTIFF(pdfImage, resourceName) + return renderDeviceCMYKToTIFF(pdfImage) default: if log.InfoEnabled() { @@ -850,19 +867,19 @@ func renderImage(xRefTable *model.XRefTable, sd *types.StreamDict, thumb bool, r switch csn { case model.CalRGBCS: - return renderCalRGBToPNG(pdfImage, resourceName) + return renderCalRGBToPNG(pdfImage) case model.DeviceNCS: - return renderDeviceN(xRefTable, pdfImage, resourceName, cs) + return renderDeviceN(pdfImage, cs) case model.ICCBasedCS: - return renderICCBased(xRefTable, pdfImage, resourceName, cs) + return renderICCBased(xRefTable, pdfImage, cs) case model.IndexedCS: - return renderIndexed(xRefTable, pdfImage, resourceName, cs) + return renderIndexed(xRefTable, pdfImage, cs) case model.SeparationCS: - return renderDeviceN(xRefTable, pdfImage, resourceName, cs) + return renderDeviceN(pdfImage, cs) default: if log.InfoEnabled() { @@ -886,7 +903,7 @@ func decodeCMYK(c, m, y, k uint8, decode []colValRange) (uint8, uint8, uint8, ui return c, m, y, k } -func renderCMYKToPng(im *PDFImage, resourceName string) (io.Reader, string, error) { +func renderCMYKToPng(im *PDFImage) (io.Reader, string, error) { bb := bytes.NewReader(im.sd.Content) dec := 
gob.NewDecoder(bb) @@ -915,13 +932,13 @@ func renderCMYKToPng(im *PDFImage, resourceName string) (io.Reader, string, erro return &buf, "png", nil } -func renderDCTToPNG(xRefTable *model.XRefTable, sd *types.StreamDict, thumb bool, resourceName string, objNr int) (io.Reader, string, error) { +func renderDCTToPNG(xRefTable *model.XRefTable, sd *types.StreamDict, thumb bool, objNr int) (io.Reader, string, error) { im, err := pdfImage(xRefTable, sd, thumb, objNr) if err != nil { return nil, "", err } - return renderCMYKToPng(im, resourceName) + return renderCMYKToPng(im) } // RenderImage returns a reader for a decoded image stream. @@ -929,19 +946,19 @@ func RenderImage(xRefTable *model.XRefTable, sd *types.StreamDict, thumb bool, r // Image compression is the last filter in the pipeline. if len(sd.FilterPipeline) == 0 { - return renderImage(xRefTable, sd, thumb, resourceName, objNr) + return renderImage(xRefTable, sd, thumb, objNr) } f := sd.FilterPipeline[len(sd.FilterPipeline)-1].Name switch f { - case filter.Flate, filter.CCITTFax, filter.RunLength: - return renderImage(xRefTable, sd, thumb, resourceName, objNr) + case filter.Flate, filter.LZW, filter.CCITTFax, filter.RunLength: + return renderImage(xRefTable, sd, thumb, objNr) case filter.DCT: if sd.CSComponents == 4 { - return renderDCTToPNG(xRefTable, sd, thumb, resourceName, objNr) + return renderDCTToPNG(xRefTable, sd, thumb, objNr) } return bytes.NewReader(sd.Content), "jpg", nil diff --git a/pkg/pdfcpu/writeObjects.go b/pkg/pdfcpu/writeObjects.go index f6bf4aaa..7aa15989 100644 --- a/pkg/pdfcpu/writeObjects.go +++ b/pkg/pdfcpu/writeObjects.go @@ -284,7 +284,7 @@ func writeNameObject(ctx *model.Context, objNumber, genNumber int, name types.Na return writeObject(ctx, objNumber, genNumber, name.PDFString()) } -func writeStringLiteralObject(ctx *model.Context, objNumber, genNumber int, stringLiteral types.StringLiteral) error { +func writeStringLiteralObject(ctx *model.Context, objNumber, genNumber int, sl types.StringLiteral) error { ok, err := writeToObjectStream(ctx, objNumber, genNumber) if err != nil { return err @@ -294,21 +294,19 @@ func writeStringLiteralObject(ctx *model.Context, objNumber, genNumber int, stri return nil } - sl := stringLiteral - if ctx.EncKey != nil { - s1, err := encryptString(stringLiteral.Value(), objNumber, genNumber, ctx.EncKey, ctx.AES4Strings, ctx.E.R) + sl1, err := encryptStringLiteral(sl, objNumber, genNumber, ctx.EncKey, ctx.AES4Strings, ctx.E.R) if err != nil { return err } - sl = types.StringLiteral(*s1) + sl = *sl1 } return writeObject(ctx, objNumber, genNumber, sl.PDFString()) } -func writeHexLiteralObject(ctx *model.Context, objNumber, genNumber int, hexLiteral types.HexLiteral) error { +func writeHexLiteralObject(ctx *model.Context, objNumber, genNumber int, hl types.HexLiteral) error { ok, err := writeToObjectStream(ctx, objNumber, genNumber) if err != nil { return err @@ -318,30 +316,19 @@ func writeHexLiteralObject(ctx *model.Context, objNumber, genNumber int, hexLite return nil } - hl := hexLiteral - if ctx.EncKey != nil { - s1, err := encryptString(hexLiteral.Value(), objNumber, genNumber, ctx.EncKey, ctx.AES4Strings, ctx.E.R) + hl1, err := encryptHexLiteral(hl, objNumber, genNumber, ctx.EncKey, ctx.AES4Strings, ctx.E.R) if err != nil { return err } - hl = types.HexLiteral(*s1) + hl = *hl1 } return writeObject(ctx, objNumber, genNumber, hl.PDFString()) } func writeIntegerObject(ctx *model.Context, objNumber, genNumber int, integer types.Integer) error { - ok, err := 
writeToObjectStream(ctx, objNumber, genNumber) - if err != nil { - return err - } - - if ok { - return nil - } - return writeObject(ctx, objNumber, genNumber, integer.PDFString()) } @@ -580,7 +567,7 @@ func writeNullObject(ctx *model.Context, objNumber, genNumber int) error { func writeDeepDict(ctx *model.Context, d types.Dict, objNr, genNr int) error { if d.IsPage() { - valid, err := ctx.IsValidObj(objNr, genNr) + valid, err := ctx.IsObjValid(objNr, genNr) if err != nil { return err } @@ -643,6 +630,15 @@ func writeDeepArray(ctx *model.Context, a types.Array, objNr, genNr int) error { return nil } +func writeLazyObjectStreamObject(ctx *model.Context, objNr, genNr int, o types.LazyObjectStreamObject) error { + data, err := o.GetData() + if err != nil { + return err + } + + return writeObject(ctx, objNr, genNr, string(data)) +} + func writeObjectGeneric(ctx *model.Context, o types.Object, objNr, genNr int) (err error) { switch o := o.(type) { @@ -673,6 +669,9 @@ func writeObjectGeneric(ctx *model.Context, o types.Object, objNr, genNr int) (e case types.Name: err = writeNameObject(ctx, objNr, genNr, o) + case types.LazyObjectStreamObject: + err = writeLazyObjectStreamObject(ctx, objNr, genNr, o) + default: err = errors.Errorf("writeIndirectObject: undefined PDF object #%d %T\n", objNr, o) } @@ -691,7 +690,7 @@ func writeIndirectObject(ctx *model.Context, ir types.IndirectRef) error { return nil } - o, err := ctx.Dereference(ir) + o, err := ctx.DereferenceForWrite(ir) if err != nil { return errors.Wrapf(err, "writeIndirectObject: unable to dereference indirect object #%d", objNr) } diff --git a/pkg/pdfcpu/writePages.go b/pkg/pdfcpu/writePages.go index ba34a559..49161742 100644 --- a/pkg/pdfcpu/writePages.go +++ b/pkg/pdfcpu/writePages.go @@ -255,13 +255,6 @@ func writePagesDict(ctx *model.Context, indRef *types.IndirectRef, pageNr *int) // Push count, kids. countOrig, _ := d.Find("Count") - c := countOrig.(types.Integer).Value() - - if c == 0 { - // Ignore empty page tree. - return true, 0, nil - } - kidsOrig := d.ArrayEntry("Kids") // Iterate over page tree. diff --git a/pkg/pdfcpu/writeSignature.go b/pkg/pdfcpu/writeSignature.go new file mode 100644 index 00000000..d9ff74a2 --- /dev/null +++ b/pkg/pdfcpu/writeSignature.go @@ -0,0 +1,228 @@ +/* +Copyright 2024 The pdfcpu Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package pdfcpu + +import ( + "fmt" + "strings" + + "github.com/angel-one/pdfcpu/pkg/pdfcpu/model" + "github.com/angel-one/pdfcpu/pkg/pdfcpu/types" + "github.com/pkg/errors" +) + +func sigDictPDFString(d types.Dict) string { + s := []string{} + s = append(s, "<<") + s = append(s, fmt.Sprintf("/ByteRange%-62v", d["ByteRange"].PDFString())) + s = append(s, fmt.Sprintf("/Contents%s", d["Contents"].PDFString())) + s = append(s, fmt.Sprintf("/Type%s", d["Type"].PDFString())) + s = append(s, fmt.Sprintf("/Filter%s", d["Filter"].PDFString())) + s = append(s, fmt.Sprintf("/SubFilter%s", d["SubFilter"].PDFString())) + s = append(s, ">>") + return strings.Join(s, "") +} + +func writeSigDict(ctx *model.Context, ir types.IndirectRef) error { + // << + // + // > + // + // + // + // >> + + d, err := ctx.DereferenceDict(ir) + if err != nil { + return err + } + + typ := d.NameEntry("Type") + if typ == nil || *typ != "Sig" { + return errors.New("corrupt sig dict") + } + + f := d.NameEntry("Filter") + if f == nil || *f != "Adobe.PPKLite" { + return errors.Errorf("sig dict: unexpected Filter: %s", *f) + } + + f = d.NameEntry("SubFilter") + if f == nil || *f != "adbe.pkcs7.detached" { + return errors.Errorf("sig dict: unexpected SubFilter: %s", *f) + } + + objNr := ir.ObjectNumber.Value() + genNr := ir.GenerationNumber.Value() + + // Set write-offset for this object. + w := ctx.Write + w.SetWriteOffset(objNr) + + written, err := writeObjectHeader(w, objNr, genNr) + if err != nil { + return err + } + + // < + w.OffsetSigContents = w.OffsetSigByteRange + 1 + 60 + 1 + 9 + // 1 for "[" + // 60 for max 60 chars within this array PDF string. + // 1 for "]" + // 9 for "/Contents<" + + i, err := w.WriteString(sigDictPDFString(d)) + if err != nil { + return err + } + + j, err := writeObjectTrailer(w) + if err != nil { + return err + } + + // Write-offset for next object. + w.Offset += int64(written + i + j) + + // Record writeOffset for first and last char of Contents. + + // Record writeOffset for ByteArray... 
+ + return nil +} + +func writeSigFieldDict(ctx *model.Context, d types.Dict, objNr, genNr int) error { + // << + // + // + // + // + // + // + // + // >> + + if err := writeDictObject(ctx, objNr, genNr, d); err != nil { + return err + } + + ir := d.IndirectRefEntry("V") + if ir == nil { + return errors.New("sig field dict: missing V") + } + + return writeSigDict(ctx, *ir) +} + +func writeBlankSignature(ctx *model.Context, d types.Dict, objNr, genNr int) error { + + // << + // + // >>> + // >>> + // + // + // >> + + if err := writeDictObject(ctx, objNr, genNr, d); err != nil { + return err + } + + // Write font resource + resDict := d.DictEntry("DR") + fontResDict := resDict.DictEntry("Font") + ir := fontResDict.IndirectRefEntry("Courier") + if err := writeIndirectObject(ctx, *ir); err != nil { + return err + } + + // Write fields + a := d.ArrayEntry("Fields") + if a == nil { + return errors.New("acroform dict: missing Fields") + } + for _, o := range a { + ir, ok := o.(types.IndirectRef) + if !ok { + return errors.New("acroform dict fields: expecting indRef") + } + d, err := ctx.DereferenceDict(ir) + if err != nil { + return err + } + ft := d.NameEntry("FT") + if ft == nil || *ft != "Sig" { + if err := writeIndirectObject(ctx, ir); err != nil { + return err + } + continue + } + objNr := ir.ObjectNumber.Value() + genNr := ir.GenerationNumber.Value() + writeSigFieldDict(ctx, d, objNr, genNr) + } + return nil +} + +func writeAcroFormRootEntry(ctx *model.Context, d types.Dict, dictName string) error { + o, found := d.Find("AcroForm") + if !found || o == nil { + return nil + } + + if ctx.Cmd != model.ADDSIGNATURE { + if err := writeRootEntry(ctx, d, dictName, "AcroForm", model.RootAcroForm); err != nil { + return err + } + ctx.Stats.AddRootAttr(model.RootAcroForm) + return nil + } + + // TODO distinguish between + // A) PDF is not signed => write new Acroform with single SigField + // B) Acroform is not signed => add Sigfield to existing Acroform + // C) PDF is already signed => add Sigfield to existing Acroform via incremental update + + // Handle A) + indRef, ok := o.(types.IndirectRef) + if !ok { + return errors.New("pdfcpu: add signature: missing Acroform object") + } + + d1, err := ctx.DereferenceDict(indRef) + if err != nil { + return err + } + + objNr := indRef.ObjectNumber.Value() + genNr := indRef.GenerationNumber.Value() + + if err := writeBlankSignature(ctx, d1, objNr, genNr); err != nil { + return err + } + + ctx.Stats.AddRootAttr(model.RootAcroForm) + + return nil +} diff --git a/pkg/pdfcpu/writeStats.go b/pkg/pdfcpu/writeStats.go index 6af617a5..01e97b21 100644 --- a/pkg/pdfcpu/writeStats.go +++ b/pkg/pdfcpu/writeStats.go @@ -72,10 +72,6 @@ func logWriteStats(ctx *model.Context) { l, str = ctx.Read.ObjectStreamsString() log.Stats.Printf("%d original objectStream entries: %s", l, str) - // XRefStreams - l, str = ctx.Read.XRefStreamsString() - log.Stats.Printf("%d original xrefStream entries: %s", l, str) - // Linearization objects l, str = ctx.LinearizationObjsString() log.Stats.Printf("%d original linearization entries: %s", l, str) diff --git a/pkg/pdfcpu/zoom.go b/pkg/pdfcpu/zoom.go index 1858b43d..c90b4d89 100644 --- a/pkg/pdfcpu/zoom.go +++ b/pkg/pdfcpu/zoom.go @@ -122,7 +122,7 @@ func zoomPage(ctx *model.Context, pageNr int, zoom *model.Zoom) error { var trans bytes.Buffer fmt.Fprintf(&trans, "q %.5f %.5f %.5f %.5f %.5f %.5f cm ", m[0][0], m[0][1], m[1][0], m[1][1], m[2][0], m[2][1]) - bb, err := ctx.PageContent(d) + bb, err := ctx.PageContent(d, pageNr) if err == 
model.ErrNoContent { return nil } diff --git a/pkg/samples/annotations/Annotations.pdf b/pkg/samples/annotations/Annotations.pdf new file mode 100644 index 00000000..cd193f12 Binary files /dev/null and b/pkg/samples/annotations/Annotations.pdf differ diff --git a/pkg/samples/annotations/CaretAnnotation.pdf b/pkg/samples/annotations/CaretAnnotation.pdf new file mode 100644 index 00000000..87952c03 Binary files /dev/null and b/pkg/samples/annotations/CaretAnnotation.pdf differ diff --git a/pkg/samples/annotations/FreeTextAnnotation.pdf b/pkg/samples/annotations/FreeTextAnnotation.pdf new file mode 100644 index 00000000..71d4b8c2 Binary files /dev/null and b/pkg/samples/annotations/FreeTextAnnotation.pdf differ diff --git a/pkg/samples/annotations/HighlightAnnotation.pdf b/pkg/samples/annotations/HighlightAnnotation.pdf new file mode 100644 index 00000000..e6b27c93 Binary files /dev/null and b/pkg/samples/annotations/HighlightAnnotation.pdf differ diff --git a/pkg/samples/annotations/InkAnnotation.pdf b/pkg/samples/annotations/InkAnnotation.pdf new file mode 100644 index 00000000..020c7d72 Binary files /dev/null and b/pkg/samples/annotations/InkAnnotation.pdf differ diff --git a/pkg/samples/annotations/LineAnnotation.pdf b/pkg/samples/annotations/LineAnnotation.pdf new file mode 100644 index 00000000..76584f74 Binary files /dev/null and b/pkg/samples/annotations/LineAnnotation.pdf differ diff --git a/pkg/samples/annotations/LinkAnnotWithDestTopLeft.pdf b/pkg/samples/annotations/LinkAnnotWithDestTopLeft.pdf index d65e4da4..5985684d 100644 Binary files a/pkg/samples/annotations/LinkAnnotWithDestTopLeft.pdf and b/pkg/samples/annotations/LinkAnnotWithDestTopLeft.pdf differ diff --git a/pkg/samples/annotations/PolyLineAnnotation.pdf b/pkg/samples/annotations/PolyLineAnnotation.pdf new file mode 100644 index 00000000..c64cdd6b Binary files /dev/null and b/pkg/samples/annotations/PolyLineAnnotation.pdf differ diff --git a/pkg/samples/annotations/PolygonAnnotation.pdf b/pkg/samples/annotations/PolygonAnnotation.pdf new file mode 100644 index 00000000..eebebe7c Binary files /dev/null and b/pkg/samples/annotations/PolygonAnnotation.pdf differ diff --git a/pkg/samples/annotations/PopupAnnotation.pdf b/pkg/samples/annotations/PopupAnnotation.pdf new file mode 100644 index 00000000..3e08eeaa Binary files /dev/null and b/pkg/samples/annotations/PopupAnnotation.pdf differ diff --git a/pkg/samples/annotations/SquigglyAnnotation.pdf b/pkg/samples/annotations/SquigglyAnnotation.pdf new file mode 100644 index 00000000..5c5fd8cf Binary files /dev/null and b/pkg/samples/annotations/SquigglyAnnotation.pdf differ diff --git a/pkg/samples/annotations/StrikeOutAnnotation.pdf b/pkg/samples/annotations/StrikeOutAnnotation.pdf new file mode 100644 index 00000000..582d90c0 Binary files /dev/null and b/pkg/samples/annotations/StrikeOutAnnotation.pdf differ diff --git a/pkg/samples/annotations/TestAnnotations.pdf b/pkg/samples/annotations/TestAnnotations.pdf deleted file mode 100644 index 46fb45cb..00000000 Binary files a/pkg/samples/annotations/TestAnnotations.pdf and /dev/null differ diff --git a/pkg/samples/annotations/TestAnnotationsFile.pdf b/pkg/samples/annotations/TestAnnotationsFile.pdf deleted file mode 100644 index 85885c77..00000000 Binary files a/pkg/samples/annotations/TestAnnotationsFile.pdf and /dev/null differ diff --git a/pkg/samples/annotations/UnderlineAnnotation.pdf b/pkg/samples/annotations/UnderlineAnnotation.pdf new file mode 100644 index 00000000..226a05e5 Binary files /dev/null and 
b/pkg/samples/annotations/UnderlineAnnotation.pdf differ diff --git a/pkg/samples/booklet/BookletFromImagesA4_2Up.pdf b/pkg/samples/booklet/BookletFromImagesA4_2Up.pdf index e03e18b4..da6b123d 100644 Binary files a/pkg/samples/booklet/BookletFromImagesA4_2Up.pdf and b/pkg/samples/booklet/BookletFromImagesA4_2Up.pdf differ diff --git a/pkg/samples/booklet/BookletFromImagesA4_4Up.pdf b/pkg/samples/booklet/BookletFromImagesA4_4Up.pdf index 9eb483a5..798f965c 100644 Binary files a/pkg/samples/booklet/BookletFromImagesA4_4Up.pdf and b/pkg/samples/booklet/BookletFromImagesA4_4Up.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDF8Up.pdf b/pkg/samples/booklet/BookletFromPDF8Up.pdf new file mode 100644 index 00000000..56d82606 Binary files /dev/null and b/pkg/samples/booklet/BookletFromPDF8Up.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDF8UpLandscapeLong.pdf b/pkg/samples/booklet/BookletFromPDF8UpLandscapeLong.pdf new file mode 100644 index 00000000..40fd4659 Binary files /dev/null and b/pkg/samples/booklet/BookletFromPDF8UpLandscapeLong.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDF8UpLandscapeShort.pdf b/pkg/samples/booklet/BookletFromPDF8UpLandscapeShort.pdf new file mode 100644 index 00000000..6336378f Binary files /dev/null and b/pkg/samples/booklet/BookletFromPDF8UpLandscapeShort.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDF8UpPortraitShort.pdf b/pkg/samples/booklet/BookletFromPDF8UpPortraitShort.pdf new file mode 100644 index 00000000..38943f4f Binary files /dev/null and b/pkg/samples/booklet/BookletFromPDF8UpPortraitShort.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFA4_2Up.pdf b/pkg/samples/booklet/BookletFromPDFA4_2Up.pdf index a97ed494..54371b3d 100644 Binary files a/pkg/samples/booklet/BookletFromPDFA4_2Up.pdf and b/pkg/samples/booklet/BookletFromPDFA4_2Up.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFA4_4Up.pdf b/pkg/samples/booklet/BookletFromPDFA4_4Up.pdf index 86d71fa6..48f8666e 100644 Binary files a/pkg/samples/booklet/BookletFromPDFA4_4Up.pdf and b/pkg/samples/booklet/BookletFromPDFA4_4Up.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLedger_4Up.pdf b/pkg/samples/booklet/BookletFromPDFLedger_4Up.pdf index 4b38ca67..89942b5d 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLedger_4Up.pdf and b/pkg/samples/booklet/BookletFromPDFLedger_4Up.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLedger_4UpWithTrailingBlankPages.pdf b/pkg/samples/booklet/BookletFromPDFLedger_4UpWithTrailingBlankPages.pdf index 7c193776..1166bea8 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLedger_4UpWithTrailingBlankPages.pdf and b/pkg/samples/booklet/BookletFromPDFLedger_4UpWithTrailingBlankPages.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_long.pdf b/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_long.pdf index a50b1a9f..a7d22537 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_long.pdf and b/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_long.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_short.pdf b/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_short.pdf index 3e9c495d..2702fc98 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_short.pdf and b/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_short.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_short_advanced.pdf 
b/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_short_advanced.pdf index a59528ba..7e5a36f6 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_short_advanced.pdf and b/pkg/samples/booklet/BookletFromPDFLedger_4Up_landscape_short_advanced.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLedger_4Up_perfectbound.pdf b/pkg/samples/booklet/BookletFromPDFLedger_4Up_perfectbound.pdf index f1dd14cc..d816abef 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLedger_4Up_perfectbound.pdf and b/pkg/samples/booklet/BookletFromPDFLedger_4Up_perfectbound.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLedger_4Up_portrait_long_advanced.pdf b/pkg/samples/booklet/BookletFromPDFLedger_4Up_portrait_long_advanced.pdf index e812c6f5..3570027e 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLedger_4Up_portrait_long_advanced.pdf and b/pkg/samples/booklet/BookletFromPDFLedger_4Up_portrait_long_advanced.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLedger_4Up_portrait_short.pdf b/pkg/samples/booklet/BookletFromPDFLedger_4Up_portrait_short.pdf index 1ae57184..82ffb556 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLedger_4Up_portrait_short.pdf and b/pkg/samples/booklet/BookletFromPDFLedger_4Up_portrait_short.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLedger_6Up.pdf b/pkg/samples/booklet/BookletFromPDFLedger_6Up.pdf index 13e83697..36a9cdd9 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLedger_6Up.pdf and b/pkg/samples/booklet/BookletFromPDFLedger_6Up.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLedger_8Up.pdf b/pkg/samples/booklet/BookletFromPDFLedger_8Up.pdf deleted file mode 100644 index f4e4e636..00000000 Binary files a/pkg/samples/booklet/BookletFromPDFLedger_8Up.pdf and /dev/null differ diff --git a/pkg/samples/booklet/BookletFromPDFLetter_2Up.pdf b/pkg/samples/booklet/BookletFromPDFLetter_2Up.pdf index 2907cef1..2f249546 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLetter_2Up.pdf and b/pkg/samples/booklet/BookletFromPDFLetter_2Up.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLetter_2UpWithTrailingBlankPages.pdf b/pkg/samples/booklet/BookletFromPDFLetter_2UpWithTrailingBlankPages.pdf index 4b16998f..1fe39da6 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLetter_2UpWithTrailingBlankPages.pdf and b/pkg/samples/booklet/BookletFromPDFLetter_2UpWithTrailingBlankPages.pdf differ diff --git a/pkg/samples/booklet/BookletFromPDFLetter_2Up_perfectbound.pdf b/pkg/samples/booklet/BookletFromPDFLetter_2Up_perfectbound.pdf index a5bac195..1000cae0 100644 Binary files a/pkg/samples/booklet/BookletFromPDFLetter_2Up_perfectbound.pdf and b/pkg/samples/booklet/BookletFromPDFLetter_2Up_perfectbound.pdf differ diff --git a/pkg/samples/booklet/HardbackBookFromPDF.pdf b/pkg/samples/booklet/HardbackBookFromPDF.pdf index 6a34ea0f..7e8212d0 100644 Binary files a/pkg/samples/booklet/HardbackBookFromPDF.pdf and b/pkg/samples/booklet/HardbackBookFromPDF.pdf differ diff --git a/pkg/samples/bookmarks/bookmarkDuplicates.pdf b/pkg/samples/bookmarks/bookmarkDuplicates.pdf new file mode 100644 index 00000000..1a1dae0c Binary files /dev/null and b/pkg/samples/bookmarks/bookmarkDuplicates.pdf differ diff --git a/pkg/samples/form/multifill/csv/merge/person.pdf b/pkg/samples/form/multifill/csv/merge/person.pdf index 3933bf48..b3b0fd44 100644 Binary files a/pkg/samples/form/multifill/csv/merge/person.pdf and b/pkg/samples/form/multifill/csv/merge/person.pdf differ diff --git 
a/pkg/samples/form/multifill/csv/person.csv b/pkg/samples/form/multifill/csv/person.csv index 8bfbc85e..bc594613 100644 --- a/pkg/samples/form/multifill/csv/person.csv +++ b/pkg/samples/form/multifill/csv/person.csv @@ -12,7 +12,7 @@ "Niloofar","Hamedi","*22.10.1992","*t","female","Iran","Journalist","imprisoned","CC BY 4.0","https://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Niloofar_Hamedi_04.jpg/206px-Niloofar_Hamedi_04.jpg,(https://en.wikipedia.org/wiki/Niloofar_Hamedi)" "Werner Karl","Heisenberg","*05.12.1901","*t","male","Germany","Physicist","*deceased","CC BY-SA 3.0 de","https://upload.wikimedia.org/wikipedia/commons/f/f8/Bundesarchiv_Bild183-R57262%2C_Werner_Heisenberg.jpg,(https://en.wikipedia.org/wiki/Werner_Heisenberg)" "Hindou Oumarou","Ibrahim","","f","female","Chad","Environmental Activist","alive","CC BY-SA 4.0","https://upload.wikimedia.org/wikipedia/commons/thumb/7/7d/Hindou_Oumarou_Ibrahim.jpg/237px-Hindou_Oumarou_Ibrahim.jpg,(https://en.wikipedia.org/wiki/Hindou_Oumarou_Ibrahim)" -"Vladimir","Kara-Murza","*07.11.1981","*t","male","Russia","Journalist","imprisoned","CC BY-SA 4.0","https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/Vladimir_V._Kara-Murza_%282017%29.jpg/384px-Vladimir_V._Kara-Murza_%282017%29.jpg,(https://en.wikipedia.org/wiki/Vladimir_Kara-Murza)" +"Vladimir","Kara-Murza","*07.11.1981","*t","male","Russia","Journalist","alive","CC BY-SA 4.0","https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/Vladimir_V._Kara-Murza_%282017%29.jpg/384px-Vladimir_V._Kara-Murza_%282017%29.jpg,(https://en.wikipedia.org/wiki/Vladimir_Kara-Murza)" "Nathan","Law","*13.07.1993","*t","male","Hong Kong","Activist","alive","CC BY-SA 4.0","https://upload.wikimedia.org/wikipedia/commons/thumb/c/cf/Nathan_Law_%282022%29_II.jpg/384px-Nathan_Law_%282022%29_II.jpg,(https://en.wikipedia.org/wiki/Nathan_Law)" "Alexander","Litvinenko","*30.08.1962","*t","male","Russia","Journalist","*killed","fair use","https://upload.wikimedia.org/wikipedia/en/9/99/AlexanderLitvinenko.jpg,(https://en.wikipedia.org/wiki/Alexander_Litvinenko)" "Gopi Shankar","Madurai","*13.04.1991","*t","non-binary","India","Activist","alive","unknown","https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQm6N-rADTwRyO6dLvc8E0qOnIBBOwdMUcIV8v6oKjOMLAo8QS_,(https://en.wikipedia.org/wiki/Gopi_Shankar_Madurai)" @@ -30,7 +30,7 @@ "Roman","Protasevich","*05.05.1995","*t","male","Belarus","Activist","alive","CC BY 3.0","https://upload.wikimedia.org/wikipedia/commons/thumb/7/7e/%D0%A0%D0%BE%D0%BC%D0%B0%D0%BD_%D0%9F%D1%80%D0%BE%D1%82%D0%B0%D1%81%D0%B5%D0%B2%D0%B8%D1%87.jpg/168px-%D0%A0%D0%BE%D0%BC%D0%B0%D0%BD_%D0%9F%D1%80%D0%BE%D1%82%D0%B0%D1%81%D0%B5%D0%B2%D0%B8%D1%87.jpg,(https://en.wikipedia.org/wiki/Roman_Protasevich)" "Tarcila","Rivera Zea","*24.12.1950","*t","female","Peru","Activist","alive","Twitter","https://pbs.twimg.com/profile_images/856320137166815233/aK5MLAvu_400x400.jpg,(https://en.wikipedia.org/wiki/Tarcila_Rivera_Zea)" "Yevgeny","Roizman","*14.09.1962","*t","male","Russia","Politician","alive","CC BY-SA 4.0","https://upload.wikimedia.org/wikipedia/commons/thumb/e/e5/Roizman_Yevgeny.png/185px-Roizman_Yevgeny.png,(https://en.wikipedia.org/wiki/Yevgeny_Roizman)" -"Yuri Petrovich","Shchekochikhin","*09.06.1950","*t","male","Azerbaijan","Journalist","killed","fair use","https://upload.wikimedia.org/wikipedia/en/c/cb/Yuri_Shchekochikhin.png,(https://en.wikipedia.org/wiki/Yuri_Shchekochikhin)" +"Yuri Petrovich","Shchekochikhin","*09.06.1950","*t","male","Azerbaijan","Journalist","killed","fair 
use","https://upload.wikimedia.org/wikipedia/commons/f/f4/Yuri_Shchekochikhin.jpg,(https://en.wikipedia.org/wiki/Yuri_Shchekochikhin)" "Marina","Silva","08.02.1958","*t","female","Brazil","Minister of Climate Change","alive","Twitter","https://pbs.twimg.com/profile_images/1577255224741400576/_1Vi_i-g_400x400.jpg,(https://en.wikipedia.org/wiki/Marina_Silva)" "Greta","Thunberg","*03.01.2003","*t","female","Sweden","Environmental Activist","alive","CC BY 2.0","https://upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Glastonbury2022_%28218_of_413%29_%2852182434551%29_%28cropped%29_%28cropped%29.jpg/505px-Glastonbury2022_%28218_of_413%29_%2852182434551%29_%28cropped%29_%28cropped%29.jpg,(https://en.wikipedia.org/wiki/Greta_Thunberg)" "Alan","Turing","*23.06.1912","*t","male","UK","Computer Scientist","*deceased","Public Domain","https://upload.wikimedia.org/wikipedia/commons/thumb/1/17/Alan_Turing_%281912-1954%29_in_1936_at_Princeton_University.jpg/236px-Alan_Turing_%281912-1954%29_in_1936_at_Princeton_University.jpg,(https://en.wikipedia.org/wiki/Alan_Turing)" diff --git a/pkg/samples/form/multifill/csv/person_14.pdf b/pkg/samples/form/multifill/csv/person_14.pdf index 8c8578bb..420286dd 100644 Binary files a/pkg/samples/form/multifill/csv/person_14.pdf and b/pkg/samples/form/multifill/csv/person_14.pdf differ diff --git a/pkg/samples/form/multifill/csv/person_32.pdf b/pkg/samples/form/multifill/csv/person_32.pdf index 98977b22..df6673cc 100644 Binary files a/pkg/samples/form/multifill/csv/person_32.pdf and b/pkg/samples/form/multifill/csv/person_32.pdf differ diff --git a/pkg/samples/images/ImageUpdatedByObjNr.pdf b/pkg/samples/images/ImageUpdatedByObjNr.pdf new file mode 100644 index 00000000..9abb0e30 Binary files /dev/null and b/pkg/samples/images/ImageUpdatedByObjNr.pdf differ diff --git a/pkg/samples/images/any.jpg b/pkg/samples/images/any.jpg new file mode 100644 index 00000000..05438fba Binary files /dev/null and b/pkg/samples/images/any.jpg differ diff --git a/pkg/samples/images/any.png b/pkg/samples/images/any.png new file mode 100644 index 00000000..195dcec6 Binary files /dev/null and b/pkg/samples/images/any.png differ diff --git a/pkg/samples/images/any.tiff b/pkg/samples/images/any.tiff new file mode 100644 index 00000000..99734823 Binary files /dev/null and b/pkg/samples/images/any.tiff differ diff --git a/pkg/samples/images/any.webp b/pkg/samples/images/any.webp new file mode 100644 index 00000000..b2586680 Binary files /dev/null and b/pkg/samples/images/any.webp differ diff --git a/pkg/samples/images/imageUpdatedByFileName.pdf b/pkg/samples/images/imageUpdatedByFileName.pdf new file mode 100644 index 00000000..501a624d Binary files /dev/null and b/pkg/samples/images/imageUpdatedByFileName.pdf differ diff --git a/pkg/samples/images/imageUpdatedByObjNrJPG.pdf b/pkg/samples/images/imageUpdatedByObjNrJPG.pdf new file mode 100644 index 00000000..ae9f44d9 Binary files /dev/null and b/pkg/samples/images/imageUpdatedByObjNrJPG.pdf differ diff --git a/pkg/samples/images/imageUpdatedByObjNrPNG.pdf b/pkg/samples/images/imageUpdatedByObjNrPNG.pdf new file mode 100644 index 00000000..62f65096 Binary files /dev/null and b/pkg/samples/images/imageUpdatedByObjNrPNG.pdf differ diff --git a/pkg/samples/images/imageUpdatedByObjNrTIFF.pdf b/pkg/samples/images/imageUpdatedByObjNrTIFF.pdf new file mode 100644 index 00000000..d512ab8f Binary files /dev/null and b/pkg/samples/images/imageUpdatedByObjNrTIFF.pdf differ diff --git a/pkg/samples/images/imageUpdatedByObjNrWEBP.pdf 
b/pkg/samples/images/imageUpdatedByObjNrWEBP.pdf new file mode 100644 index 00000000..eab42a22 Binary files /dev/null and b/pkg/samples/images/imageUpdatedByObjNrWEBP.pdf differ diff --git a/pkg/samples/images/imageUpdatedByPageNrAndIdAny.pdf b/pkg/samples/images/imageUpdatedByPageNrAndIdAny.pdf new file mode 100644 index 00000000..09bb13f7 Binary files /dev/null and b/pkg/samples/images/imageUpdatedByPageNrAndIdAny.pdf differ diff --git a/pkg/samples/images/imageUpdatedByPageNrAndIdPage1.pdf b/pkg/samples/images/imageUpdatedByPageNrAndIdPage1.pdf new file mode 100644 index 00000000..332a37f1 Binary files /dev/null and b/pkg/samples/images/imageUpdatedByPageNrAndIdPage1.pdf differ diff --git a/pkg/samples/images/imageUpdatedByPageNrAndIdPage2.pdf b/pkg/samples/images/imageUpdatedByPageNrAndIdPage2.pdf new file mode 100644 index 00000000..9a6b3b49 Binary files /dev/null and b/pkg/samples/images/imageUpdatedByPageNrAndIdPage2.pdf differ diff --git a/pkg/samples/images/test.pdf b/pkg/samples/images/test.pdf new file mode 100644 index 00000000..0dc8c952 Binary files /dev/null and b/pkg/samples/images/test.pdf differ diff --git a/pkg/samples/images/test_1_Im1.png b/pkg/samples/images/test_1_Im1.png new file mode 100644 index 00000000..195dcec6 Binary files /dev/null and b/pkg/samples/images/test_1_Im1.png differ diff --git a/pkg/samples/import/MultiPageTIFF.pdf b/pkg/samples/import/MultiPageTIFF.pdf new file mode 100644 index 00000000..ac1b00c0 Binary files /dev/null and b/pkg/samples/import/MultiPageTIFF.pdf differ diff --git a/pkg/samples/signatures/ETSI.CAdES.detached/sample1.pdf b/pkg/samples/signatures/ETSI.CAdES.detached/sample1.pdf new file mode 100644 index 00000000..e4330710 Binary files /dev/null and b/pkg/samples/signatures/ETSI.CAdES.detached/sample1.pdf differ diff --git a/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BB.pdf b/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BB.pdf new file mode 100644 index 00000000..9b51d8c6 Binary files /dev/null and b/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BB.pdf differ diff --git a/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BLT.pdf b/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BLT.pdf new file mode 100644 index 00000000..338aecba Binary files /dev/null and b/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BLT.pdf differ diff --git a/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BLTA.pdf b/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BLTA.pdf new file mode 100644 index 00000000..666065cf Binary files /dev/null and b/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BLTA.pdf differ diff --git a/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BT.pdf b/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BT.pdf new file mode 100644 index 00000000..0f89f731 Binary files /dev/null and b/pkg/samples/signatures/ETSI.CAdES.detached/testPAdES_BT.pdf differ diff --git a/pkg/samples/signatures/ETSI.CAdES.detached/your_signed_files_here_for_testing b/pkg/samples/signatures/ETSI.CAdES.detached/your_signed_files_here_for_testing new file mode 100644 index 00000000..e69de29b diff --git a/pkg/samples/signatures/adbe.pkcs7.detached/sample1.pdf b/pkg/samples/signatures/adbe.pkcs7.detached/sample1.pdf new file mode 100644 index 00000000..8858f933 Binary files /dev/null and b/pkg/samples/signatures/adbe.pkcs7.detached/sample1.pdf differ diff --git a/pkg/samples/signatures/adbe.pkcs7.detached/sample2.pdf b/pkg/samples/signatures/adbe.pkcs7.detached/sample2.pdf new file mode 100644 index 
00000000..9f7af931 Binary files /dev/null and b/pkg/samples/signatures/adbe.pkcs7.detached/sample2.pdf differ diff --git a/pkg/samples/signatures/adbe.pkcs7.detached/usageRights.pdf b/pkg/samples/signatures/adbe.pkcs7.detached/usageRights.pdf new file mode 100644 index 00000000..d3ea44dc Binary files /dev/null and b/pkg/samples/signatures/adbe.pkcs7.detached/usageRights.pdf differ diff --git a/pkg/samples/signatures/adbe.pkcs7.detached/your_signed_files_here_for_testing b/pkg/samples/signatures/adbe.pkcs7.detached/your_signed_files_here_for_testing new file mode 100644 index 00000000..e69de29b diff --git a/pkg/samples/signatures/adbe.pkcs7.sha1/your_signed_files_here_for_testing b/pkg/samples/signatures/adbe.pkcs7.sha1/your_signed_files_here_for_testing new file mode 100644 index 00000000..e69de29b diff --git a/pkg/samples/signatures/adbe.x509.rsa_sha1/sample01.pdf b/pkg/samples/signatures/adbe.x509.rsa_sha1/sample01.pdf new file mode 100644 index 00000000..6efb7a38 Binary files /dev/null and b/pkg/samples/signatures/adbe.x509.rsa_sha1/sample01.pdf differ diff --git a/pkg/samples/signatures/adbe.x509.rsa_sha1/your_signed_files_here_for_testing b/pkg/samples/signatures/adbe.x509.rsa_sha1/your_signed_files_here_for_testing new file mode 100644 index 00000000..e69de29b diff --git a/pkg/testdata/bookletTestA6.pdf b/pkg/testdata/bookletTestA6.pdf new file mode 100644 index 00000000..9cdf341d Binary files /dev/null and b/pkg/testdata/bookletTestA6.pdf differ diff --git a/pkg/testdata/bookletTestA6L.pdf b/pkg/testdata/bookletTestA6L.pdf new file mode 100644 index 00000000..55eaa22b Binary files /dev/null and b/pkg/testdata/bookletTestA6L.pdf differ diff --git a/pkg/testdata/pdf20/SimplePDF2.0.pdf b/pkg/testdata/pdf20/SimplePDF2.0.pdf new file mode 100644 index 00000000..65e6bcf2 Binary files /dev/null and b/pkg/testdata/pdf20/SimplePDF2.0.pdf differ diff --git a/pkg/testdata/pdf20/i277.pdf b/pkg/testdata/pdf20/i277.pdf new file mode 100644 index 00000000..257c47e3 Binary files /dev/null and b/pkg/testdata/pdf20/i277.pdf differ diff --git a/pkg/testdata/pdf20/imageWithBPC.pdf b/pkg/testdata/pdf20/imageWithBPC.pdf new file mode 100644 index 00000000..2d05531b Binary files /dev/null and b/pkg/testdata/pdf20/imageWithBPC.pdf differ diff --git a/pkg/testdata/pdf20/pageLevelOutputIntent.pdf b/pkg/testdata/pdf20/pageLevelOutputIntent.pdf new file mode 100644 index 00000000..46e3da9e Binary files /dev/null and b/pkg/testdata/pdf20/pageLevelOutputIntent.pdf differ diff --git a/pkg/testdata/pdf20/utf8stringAndAnnotation.pdf b/pkg/testdata/pdf20/utf8stringAndAnnotation.pdf new file mode 100644 index 00000000..df1369bd Binary files /dev/null and b/pkg/testdata/pdf20/utf8stringAndAnnotation.pdf differ diff --git a/pkg/testdata/pdf20/utf8test.pdf b/pkg/testdata/pdf20/utf8test.pdf new file mode 100644 index 00000000..181db8bd Binary files /dev/null and b/pkg/testdata/pdf20/utf8test.pdf differ diff --git a/pkg/testdata/pdf20/viaIncrementalSave.pdf b/pkg/testdata/pdf20/viaIncrementalSave.pdf new file mode 100644 index 00000000..e6244918 Binary files /dev/null and b/pkg/testdata/pdf20/viaIncrementalSave.pdf differ diff --git a/pkg/testdata/pdf20/withOffsetStart.pdf b/pkg/testdata/pdf20/withOffsetStart.pdf new file mode 100644 index 00000000..72f5460d Binary files /dev/null and b/pkg/testdata/pdf20/withOffsetStart.pdf differ diff --git a/pkg/testdata/resources/multipage.tif b/pkg/testdata/resources/multipage.tif new file mode 100644 index 00000000..1f89b9a3 Binary files /dev/null and 
b/pkg/testdata/resources/multipage.tif differ diff --git a/pkg/testdata/testWithText.pdf b/pkg/testdata/testWithText.pdf new file mode 100644 index 00000000..69187f61 Binary files /dev/null and b/pkg/testdata/testWithText.pdf differ diff --git a/resources/cjkv.png b/resources/cjkv.png index d52485f3..26bc8ff2 100644 Binary files a/resources/cjkv.png and b/resources/cjkv.png differ