Skip to content
This repository was archived by the owner on Jul 7, 2020. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 77 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,78 @@
go get rsc.io/pdf
# PDF Reader

http://godoc.org/rsc.io/pdf
A simple Go library which enables reading PDF files. Forked from https://github.com/rsc/pdf

Features
- Get plain text content (without formatting)
- Get content (including all font and formatting information)

## Install:

`go get -u github.com/ledongthuc/pdf`


## Read plain text

```golang
package main

import (
"bytes"
"fmt"

"github.com/ledongthuc/pdf"
)

// main prints the text returned by readPdf for the local file test.pdf and
// panics if readPdf reports an error.
func main() {
	text, err := readPdf("test.pdf") // Read local pdf file
	if err != nil {
		panic(err)
	}
	fmt.Println(text)
}

func readPdf(path string) (string, error) {
r, err := pdf.Open(path)
if err != nil {
return "", err
}

var buf bytes.Buffer
buf.ReadFrom(r.GetPlainText())

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be:

// readPdf opens the PDF at path with pdf.Open and returns the plain text
// produced by the reader's GetPlainText.
//
// It returns an empty string and the error if opening the file, obtaining
// the plain-text stream, or reading that stream into memory fails.
func readPdf(path string) (string, error) {
	r, err := pdf.Open(path)
	if err != nil {
		return "", err
	}

	text, err := r.GetPlainText()
	if err != nil {
		return "", err
	}

	var buf bytes.Buffer
	// ReadFrom reports any read error from the text stream; propagate it
	// rather than returning partially read content as if it were complete.
	if _, err := buf.ReadFrom(text); err != nil {
		return "", err
	}
	return buf.String(), nil
}

return buf.String(), nil
}
```

## Read all text with styles from PDF

```golang
func readPdf2(path string) (string, error) {
r, err := pdf.Open(path)
if err != nil {
return "", err
}
totalPage := r.NumPage()

for pageIndex := 1; pageIndex <= totalPage; pageIndex++ {
p := r.Page(pageIndex)
if p.V.IsNull() {
continue
}
var lastTextStyle pdf.Text
texts := p.Content().Text
for _, text := range texts {
if isSameSentence(text, lastTextStyle) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This example is missing a function; maybe something like this:

// isSameSentence reports whether t1 and t2 have both the same Font and the
// same FontSize.
func isSameSentence(t1, t2 pdf.Text) bool {
	sameFont := t1.Font == t2.Font
	sameSize := t1.FontSize == t2.FontSize
	return sameFont && sameSize
}

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// isSameSentence reports whether t1 and t2 share the same Y coordinate.
// A change in Y is treated as the start of a new line, so texts with
// differing Y values are not part of the same sentence.
func isSameSentence(t1, t2 pdf.Text) bool {
	return t1.Y == t2.Y
}

lastTextStyle.S = lastTextStyle.S + text.S
} else {
fmt.Printf("Font: %s, Font-size: %f, x: %f, y: %f, content: %s \n", lastTextStyle.Font, lastTextStyle.FontSize, lastTextStyle.X, lastTextStyle.Y, lastTextStyle.S)
lastTextStyle = text
}
}
}
return "", nil
}
```

## Demo
![Run example](https://i.gyazo.com/01fbc539e9872593e0ff6bac7e954e6d.gif)
Loading