Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions swagger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ definitions:
- vie
- chi-sim
- chi-tra



output_format:
type: string
description: The output format. If omitted, plain text is returned.
enum:
- tsv
- pdf
- hocr
22 changes: 18 additions & 4 deletions tesseract_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ type TesseractEngine struct {
}

// TesseractEngineArgs holds the per-request tesseract options that
// NewTesseractEngineArgs builds from an OcrRequest.
//
// NOTE(review): this listing is a rendered diff hunk, so configVars,
// pageSegMode and lang appear twice (the removed lines followed by the
// re-added ones); presumably the real struct declares each field once.
//
// NOTE(review): every field is unexported, and encoding/json ignores
// unexported fields, so the json tags only name the matching request keys;
// confirm they are not expected to drive (un)marshalling.
type TesseractEngineArgs struct {
configVars map[string]string `json:"config_vars"`
pageSegMode string `json:"psm"`
lang string `json:"lang"`
configVars map[string]string `json:"config_vars"`
pageSegMode string `json:"psm"`
lang string `json:"lang"`
// outputFormat is read from the "output_format" engine argument; Export
// appends it to the argument list when it is non-empty.
outputFormat string `json:"output_format"`
}

func NewTesseractEngineArgs(ocrRequest OcrRequest) (*TesseractEngineArgs, error) {
Expand Down Expand Up @@ -70,6 +71,16 @@ func NewTesseractEngineArgs(ocrRequest OcrRequest) (*TesseractEngineArgs, error)
engineArgs.lang = langStr
}

// output format
outputFormat := ocrRequest.EngineArgs["output_format"]
if outputFormat != nil {
outputFormatStr, ok := outputFormat.(string)
if !ok {
return nil, fmt.Errorf("Could not convert output_format into string: %v", outputFormat)
}
engineArgs.outputFormat = outputFormatStr
}

return engineArgs, nil

}
Expand All @@ -91,6 +102,9 @@ func (t TesseractEngineArgs) Export() []string {
result = append(result, "-l")
result = append(result, t.lang)
}
if t.outputFormat != "" {
result = append(result, t.outputFormat)
}

return result
}
Expand Down Expand Up @@ -198,7 +212,7 @@ func (t TesseractEngine) processImageFile(inputFilename string, engineArgs Tesse
tmpOutFileBaseName := inputFilename

// possible file extensions
fileExtensions := []string{"txt", "hocr"}
fileExtensions := []string{"txt", "hocr", "tsv"}

// build args array
cflags := engineArgs.Export()
Expand Down