-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdf2scan.sh
More file actions
161 lines (130 loc) · 3.95 KB
/
pdf2scan.sh
File metadata and controls
161 lines (130 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env bash
set -euo pipefail
#######################################
# Print the usage line and option summary.
# Globals:   none ($0 is interpolated as the program name)
# Arguments: none
# Outputs:   help text on stdout
#######################################
show_help() {
  # Unquoted EOF on purpose: $0 must expand to the invoked script name.
  # The defaults quoted below mirror the assignments in the Defaults section
  # (dpi=100, gamma=1, contrast=0x50, quality=60); keep them in sync.
  cat <<EOF
Usage: $0 [options] input.pdf output.pdf
Options:
-h, --help Show this help message
-d, --dpi DPI Render DPI (default: 100)
-g, --gamma GAMMA Gamma correction (default: 1)
-c, --contrast CONTRAST Brightness-contrast (default: 0x50)
-t, --threshold THRESH Convert to 1-bit using threshold (optional, e.g., 50%)
-q, --quality QUALITY JPEG quality (1-100, default 60)
-a, --adaptive Convert to 1-bit using adaptive thresholding
-v, --verbose Show detailed logs
EOF
}
# ----------------------
# Defaults (every value below can be overridden by a command-line flag)
# ----------------------
# Boolean switches, compared against the literal string "true".
verbose=false
adaptive=false

# Rendering: resolution handed to gs -r and JPEG quality handed to -dJPEGQ.
quality=60
dpi=100

# Tone adjustment applied by magick after rendering.
contrast='0x50'
gamma=1

# Left empty so that -threshold is only applied when -t/--threshold is given.
threshold=''
# ----------------------
# Parse flags
# ----------------------
# Collects non-option arguments; after the loop the first two become
# the input and output paths.
POSITIONAL=()

# Print an error message followed by the usage text on stderr, then exit 1.
_usage_error() {
  echo "$1" >&2
  show_help >&2
  exit 1
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      show_help
      exit 0
      ;;
    -d|--dpi|-g|--gamma|-c|--contrast|-t|--threshold|-q|--quality)
      # Value-taking options: without this guard a trailing option dies with
      # a bare "unbound variable" message from "$2" under set -u.
      [[ $# -ge 2 ]] || _usage_error "Error: option $1 requires a value"
      case "$1" in
        -d|--dpi) dpi="$2" ;;
        -g|--gamma) gamma="$2" ;;
        -c|--contrast) contrast="$2" ;;
        -t|--threshold) threshold="$2" ;;
        -q|--quality) quality="$2" ;;
      esac
      shift 2
      ;;
    -a|--adaptive)
      adaptive=true
      shift
      ;;
    -v|--verbose)
      verbose=true
      shift
      ;;
    --)
      # End of options: everything that follows is positional, even when it
      # starts with "-". Keep those arguments instead of discarding them.
      shift
      POSITIONAL+=("$@")
      break
      ;;
    -*)
      _usage_error "Unknown option: $1"
      ;;
    *)
      POSITIONAL+=("$1")
      shift
      ;;
  esac
done

# Check the count before expanding the array: expanding an empty array under
# set -u is an error in bash releases older than 4.4.
if (( ${#POSITIONAL[@]} < 2 )); then
  _usage_error "Error: input.pdf and output.pdf required"
fi
set -- "${POSITIONAL[@]}"
in="$1"
out="$2"
# ----------------------
# Workspace and job sizing
# ----------------------
# Fail early with a clear message; otherwise the first symptom is whatever
# pdfinfo prints, followed by a silent exit.
if [[ ! -f "$in" || ! -r "$in" ]]; then
  echo "Error: cannot read input file: $in" >&2
  exit 1
fi

# Scratch directory for per-page images; removed on every exit path.
tmpdir="$(mktemp -d)"
trap 'rm -rf -- "$tmpdir"' EXIT

# Page count comes from the "Pages:" line of pdfinfo. It drives the seq
# ranges used later, so it must be a positive integer.
pages=$(pdfinfo "$in" | awk '/^Pages:/ {print $2}') || pages=""
if [[ ! "$pages" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: could not determine page count of $in" >&2
  exit 1
fi

cpu_cores=$(nproc)

# MemAvailable is reported in kB. A missing file or field would otherwise
# end the script without any message.
total_mem_kb=$(awk '/^MemAvailable:/ {print $2}' /proc/meminfo 2>/dev/null) \
  || total_mem_kb=""
if [[ ! "$total_mem_kb" =~ ^[0-9]+$ ]]; then
  echo "Error: could not read MemAvailable from /proc/meminfo" >&2
  exit 1
fi
total_mem_mb=$((total_mem_kb / 1024))

# Memory budget per concurrent page job, in MB (same unit as total_mem_mb).
mem_per_page=150

# Job count: as many as the memory budget allows, capped at the core count,
# and never fewer than one.
max_jobs=$(( total_mem_mb / mem_per_page ))
if (( max_jobs > cpu_cores )); then
  max_jobs=$cpu_cores
fi
if (( max_jobs < 1 )); then
  max_jobs=1
fi
#######################################
# Echo a message to stdout, but only in verbose mode.
# Globals:   verbose (read) - message is printed when it equals "true"
# Arguments: the words of the message, passed through to echo
# Returns:   0 when silent, otherwise the status of echo
#######################################
log() {
  [[ "$verbose" == true ]] || return 0
  echo "$@"
}
# ----------------------
# Render pages to grayscale
# ----------------------
#######################################
# Render one page of the input PDF to a grayscale JPEG with Ghostscript.
# Globals:   in, tmpdir, dpi, quality (read)
# Arguments: $1 - page number, used for both -dFirstPage and -dLastPage
# Outputs:   writes $tmpdir/<page>.jpg; progress messages via log;
#            an error line on stderr when gs fails
# Returns:   0 on success, the exit status of gs on failure
#######################################
render_page() {
  local i="$1"
  local img="$tmpdir/$i.jpg"
  local status=0

  log "[RENDER] Page $i: start (PID $$)"
  gs -q -dBATCH -dNOPAUSE \
    -sDEVICE=jpeggray \
    -dJPEGQ="${quality}" \
    -r"${dpi}" \
    -dFirstPage="$i" -dLastPage="$i" \
    -sOutputFile="$img" \
    "$in" || status=$?

  # Report the failure to the caller. Without this the function's status is
  # that of the trailing log call, so a failed render looks successful and
  # the page is silently missing from the result.
  if (( status != 0 )); then
    echo "Error: rendering page $i failed (gs exit status $status)" >&2
    return "$status"
  fi
  log "[RENDER] Page $i: done"
}
# The worker runs in child processes started by parallel, so the functions
# and the variables it reads are exported into the environment.
export -f render_page
export -f log
export in
export tmpdir
export dpi
export quality
export verbose

# One render job per page number, at most $max_jobs running at once.
seq 1 "$pages" \
  | parallel -j "$max_jobs" render_page
# ----------------------
# Process pages: gamma/contrast, optional 1-bit
# ----------------------
#######################################
# Post-process one rendered page with ImageMagick and write it as PNG.
# Globals:   tmpdir, mem_per_page, gamma, contrast, adaptive, threshold (read)
# Arguments: $1 - page number (selects $tmpdir/<page>.jpg)
# Outputs:   writes $tmpdir/<page>_proc.png and removes the source JPEG;
#            progress messages via log; an error line on stderr on failure
# Returns:   0 on success, 1 when magick fails (source JPEG is kept)
#######################################
process_page() {
  local i="$1"
  local img="$tmpdir/$i.jpg"
  local proc="$tmpdir/${i}_proc.png"
  local avail_kb avail_mb
  local -a args

  # Throttle: hold this job until more than twice the per-page budget is
  # reported as available.
  while true; do
    avail_kb=$(awk '/^MemAvailable:/ {print $2}' /proc/meminfo 2>/dev/null) \
      || avail_kb=""
    if [[ ! "$avail_kb" =~ ^[0-9]+$ ]]; then
      # No usable reading: an empty value evaluates to 0 MB, so waiting
      # here would never end. Proceed instead of hanging.
      log "[PROCESS] Page $i: MemAvailable unreadable, skipping RAM wait"
      break
    fi
    avail_mb=$((avail_kb / 1024))
    (( avail_mb > mem_per_page * 2 )) && break
    log "[PROCESS] Page $i: waiting for RAM... (${avail_mb}MB free)"
    sleep 1
  done

  log "[PROCESS] Page $i: start (PID $$)"
  args=(-colorspace Gray)

  # Apply gamma/contrast only when a value is set.
  if [[ -n "$gamma" ]]; then
    args+=(-gamma "$gamma")
  fi
  if [[ -n "$contrast" ]]; then
    args+=(-brightness-contrast "$contrast")
  fi

  # 1-bit output only on request; the adaptive flag wins over a threshold.
  # NOTE(review): the help text calls -a "adaptive thresholding", but this
  # branch only passes -type bilevel - confirm that is the intended operation.
  if [[ "$adaptive" == true ]]; then
    args+=(-type bilevel)
  elif [[ -n "$threshold" ]]; then
    args+=(-threshold "$threshold")
  fi

  # Propagate a conversion failure. Without this the trailing rm -f decides
  # the function's status, so a failed page looks successful.
  if ! magick "$img" "${args[@]}" "$proc"; then
    echo "Error: processing page $i failed" >&2
    return 1
  fi
  log "[PROCESS] Page $i: done"
  rm -f -- "$img"
}
# The worker runs in child processes started by parallel, so the function
# and every variable it reads are exported into the environment.
export -f process_page
export mem_per_page
export adaptive
export gamma
export contrast
export threshold
export verbose
export tmpdir

# One processing job per page number, at most $max_jobs running at once.
seq 1 "$pages" \
  | parallel -j "$max_jobs" process_page
# ----------------------
# Merge into PDF
# ----------------------
log "[INFO] Merging pages into PDF..."

# Build the list in page order from the known page count instead of
# scanning the directory. This makes it possible to notice pages whose
# image was never produced, rather than merging whatever happens to exist.
proc_files=()
missing_pages=()
for (( page = 1; page <= pages; page++ )); do
  page_file="$tmpdir/${page}_proc.png"
  if [[ -f "$page_file" ]]; then
    proc_files+=("$page_file")
  else
    missing_pages+=("$page")
  fi
done

if (( ${#proc_files[@]} == 0 )); then
  echo "Error: no processed images found" >&2
  exit 1
fi

# Refuse to write a PDF that silently lacks pages.
if (( ${#missing_pages[@]} > 0 )); then
  echo "Error: missing processed image for page(s): ${missing_pages[*]}" >&2
  exit 1
fi

img2pdf "${proc_files[@]}" -o "$out"
log "[INFO] Finished: $out"