forked from nathanhaigh/parallel-rsync
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprsync
More file actions
executable file
·126 lines (110 loc) · 3.64 KB
/
prsync
File metadata and controls
executable file
·126 lines (110 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/bin/bash
# Abort the script as soon as a command exits non-zero (bash errexit).
# For a pipeline only the exit status of its last command is considered.
set -e
# Inspired by https://gist.github.com/akorn/644855ddaa8065f564be
# Usage:
# prsync [--parallel=N] [rsync args...]
#
# Options:
# --parallel=N Use N parallel processes for transfer. Must be the first argument. Defaults to the number of available processors (`nproc`), falling back to 10 if `nproc` is unavailable.
#
# Notes:
# * Requires GNU Parallel
# * Use with ssh-keys. Lots of password prompts will get very annoying.
# * Does an itemize-changes first, then chunks the resulting file list and launches N parallel
# rsyncs to transfer a chunk each.
# * Be careful with the options you pass through to rsync. Common ones work, but
# test unusual options up front before relying on them.
#
# Define colours for STDERR text
RED='\033[0;31m'
ORANGE='\033[0;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

# Refuse to start unless every external tool this script invokes is on PATH.
# The message goes to stderr and the exit status is non-zero so that callers
# (cron jobs, CI, wrapper scripts) can detect the failure.
for required_tool in parallel awk rsync; do
  if ! command -v "${required_tool}" &> /dev/null; then
    echo -e "${RED}${required_tool} could not be found${NC}" >&2
    exit 1
  fi
done
# Succeeds only when $1 is a positive decimal integer (leading zeros allowed,
# but the value itself must be greater than zero).
is_positive_integer() {
  [[ "$1" =~ ^0*[1-9][0-9]*$ ]]
}

# An optional leading --parallel=N selects the number of rsync processes. It is
# consumed here so that "$@" holds only rsync arguments afterwards.
if [[ "$1" == --parallel=* ]]; then
  PARALLEL_RSYNC="${1#--parallel=}"
  shift
  # Reject empty, zero or non-numeric values up front: the value is later used
  # as a loop bound in awk and as the job count for GNU Parallel.
  if ! is_positive_integer "${PARALLEL_RSYNC}"; then
    echo -e "${RED}ERROR: --parallel expects a positive integer, got '${PARALLEL_RSYNC}'${NC}" >&2
    exit 1
  fi
  # Force base 10 so that a value such as 08 is not parsed as (invalid) octal.
  PARALLEL_RSYNC=$(( 10#${PARALLEL_RSYNC} ))
else
  # Default: one process per available processor, or 10 when nproc is missing.
  PARALLEL_RSYNC=$(nproc 2> /dev/null || echo 10)
fi
echo -e "${GREEN}INFO: Using up to ${PARALLEL_RSYNC} processes for transfer ...${NC}"
# Private scratch directory for the file list and the chunk files; the EXIT
# trap removes it on every exit path.
TMPDIR=$(mktemp -d)
trap 'rm -rf -- "${TMPDIR}"' EXIT

echo -e "${GREEN}INFO: Determining file list for transfer ...${NC}"
# Dry run that prints "<size in bytes> <path>" for every item rsync would
# transfer. Output and diagnostics are captured separately and the exit status
# is checked, so a failing dry run (unreachable host, bad path, ...) is
# reported instead of being mistaken for "nothing to transfer".
DRY_RUN_STATUS=0
rsync "$@" --out-format="%l %n" --no-v --dry-run \
  > "${TMPDIR}/files.raw" 2> "${TMPDIR}/dry-run.err" || DRY_RUN_STATUS=$?
case "${DRY_RUN_STATUS}" in
  0) ;;
  23 | 24)
    # rsync's "partial transfer" codes: the list is still usable, so warn only.
    cat "${TMPDIR}/dry-run.err" >&2
    echo -e "${ORANGE}WARN: rsync dry run reported problems (exit ${DRY_RUN_STATUS}); file list may be incomplete.${NC}" >&2
    ;;
  *)
    cat "${TMPDIR}/dry-run.err" >&2
    echo -e "${RED}ERROR: rsync dry run failed (exit ${DRY_RUN_STATUS}).${NC}" >&2
    exit "${DRY_RUN_STATUS}"
    ;;
esac

# sorted by size (descending)
grep -v "sending incremental file list" "${TMPDIR}/files.raw" \
  | sort --numeric-sort --reverse \
  > "${TMPDIR}/files.all"

# check for nothing-to-do
# The arithmetic expansion strips the padding some wc implementations emit.
TOTAL_FILES=$(( $(wc -l < "${TMPDIR}/files.all") ))
TOTAL_SIZE=$(awk '{ts+=$1}END{printf "%.0f", ts}' < "${TMPDIR}/files.all")
echo -e "${GREEN}INFO: ${TOTAL_FILES} ($(( TOTAL_SIZE/1024**2 )) MB) files to transfer.${NC}"
if [[ "${TOTAL_FILES}" -eq 0 ]]; then
  echo -e "${ORANGE}WARN: Nothing to transfer :)${NC}"
  exit 0
fi
echo -e "${GREEN}INFO: Distributing and reversing chunks (via awk)...${NC}"
SECONDS=0
# A single awk pass assigns each file (input is sorted largest first) to the
# chunk that currently carries the fewest bytes.
#   even-numbered chunks: written straight away, so they stay large -> small
#   odd-numbered chunks:  held in memory and written reversed, small -> large
awk -v n="${PARALLEL_RSYNC}" -v tmp="${TMPDIR}" '
  BEGIN {
    # Every chunk starts with zero bytes assigned.
    for (k = 0; k < n; k++) {
      loads[k] = 0
    }
  }

  {
    # Field 1 is the size; everything after the first space is the path.
    bytes = $1
    name = substr($0, index($0, " ") + 1)

    # Linear scan for the least-loaded chunk; ties go to the lowest index.
    target = 0
    for (k = 1; k < n; k++) {
      if (loads[k] < loads[target]) {
        target = k
      }
    }
    loads[target] += bytes

    if (target % 2 != 0) {
      # Odd chunk: remember the path, it is written out in END.
      held[target, ++held_count[target]] = name
      next
    }
    # Even chunk: append to its file immediately.
    print name > (tmp "/chunk." target)
  }

  END {
    # Emit each odd chunk from its last stored path back to its first.
    for (k = 1; k < n; k += 2) {
      chunk_file = tmp "/chunk." k
      j = held_count[k]
      while (j >= 1) {
        print held[k, j] > chunk_file
        j--
      }
      # Release the descriptor before moving on to the next chunk.
      close(chunk_file)
    }
  }
' "${TMPDIR}/files.all"
echo -e "${GREEN}DONE (${SECONDS}s)${NC}"
# Debug aid: uncomment to dump the contents of every chunk file.
#find "${TMPDIR}" -type f -name "chunk.*" -exec cat {} \;
echo -e "${GREEN}INFO: Starting transfers ...${NC}"
# One rsync per chunk file, at most ${PARALLEL_RSYNC} running at a time.
# --quote makes GNU Parallel shell-quote the command words, so rsync arguments
# containing spaces or shell metacharacters (e.g. -e "ssh -p 2222") reach rsync
# exactly as given to this script instead of being re-split by the shell that
# parallel spawns for each job.
find "${TMPDIR}" -type f -name "chunk.*" \
  | parallel --quote -j "${PARALLEL_RSYNC}" -t --verbose --progress \
      rsync --files-from={} "$@"
echo -e "${GREEN}DONE (${SECONDS}s)${NC}"