blob: b9fc8064fc8056620e6e0770c66cb97b8660d875 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
|
#!/usr/bin/env sh
# Help text echoed for -h and when the positional argument count is wrong.
# NOTE: the string is double-quoted, so $USER below is replaced by the current
# value of the USER variable at the moment this assignment runs.
USAGE="ssync-fetch [options] QUEUE_FILE DEST_DIR
OPTIONS
-b BACKEND
sftp (default) | rsync
-k KEY_FILE
ssh-key file to use (needs to be non-interactive)
optional: will use default session key
or key set in ssh_config for REMOTE_HOST
-n dry-run
-p PARALLEL
integer indicating how many concurrent downloads
-r REMOTE_HOST
remote host to download from such as user@hostname
username can be omitted if identical to $USER
or if set in ssh_config
-v verbose logging
-h print this message"
# HELPER FUNCTIONS
# Print the arguments as one log line when verbose mode is enabled.
# Globals:   VERBOSE_FLAG (read) - any non-empty value enables output
# Arguments: message words; joined with the first character of IFS
#            (a space unless IFS was changed)
# Outputs:   the message on stdout, or nothing when verbose mode is off
# Returns:   0 when verbose mode is off, otherwise printf's status
verbose_log() {
  if [ -n "$VERBOSE_FLAG" ]; then
    # printf never mistakes the message for options (-n, -e) and never
    # expands backslash sequences, so paths are logged exactly as given.
    printf '%s\n' "$*"
  fi
}
# Print the number of newline characters (as counted by wc -l) in a file.
# Arguments: $1 - path of the file to count
# Outputs:   the bare line count on stdout
# Returns:   1 if the file cannot be read, otherwise printf's status
lines() {
  # Feed the file on stdin so wc prints only the number (no file name to cut
  # away), and quote the path so names containing blanks or glob characters
  # reach the redirection as a single word.
  _lines_count=$(wc -l < "$1") || return 1
  # Some wc implementations pad the count with blanks; expanding the value
  # textually inside $(( )) normalises it to a bare integer.
  printf '%s\n' "$(($_lines_count))"
}
# OPTIONS
# Every option records a *_FLAG (non-empty once the option was seen) and, for
# options that take a value, a *_ARG holding that value. All start out empty.
KEY_FILE_FLAG=
KEY_FILE_ARG=
REMOTE_HOST_FLAG=
REMOTE_HOST_ARG=
BACKEND_FLAG=
BACKEND_ARG=
PARALLEL_FLAG=
PARALLEL_ARG=
VERBOSE_FLAG=
DRY_RUN_FLAG=
while getopts "hvnr:k:b:p:" opt; do
  case "${opt}" in
    h)
      echo "$USAGE"
      exit 1
      ;;
    v)
      VERBOSE_FLAG=1
      ;;
    k)
      KEY_FILE_FLAG=1
      KEY_FILE_ARG="${OPTARG}"
      ;;
    r)
      REMOTE_HOST_FLAG=1
      REMOTE_HOST_ARG="${OPTARG}"
      ;;
    b)
      BACKEND_FLAG=1
      BACKEND_ARG="${OPTARG}"
      ;;
    p)
      PARALLEL_FLAG=1
      PARALLEL_ARG="${OPTARG}"
      ;;
    n)
      DRY_RUN_FLAG=1
      ;;
    *)
      # getopts reports an unknown option or a missing option argument by
      # setting opt to '?' (it has already printed its own diagnostic).
      # Stop here instead of silently carrying on with a half-parsed
      # command line.
      echo "$USAGE"
      exit 1
      ;;
  esac
done
# Drop the parsed options so only the positional arguments remain.
shift $((OPTIND - 1))
# Exactly two positional arguments must remain after option parsing.
if [ "$#" -ne 2 ]; then
  echo "$USAGE"
  exit 1
fi
QUEUE_FILE=$1
DEST_DIR=$2
# VALIDATION
# Each failed check prints a message and exits 1; an empty queue is not an
# error and exits 0 because there is nothing to fetch.
if [ ! -f "$QUEUE_FILE" ]; then
  echo "Queue file '$QUEUE_FILE' does not exist"
  exit 1
elif [ "$(lines "$QUEUE_FILE")" -le 0 ]; then
  echo "Queue file empty. Exiting"
  exit 0
fi
if [ ! -d "$DEST_DIR" ]; then
  echo "Destination directory '$DEST_DIR' does not exist"
  exit 1
fi
# -r is mandatory and its value must not be the empty string.
if [ -z "$REMOTE_HOST_FLAG" ]; then
  echo "Remote host option -r required"
  exit 1
elif [ -z "$REMOTE_HOST_ARG" ]; then
  echo "Invalid remote host '$REMOTE_HOST_ARG'"
  exit 1
fi
# Backend defaults to sftp when -b was not given.
if [ -z "$BACKEND_FLAG" ]; then
  BACKEND="sftp"
else
  BACKEND=$BACKEND_ARG
fi
case "$BACKEND" in
  sftp|rsync)
    ;;
  *)
    echo "Invalid backend $BACKEND"
    exit 1
    ;;
esac
if [ -n "$PARALLEL_FLAG" ]; then
  # Reject anything that is not a plain unsigned integer first: handing a
  # non-number to `[ -le ]` only produces a test error that reads as "false",
  # which used to let the bad value through to split and xargs.
  case "$PARALLEL_ARG" in
    ''|*[!0-9]*)
      echo "Invalid parallel level '$PARALLEL_ARG': must be an integer > 1"
      exit 1
      ;;
  esac
  if [ "$PARALLEL_ARG" -le 1 ]; then
    echo "Invalid parallel level: must be > 1"
    exit 1
  fi
fi
# CONFIGURATIONS
# ssh_id_param holds the "-i KEY_FILE" words handed to sftp/ssh; it stays
# empty when -k was not given so ssh falls back to its own key selection.
ssh_id_param=""
if [ -n "${KEY_FILE_FLAG}" ]; then
  if [ ! -f "${KEY_FILE_ARG}" ]; then
    echo "Identity file '${KEY_FILE_ARG}' does not exist"
    exit 1
  fi
  ssh_id_param="-i ${KEY_FILE_ARG}"
fi
# Filled in later, per backend, when -n (dry-run) was requested.
dry_run_param=""
# Absolute destination path. Quoted so a directory name containing blanks is
# resolved as one path instead of several.
real_dest=$(realpath "$DEST_DIR") || {
  echo "Cannot resolve destination directory '$DEST_DIR'"
  exit 1
}
# Private scratch directory for batch/split files. Abort if it cannot be
# created: with an empty tmp_dir the batch files would be written under "/".
# NOTE(review): the directory is not removed when the script ends.
tmp_dir=$(mktemp -d /tmp/ssync_fetch_run.XXXXXX) || {
  echo "Cannot create temporary directory"
  exit 1
}
# Epoch seconds, used as a suffix in the generated file names.
ts=$(date +%s)
verbose_log "Writing temp files to ${tmp_dir} with timestamp ${ts}"
# GENERATE BATCH
# Run the download with the selected backend, either as one job or fanned out
# over PARALLEL_ARG concurrent jobs via xargs -P.
#
# Dry-run (-n) is implemented per backend:
#   sftp  - the sftp command line is prefixed with "echo", so it is printed
#           instead of executed (applies to both the serial and parallel path)
#   rsync - "n" is appended to the -av flags, giving rsync's own -n dry-run
#
# ${ssh_id_param} and ${dry_run_param} are expanded unquoted on purpose: they
# are either empty (and must vanish) or hold separate words ("-i KEY").
#
# The exit status of the transfer command is remembered and becomes the exit
# status of the script, so callers can tell a failed fetch from a good one.
transfer_status=0
if [ "$BACKEND" = "sftp" ]; then
  batch_file=$tmp_dir/batch_${ts}
  verbose_log "Converting the queue file to sftp batch file: ${batch_file}"
  # One "@reget REMOTE_PATH DEST/" line per queue entry; per sftp(1) a leading
  # '@' suppresses echoing of the batch command. xargs runs the external echo
  # program here, which has to accept -e (GNU coreutils echo does).
  sort "$QUEUE_FILE" | xargs -I{} echo -e "@reget {} ${real_dest}/" > "$batch_file"
  if [ -n "$DRY_RUN_FLAG" ]; then
    verbose_log "Dry-run fetch"
    dry_run_param="echo"
  fi
  if [ -n "$PARALLEL_FLAG" ]; then
    # Split the batch into PARALLEL_ARG line-aligned chunks, skipping empty ones.
    split --additional-suffix=.batch -en "l/${PARALLEL_ARG}" "$batch_file" "$tmp_dir/split_${ts}_"
    verbose_log "Beginning ${PARALLEL_ARG} parallel downloads for files: $(ls "$tmp_dir"/split_"${ts}"_*.batch)"
    verbose_log "Running 'sftp -N ${ssh_id_param} -b '{}' ${REMOTE_HOST_ARG}' across ${PARALLEL_ARG} jobs"
    # The chunk names come from mktemp/split, so listing them with ls is safe.
    ls -1 "$tmp_dir"/split_"${ts}"_*.batch |
      xargs -P"${PARALLEL_ARG}" -I '{}' \
        ${dry_run_param} sftp -N ${ssh_id_param} -b '{}' "${REMOTE_HOST_ARG}" ||
      transfer_status=$?
  else
    verbose_log "Beginning download"
    verbose_log "sftp -N ${ssh_id_param} -b ${batch_file} ${REMOTE_HOST_ARG}"
    # Honour -n here as well: without the prefix a "dry run" would download.
    ${dry_run_param} sftp -N ${ssh_id_param} -b "$batch_file" "${REMOTE_HOST_ARG}" ||
      transfer_status=$?
  fi
elif [ "$BACKEND" = "rsync" ]; then
  if [ -n "$DRY_RUN_FLAG" ]; then
    verbose_log "Dry-run fetch"
    dry_run_param="n"
  fi
  if [ -n "$PARALLEL_FLAG" ]; then
    # rsync reads the queue directly, so the queue file itself is split.
    split --additional-suffix=.batch -en "l/${PARALLEL_ARG}" "$QUEUE_FILE" "$tmp_dir/split_${ts}_"
    verbose_log "Beginning ${PARALLEL_ARG} parallel downloads for files: $(ls "$tmp_dir"/split_"${ts}"_*.batch)"
    verbose_log "ls -1 $tmp_dir/split_${ts}_*.batch | xargs -P${PARALLEL_ARG} -I '{}' rsync -e \"ssh ${ssh_id_param}\" -av${dry_run_param} --no-relative --files-from='{}' ${REMOTE_HOST_ARG}:/ ${real_dest}"
    ls -1 "$tmp_dir"/split_"${ts}"_*.batch |
      xargs -P"${PARALLEL_ARG}" -I '{}' \
        rsync -e "ssh ${ssh_id_param}" \
        -av${dry_run_param} \
        --no-relative \
        --files-from='{}' "${REMOTE_HOST_ARG}:/" "${real_dest}" ||
      transfer_status=$?
  else
    verbose_log "Fetching files from queue file: $QUEUE_FILE"
    verbose_log "Beginning download"
    # Log exactly the command that is executed below.
    verbose_log "rsync -e \"ssh ${ssh_id_param}\" -av${dry_run_param} --no-relative --files-from=${QUEUE_FILE} ${REMOTE_HOST_ARG}:/ ${real_dest}"
    rsync -e "ssh ${ssh_id_param}" \
      -av${dry_run_param} --no-relative \
      --files-from="${QUEUE_FILE}" "${REMOTE_HOST_ARG}:/" "${real_dest}" ||
      transfer_status=$?
  fi
fi
verbose_log "ssync-fetch finished"
exit "$transfer_status"
|