aboutsummaryrefslogtreecommitdiff
path: root/ssync-fetch
diff options
context:
space:
mode:
authorSteph Enders <steph@senders.io>2025-12-12 12:34:03 -0500
committerSteph Enders <steph@senders.io>2025-12-12 12:34:03 -0500
commitca3d6f3f9401c1ab3e1017827bfac315b09db3cf (patch)
tree2f3fca005b01334160538190b0f808c394ba65e1 /ssync-fetch
parent69fbbd87f895580ebb5fb2b58362ba6243bd1043 (diff)
Break ssync into multiple processes
Breaking ssync into 3 sub-processes: 1) ssync-index - indexes remote and local dirs 2) ssync-queue - generates queue of yet-fetched files 3) ssync-fetch - downloads the queue Which will ultimately be executed using ssync which will allow for unified config files and transfer locking. The rewrite is being done in hopes of preventing "missing files" during large queues and ensure completeness. The breakdown into multiple files should also help with narrowing the logic and improving the process without interfering with the execution and readability of the other stages. This commit has complete subprocesses - though ssync-index needs remediation to remove the config file - as we need predictable I/O to be able to pass the index files into the queue process
Diffstat (limited to 'ssync-fetch')
-rwxr-xr-xssync-fetch114
1 files changed, 114 insertions, 0 deletions
diff --git a/ssync-fetch b/ssync-fetch
new file mode 100755
index 0000000..2a79e64
--- /dev/null
+++ b/ssync-fetch
@@ -0,0 +1,114 @@
+#!/usr/bin/env sh
+
+# ssync-fetch: converts QUEUE_FILE into an sftp batch of reget commands
+# and runs that batch against the host given with -r, downloading into
+# DEST_DIR.
+
+# Help text. It is printed for -h and when the number of positional
+# arguments is not exactly two.
+# The string is double-quoted, so the shell substitutes the current value
+# of USER for $USER at the moment USAGE is assigned.
+USAGE="ssync-fetch [options] QUEUE_FILE DEST_DIR
+ OPTIONS
+ -r REMOTE_HOST
+ remote host to download from such as user@hostname
+ username can be omitted if identical to $USER
+ or if set in ssh_config
+ -k KEY_FILE
+ ssh-key file to use (needs to be non-interactive)
+ optional: will use default session key
+ or key set in ssh_config for REMOTE_HOST
+ -v verbose logging
+ -h print this message"
+
+# HELPER FUNCTIONS
+
+# Print the arguments on stdout, but only when verbose mode is enabled.
+# Globals:   VERBOSE_FLAG (read) - any non-empty value enables output
+# Arguments: the words to print; echo joins them with single spaces
+# Returns:   0 when verbose mode is off, otherwise the status of echo
+#
+# Defined with the POSIX "name() { ...; }" form: the "function" keyword is
+# a bash/ksh extension, and the shebang asks for plain sh.
+verbose_log() {
+  if [ -n "$VERBOSE_FLAG" ]; then
+    echo "$@"
+  fi
+}
+
+# OPTIONS
+
+# Option state: a *_FLAG variable is set to 1 when its option is seen and
+# the matching *_ARG variable holds that option's argument.
+# CONCURRENCY_* are cleared here as well so that variables of the same
+# name inherited from the environment cannot switch the option on.
+KEY_FILE_FLAG=
+KEY_FILE_ARG=
+REMOTE_HOST_FLAG=
+REMOTE_HOST_ARG=
+CONCURRENCY_FLAG=
+CONCURRENCY_ARG=
+VERBOSE_FLAG=
+
+# "c:" is listed in the optstring so that the c) arm below can be reached;
+# without it getopts rejects -c as an illegal option.
+while getopts "hvr:k:c:" opt; do
+  case "${opt}" in
+    h)
+      echo "$USAGE"
+      exit 0 # help was asked for, so this is not a failure
+      ;;
+    v)
+      VERBOSE_FLAG=1
+      ;;
+    k)
+      KEY_FILE_FLAG=1
+      KEY_FILE_ARG="${OPTARG}"
+      ;;
+    c)
+      # Read later in the script when the sftp parameters are assembled.
+      CONCURRENCY_FLAG=1
+      CONCURRENCY_ARG="${OPTARG}"
+      ;;
+    r)
+      REMOTE_HOST_FLAG=1
+      REMOTE_HOST_ARG="${OPTARG}"
+      ;;
+    *)
+      # getopts has already printed a diagnostic for the unknown option
+      # or the missing option argument; stop instead of carrying on with
+      # a half-parsed command line.
+      echo "$USAGE" >&2
+      exit 1
+      ;;
+  esac
+done
+
+# Drop the parsed options so that $1 and $2 are the positional arguments.
+shift $((OPTIND - 1))
+
+# Exactly two operands (queue file, destination directory) must be left
+# once the options have been shifted away.
+[ $# -eq 2 ] || {
+  echo "$USAGE"
+  exit 1
+}
+
+QUEUE_FILE="$1"
+DEST_DIR="$2"
+
+# VALIDATION
+
+# The queue has to be an existing regular file.
+[ -f "$QUEUE_FILE" ] || {
+  echo "Queue file '$QUEUE_FILE' does not exist"
+  exit 1
+}
+
+# The destination has to be an existing directory.
+[ -d "$DEST_DIR" ] || {
+  echo "Destination directory '$DEST_DIR' does not exist"
+  exit 1
+}
+
+# -r is mandatory, and its argument must not be the empty string.
+if [ -n "$REMOTE_HOST_FLAG" ]; then
+  if [ -z "$REMOTE_HOST_ARG" ]; then
+    echo "Invalid remote host '$REMOTE_HOST_ARG'"
+    exit 1
+  fi
+else
+  echo "Remote host option -r required"
+  exit 1
+fi
+
+# CONFIGURATIONS
+
+# The sftp argument list is assembled in the positional parameters ("$@");
+# the script's own operands were already copied to QUEUE_FILE and DEST_DIR.
+# Keeping every argument as a separate word means a key file or host
+# containing spaces is passed to sftp intact.
+# -N switches off the quiet mode that sftp's -b batch flag implies.
+set -- -N
+
+if [ -n "${KEY_FILE_FLAG}" ]; then
+  if [ ! -f "${KEY_FILE_ARG}" ]; then
+    echo "Identity file '${KEY_FILE_ARG}' does not exist"
+    exit 1
+  fi
+  set -- "$@" -i "${KEY_FILE_ARG}"
+fi
+
+# Forward the requested number of concurrent requests to sftp. A value
+# that is not a positive integer is ignored (the test's own complaint
+# about a non-numeric value is silenced).
+if [ -n "${CONCURRENCY_FLAG}" ]; then
+  if [ "${CONCURRENCY_ARG}" -gt 0 ] 2>/dev/null; then
+    set -- "$@" -X "nrequests=${CONCURRENCY_ARG}"
+  fi
+fi
+
+real_dest=$(realpath "${DEST_DIR}") || exit 1
+tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/ssync_fetch_run.XXXXXX") || exit 1
+# Remove the scratch directory on every exit path. The signal trap turns
+# HUP/INT/TERM into a normal exit so that the EXIT trap also runs in
+# shells that do not fire it on signals.
+trap 'rm -rf -- "${tmp_dir:?}"' EXIT
+trap 'exit 1' HUP INT TERM
+ts=$(date +%s)
+verbose_log "Writing temp files to ${tmp_dir} with timestamp ${ts}"
+
+# GENERATE BATCH
+batch_file="${tmp_dir}/batch_${ts}"
+verbose_log "Converting the queue file to sftp batch file: ${batch_file}"
+# One "@reget" line per non-empty queue entry. The queue is read line by
+# line rather than through xargs, which treats quotes and backslashes as
+# syntax and stops at an unmatched quote, losing the remaining entries.
+# Both paths are wrapped in double quotes, with embedded backslashes and
+# double quotes escaped, because sftp splits unquoted arguments on spaces.
+# NOTE(review): characters special to sftp's remote glob (*, ?, [) are
+# still passed through unescaped, as before - confirm whether queue
+# entries can contain them.
+quoted_dest=$(printf '%s\n' "${real_dest}/" | sed 's/[\\"]/\\&/g')
+sed 's/[\\"]/\\&/g' "${QUEUE_FILE}" |
+  while IFS= read -r remote_path || [ -n "${remote_path}" ]; do
+    [ -n "${remote_path}" ] || continue
+    printf '@reget "%s" "%s"\n' "${remote_path}" "${quoted_dest}"
+  done >"${batch_file}"
+
+set -- "$@" -b "${batch_file}" "${REMOTE_HOST_ARG}"
+
+verbose_log "Beginning download"
+verbose_log "sftp $*"
+
+sftp "$@"
+sftp_status=$?
+
+verbose_log "ssync-fetch finished"
+# Hand the sftp result to the caller; previously the script always exited
+# 0 because the final log call succeeded.
+exit "${sftp_status}"
+