#!/usr/bin/env sh
#
# ssync-fetch - download every path listed in a queue file from a remote
# host with sftp, resuming partially transferred files (sftp "reget").
#
# Provenance (commit that introduced this file):
#   commit  ca3d6f3f9401c1ab3e1017827bfac315b09db3cf
#   Author: Steph Enders
#   Date:   Fri, 12 Dec 2025 12:34:03 -0500
#   Subject: Break ssync into multiple processes
#
#   ssync is being broken into 3 sub-processes:
#     1) ssync-index - indexes remote and local dirs
#     2) ssync-queue - generates the queue of not-yet-fetched files
#     3) ssync-fetch - downloads the queue
#   These will ultimately be executed through ssync, which will allow for
#   unified config files and transfer locking.
#
#   The rewrite is being done in hopes of preventing "missing files" during
#   large queues and to ensure completeness. The breakdown into multiple
#   files should also help with narrowing the logic and improving the
#   process without interfering with the execution and readability of the
#   other stages.
#
#   This commit has complete subprocesses - though ssync-index needs
#   remediation to remove the config file - as we need predictable I/O to
#   be able to pass the index files into the queue process.
#
# Exit status: 0 on success (or -h), 1 on usage/validation errors,
# otherwise the non-zero exit status of sftp.

USAGE="ssync-fetch [options] QUEUE_FILE DEST_DIR
  OPTIONS
    -r REMOTE_HOST
         remote host to download from such as user@hostname
         username can be omitted if identical to $USER
         or if set in ssh_config
    -k KEY_FILE
         ssh-key file to use (needs to be non-interactive)
         optional: will use default session key
         or key set in ssh_config for REMOTE_HOST
    -c CONCURRENCY
         number of concurrent sftp requests (positive integer)
         optional: will use the sftp default
    -v   verbose logging
    -h   print this message"

# HELPER FUNCTIONS

# Print the arguments on stdout, but only when -v was given.
# POSIX function syntax: the 'function' keyword is not available in sh.
verbose_log() {
  if [ -n "$VERBOSE_FLAG" ]; then
    printf '%s\n' "$*"
  fi
}

# Print an error message on stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the usage text on stderr and exit with status 1.
usage_error() {
  printf '%s\n' "$USAGE" >&2
  exit 1
}

# OPTIONS

KEY_FILE_FLAG=
KEY_FILE_ARG=
CONCURRENCY_FLAG=
CONCURRENCY_ARG=
REMOTE_HOST_FLAG=
REMOTE_HOST_ARG=
VERBOSE_FLAG=

# "c:" must be part of the optstring, otherwise the c) branch can never run.
while getopts "hvr:k:c:" opt; do
  case "${opt}" in
    h)
      printf '%s\n' "$USAGE"
      exit 0
      ;;
    v)
      VERBOSE_FLAG=1
      ;;
    k)
      KEY_FILE_FLAG=1
      KEY_FILE_ARG="${OPTARG}"
      ;;
    c)
      CONCURRENCY_FLAG=1
      CONCURRENCY_ARG="${OPTARG}"
      ;;
    r)
      REMOTE_HOST_FLAG=1
      REMOTE_HOST_ARG="${OPTARG}"
      ;;
    *)
      # getopts has already reported the unknown option / missing argument.
      usage_error
      ;;
  esac
done

shift $((OPTIND - 1))

if [ $# -ne 2 ]; then
  usage_error
fi

QUEUE_FILE=$1
DEST_DIR=$2

# VALIDATION

if [ ! -f "$QUEUE_FILE" ]; then
  die "Queue file '$QUEUE_FILE' does not exist"
fi

if [ ! -d "$DEST_DIR" ]; then
  die "Destination directory '$DEST_DIR' does not exist"
fi

if [ -z "$REMOTE_HOST_FLAG" ]; then
  die "Remote host option -r required"
fi
case "$REMOTE_HOST_ARG" in
  '' | -*)
    # An empty host is meaningless; a leading '-' would be read by sftp as
    # an option rather than a destination.
    die "Invalid remote host '$REMOTE_HOST_ARG'"
    ;;
esac

# CONFIGURATIONS

# The sftp argument list is accumulated in the positional parameters so that
# values containing whitespace survive intact (POSIX sh has no arrays).
# -N is kept from the original invocation.
set -- -N

if [ -n "${KEY_FILE_FLAG}" ]; then
  if [ ! -f "${KEY_FILE_ARG}" ]; then
    die "Identity file '${KEY_FILE_ARG}' does not exist"
  fi
  set -- "$@" -i "${KEY_FILE_ARG}"
fi

if [ -n "${CONCURRENCY_FLAG}" ]; then
  case "${CONCURRENCY_ARG}" in
    '' | *[!0-9]*)
      die "Invalid concurrency '${CONCURRENCY_ARG}'"
      ;;
  esac
  # A value of 0 leaves the sftp default in place.
  if [ "${CONCURRENCY_ARG}" -gt 0 ]; then
    set -- "$@" -X "nrequests=${CONCURRENCY_ARG}"
  fi
fi

# Physical absolute path of the destination, without relying on realpath(1).
real_dest=$(CDPATH= cd -- "$DEST_DIR" && pwd -P) \
  || die "Unable to resolve destination directory '$DEST_DIR'"

tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/ssync_fetch_run.XXXXXX") \
  || die "Unable to create temporary directory"
# Remove the temp directory on every exit path. The signal traps turn a
# signal into a normal exit so the EXIT trap also runs in shells that do
# not fire it on signals.
trap 'rm -rf -- "$tmp_dir"' EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

ts=$(date +%s)
verbose_log "Writing temp files to ${tmp_dir} with timestamp ${ts}"

# GENERATE BATCH
batch_file=$tmp_dir/batch_${ts}
verbose_log "Converting the queue file to sftp batch file: ${batch_file}"

# One "@reget REMOTE LOCAL_DIR/" line per non-blank queue entry.
#  - The queue is read verbatim by awk; nothing strips quotes, backslashes
#    or leading blanks from the entries.
#  - Both paths are wrapped in double quotes because sftp requires quoting
#    for pathnames that contain spaces; embedded double quotes are
#    backslash-escaped.
#    NOTE(review): glob characters in remote paths are presumably still
#    expanded by sftp, as they were before - confirm if queue entries may
#    contain * ? [.
#  - The destination is passed through the environment rather than
#    "awk -v", which would interpret backslash escapes in the value.
#  - The queue is fed on stdin so its file name is never parsed by awk.
SSYNC_FETCH_DEST="${real_dest}/" LC_ALL=C awk '
  # Wrap s in double quotes, backslash-escaping embedded double quotes.
  function sftp_quote(s,    out, i, c) {
    out = ""
    for (i = 1; i <= length(s); i++) {
      c = substr(s, i, 1)
      if (c == "\"") {
        out = out "\\"
      }
      out = out c
    }
    return "\"" out "\""
  }
  $0 !~ /^[ \t]*$/ {
    printf "@reget %s %s\n", sftp_quote($0), sftp_quote(ENVIRON["SSYNC_FETCH_DEST"])
  }
' < "$QUEUE_FILE" > "$batch_file" \
  || die "Unable to write batch file '${batch_file}'"

if [ ! -s "$batch_file" ]; then
  # Nothing queued: do not open a connection just to close it again.
  verbose_log "Queue file '${QUEUE_FILE}' has no entries; nothing to download"
  verbose_log "ssync-fetch finished"
  exit 0
fi

set -- "$@" -b "${batch_file}" "${REMOTE_HOST_ARG}"

verbose_log "Beginning download"
verbose_log "sftp $*"

sftp "$@"
sftp_status=$?

if [ "$sftp_status" -ne 0 ]; then
  # Surface the failure so the caller never mistakes a partial transfer
  # for a complete one.
  printf '%s\n' "sftp exited with status ${sftp_status}; download incomplete" >&2
  exit "$sftp_status"
fi

verbose_log "ssync-fetch finished"