From 230bb634388c41426e2cd32ba411188b38ddcb5b Mon Sep 17 00:00:00 2001 From: Steph Enders Date: Sun, 14 Dec 2025 19:14:49 -0500 Subject: Add dry-run flag for testing Should help debug potential things --- VERSION | 2 +- docs/ssync-fetch.1 | 3 ++ docs/ssync.1 | 5 +++ ssync | 35 +++++++++++--------- ssync-fetch | 96 +++++++++++++++++++++++++++++++++++++++++------------- 5 files changed, 103 insertions(+), 38 deletions(-) diff --git a/VERSION b/VERSION index 21fa149..738ab53 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -"December 2025" "2.1.3" \ No newline at end of file +"December 2025" "2.1.4" \ No newline at end of file diff --git a/docs/ssync-fetch.1 b/docs/ssync-fetch.1 index 3b0a2e4..71fec10 100644 --- a/docs/ssync-fetch.1 +++ b/docs/ssync-fetch.1 @@ -15,6 +15,9 @@ Backend used for fetching remote files. Can be or .MR rsync 1 .TP +.BR \-n " dry-run" +Dry-run the fetch operation. rsync supports dry-running. sftp will simply log the operation. Verbose logging is recommended. +.TP .BI \-k " identity_file" The .BR optional diff --git a/docs/ssync.1 b/docs/ssync.1 index 15c0b45..b750ab5 100644 --- a/docs/ssync.1 +++ b/docs/ssync.1 @@ -11,6 +11,11 @@ ssync \- simple suite of syncing scripts .BI \-b " backend" Fetching backend: sftp (default) / rsync .TP +.BR \-n " no-fetch / dry-run" +Dry-run fetch operation. Effectively passes this flag to ssync-fetch. See +.MR ssync-fetch 1 +for more details on the dry-run fetch operation. 
+.TP .BR \-v " verbose logging" .TP .BR \-h " print this message" diff --git a/ssync b/ssync index 441c6a3..c135091 100755 --- a/ssync +++ b/ssync @@ -3,7 +3,8 @@ set -e USAGE="ssync [options] CONFIG_FILE OPTIONS - -b BACKEND + -b BACKEND + -n no-fetch / dry-run -v verbose logging -h print this message" @@ -18,20 +19,20 @@ lines() { } # OPTIONS -NO_LOCK_FLAG= +NO_FETCH_FLAG= VERBOSE_FLAG= while getopts "nvhb:" opt; do case "${opt}" in h) echo "$USAGE" exit 1 ;; - n) NO_LOCK_FLAG=1 + n) NO_FETCH_FLAG=1 ;; v) VERBOSE_FLAG=1 ;; - b) BACKEND_FLAG=1 - BACKEND="$OPTARG" - ;; + b) BACKEND_FLAG=1 + BACKEND="$OPTARG" + ;; esac done @@ -54,7 +55,7 @@ if [ ! -f "$CONFIG_FILE" ]; then fi # load config file -. $CONFIG_FILE +. $(realpath $CONFIG_FILE) if [ -d "$ssync_dir" ]; then if [ ! -f "$ssync_dir/ssync-index" ]; then @@ -146,7 +147,10 @@ elif [ ! -z "$backend" ]; then backend_opt="-b $backend" fi - +no_fetch_opt="" +if [ ! -z "$NO_FETCH_FLAG" ]; then + no_fetch_opt="-n" +fi RUN=$(date -Is | sed 's/[:]/-/g') output_dir=$output_files_dir/$RUN @@ -175,20 +179,21 @@ fi verbose_log "Indexing local files at $local_root_dir" $ssync_dir/ssync-index $verbose_opt -b \ - -o $local_index_file \ - $local_root_dir + -o $local_index_file \ + $local_root_dir # Queueing verbose_log "Creating queue from indexes" $ssync_dir/ssync-queue $verbose_opt \ - -l $local_index_file \ - -r $remote_index_file \ - -o $queue_file + -l $local_index_file \ + -r $remote_index_file \ + -o $queue_file # Fetching verbose_log "Fetching files from queue" -$ssync_dir/ssync-fetch $verbose_opt $key_file_opt $backend_opt \ - -r $remote_host \ +$ssync_dir/ssync-fetch $verbose_opt $key_file_opt $backend_opt $no_fetch_opt \ + -p 2 \ + -r $remote_host \ $queue_file \ $fetch_output_dir diff --git a/ssync-fetch b/ssync-fetch index 4b91a2e..b9fc806 100755 --- a/ssync-fetch +++ b/ssync-fetch @@ -2,16 +2,19 @@ USAGE="ssync-fetch [options] QUEUE_FILE DEST_DIR OPTIONS - -r REMOTE_HOST - remote host to download from such as 
user@hostname - username can be omitted if identical to $USER - or if set in ssh_config + -b BACKEND + sftp (default) | rsync -k KEY_FILE ssh-key file to use (needs to be non-interactive) optional: will use default session key or key set in ssh_config for REMOTE_HOST - -b BACKEND - sftp (default) | rsync + -n dry-run + -p PARALLEL + integer indicating how many concurrent downloads + -r REMOTE_HOST + remote host to download from such as user@hostname + username can be omitted if identical to $USER + or if set in ssh_config -v verbose logging -h print this message" @@ -34,9 +37,12 @@ REMOTE_HOST_FLAG= REMOTE_HOST_ARG= BACKEND_FLAG= BACKEND_ARG= +PARALLEL_FLAG= +PARALLEL_ARG= VERBOSE_FLAG= +DRY_RUN_FLAG= -while getopts "hvr:k:b:" opt; do +while getopts "hvnr:k:b:p:" opt; do case "${opt}" in h) echo "$USAGE" exit 1 @@ -49,9 +55,14 @@ while getopts "hvr:k:b:" opt; do r) REMOTE_HOST_FLAG=1 REMOTE_HOST_ARG="${OPTARG}" ;; - b) BACKEND_FLAG=1 - BACKEND_ARG="${OPTARG}" - ;; + b) BACKEND_FLAG=1 + BACKEND_ARG="${OPTARG}" + ;; + p) PARALLEL_FLAG=1 + PARALLEL_ARG="${OPTARG}" + ;; + n) DRY_RUN_FLAG=1 + ;; esac done @@ -96,8 +107,15 @@ fi if [ "$BACKEND" != "sftp" ]; then if [ "$BACKEND" != "rsync" ]; then - echo "Invalid backend $BACKEND" - exit 1 + echo "Invalid backend $BACKEND" + exit 1 + fi +fi + +if [ ! -z "$PARALLEL_FLAG" ]; then + if [ "$PARALLEL_ARG" -le 1 ]; then + echo "Invalid parallel level: must be > 1" + exit 1 fi fi @@ -112,6 +130,7 @@ if [ ! 
-z "${KEY_FILE_FLAG}" ]; then ssh_id_param="-i ${KEY_FILE_ARG}" fi +dry_run_param="" real_dest=$(realpath $DEST_DIR) tmp_dir=$(mktemp -d /tmp/ssync_fetch_run.XXXXXX) ts=$(date +%s) @@ -120,21 +139,54 @@ verbose_log "Writing temp files to ${tmp_dir} with timestamp ${ts}" # GENERATE BATCH if [ "$BACKEND" = "sftp" ]; then + batch_file=$tmp_dir/batch_${ts} verbose_log "Converting the queue file to sftp batch file: ${batch_file}" - cat $QUEUE_FILE | xargs -I{} echo -e "@reget {} ${real_dest}/" > $batch_file + sort $QUEUE_FILE | xargs -I{} echo -e "@reget {} ${real_dest}/" > $batch_file + + if [ ! -z "$PARALLEL_FLAG" ]; then + split --additional-suffix=.batch -en l/${PARALLEL_ARG} $batch_file $tmp_dir/split_${ts}_ + verbose_log "Beginning ${PARALLEL_ARG} parallel downloads for files: $(ls $tmp_dir/split_${ts}_*.batch)" + verbose_log "Running 'sftp -N ${ssh_id_param} -b '{}' ${REMOTE_HOST_ARG}' across ${PARALLEL_ARG} jobs" + + if [ ! -z "$DRY_RUN_FLAG" ]; then + verbose_log "Dry-run fetch" + dry_run_param="echo" + fi + + ls -1 $tmp_dir/split_${ts}_*.batch | xargs -P${PARALLEL_ARG} -I '{}' ${dry_run_param} sftp -N ${ssh_id_param} -b '{}' ${REMOTE_HOST_ARG} + else + verbose_log "Beginning download" + verbose_log "sftp -N ${ssh_id_param} -b ${batch_file} ${REMOTE_HOST_ARG}" + sftp -N ${ssh_id_param} -b ${batch_file} ${REMOTE_HOST_ARG} + fi - verbose_log "Beginning download" - verbose_log "sftp -N ${ssh_id_param} -b ${batch_file} ${REMOTE_HOST_ARG}" - sftp -N ${ssh_id_param} -b ${batch_file} ${REMOTE_HOST_ARG} elif [ "$BACKEND" = "rsync" ]; then - verbose_log "Fetching files from queue file: $QUEUE_FILE" - verbose_log "Beginning download" - verbose_log "rsync --files-from=${QUEUE_FILE} rsync://${REMOTE_HOST_ARG} ${real_dest}" - rsync -e "ssh ${ssh_id_param}" \ - -av --no-relative \ - --files-from=${QUEUE_FILE} ${REMOTE_HOST_ARG}:/ ${real_dest} + + if [ ! -z "$DRY_RUN_FLAG" ]; then + verbose_log "Dry-run fetch" + dry_run_param="n" + fi + + if [ ! 
-z "$PARALLEL_FLAG" ]; then + split --additional-suffix=.batch -en l/${PARALLEL_ARG} $QUEUE_FILE $tmp_dir/split_${ts}_ + verbose_log "Beginning ${PARALLEL_ARG} parallel downloads for files: $(ls $tmp_dir/split_${ts}_*.batch)" + verbose_log "ls -1 $tmp_dir/split_${ts}_*.batch | xargs -P${PARALLEL_ARG} -I '{}' rsync -e \"ssh ${ssh_id_param}\" -av --no-relative --files-from='{}' ${REMOTE_HOST_ARG}:/ ${real_dest}" + ls -1 $tmp_dir/split_${ts}_*.batch | \ + xargs -P${PARALLEL_ARG} -I '{}' \ + rsync -e "ssh ${ssh_id_param}" \ + -av${dry_run_param} \ + --no-relative \ + --files-from='{}' ${REMOTE_HOST_ARG}:/ ${real_dest} + else + verbose_log "Fetching files from queue file: $QUEUE_FILE" + verbose_log "Beginning download" + verbose_log "rsync -e \"ssh ${ssh_id_param}\" -av ${dry_run_param} --no-relative --files-from=${QUEUE_FILE} rsync://${REMOTE_HOST_ARG} ${real_dest}" + rsync -e "ssh ${ssh_id_param}" \ + -av${dry_run_param} --no-relative \ + --files-from=${QUEUE_FILE} ${REMOTE_HOST_ARG}:/ ${real_dest} + fi fi verbose_log "ssync-fetch finished" -- cgit v1.2.3-54-g00ecf