#!/usr/bin/env sh
#
# ssync-queue - generate the queue of not-yet-fetched files for ssync.
#
# Takes the base name of every entry in the remote index, compares those names
# with the lines of the local index, and appends each remote index entry whose
# file name is missing locally to the queue output file.
#
# NOTE(review): the local index is compared line-for-line against remote base
# names, so it is presumably a list of bare file names - confirm against
# ssync-index.
#
# Origin
#   Commit:  ca3d6f3f9401c1ab3e1017827bfac315b09db3cf
#   From:    Steph Enders
#   Date:    Fri, 12 Dec 2025 12:34:03 -0500
#   Subject: Break ssync into multiple processes
#   File:    ssync-queue (new file, mode 100755)
#
#   Breaking ssync into 3 sub-processes:
#     1) ssync-index - indexes remote and local dirs
#     2) ssync-queue - generates queue of yet-fetched files
#     3) ssync-fetch - downloads the queue
#   These will ultimately be executed through ssync, which will allow for
#   unified config files and transfer locking.
#
#   The rewrite is being done in hopes of preventing "missing files" during
#   large queues and to ensure completeness. The breakdown into multiple files
#   should also help with narrowing the logic and improving the process
#   without interfering with the execution and readability of the other
#   stages.
#
#   This commit has complete subprocesses - though ssync-index needs
#   remediation to remove the config file - as we need predictable I/O to be
#   able to pass the index files into the queue process.

USAGE="ssync-queue [options] -l LOCAL_INDEX_FILE -r REMOTE_INDEX_FILE -q QUEUE_OUTPUT_FILE
  OPTIONS
    -l LOCAL_INDEX_FILE
        target local index file
    -r REMOTE_INDEX_FILE
        target remote index file
    -q QUEUE_OUTPUT_FILE
        queue output file
    -v  verbose logging
    -h  print this message"

# Scratch directory for the current run; empty when none exists.
queue_tmp_dir=

# HELPER FUNCTIONS

#######################################
# Print a message only when verbose logging (-v) is enabled.
# Globals:   VERBOSE_FLAG (read)
# Arguments: the message words
# Outputs:   the message on stdout when VERBOSE_FLAG is non-empty
#######################################
verbose_log() {
  if [ -n "$VERBOSE_FLAG" ]; then
    echo "$@"
  fi
}

#######################################
# Count lines.
# Arguments: $1 - file to count (optional; stdin is counted when omitted)
# Outputs:   the bare line count on stdout
#######################################
lines() {
  # Redirect instead of passing the name so wc prints only the number, and
  # strip the padding some wc implementations put around it.
  if [ "$#" -gt 0 ]; then
    wc -l < "$1"
  else
    wc -l
  fi | tr -d '[:space:]'
}

#######################################
# Remove the scratch directory of the current run, if any.
# Globals:   queue_tmp_dir (read, cleared)
#######################################
cleanup() {
  if [ -n "$queue_tmp_dir" ]; then
    rm -rf -- "$queue_tmp_dir"
    queue_tmp_dir=
  fi
}

#######################################
# Compare the indexes and append the missing remote entries to the queue.
# Globals:   LOCAL_FILE_ARG, REMOTE_FILE_ARG, QUEUE_FILE_ARG,
#            queue_tmp_dir (all read)
# Outputs:   progress on stdout (verbose only); a warning on stderr when the
#            remote index holds the same file name more than once
# Returns:   0 on success, 1 when an intermediate step fails
#######################################
build_queue() {
  # get remote filenames
  remote_index_filenames_file=$queue_tmp_dir/remote_filenames.idx
  verbose_log "Writing remote index filenames to $remote_index_filenames_file"
  # Strip everything up to the last '/' without letting quotes or backslashes
  # in a path be interpreted; blank results are dropped.
  awk '{ sub(/.*\//, ""); if ($0 != "") print }' \
    < "$REMOTE_FILE_ARG" > "$remote_index_filenames_file" || return 1

  sorted_remote_file=$queue_tmp_dir/remote_filenames.sorted
  LC_ALL=C sort -u < "$remote_index_filenames_file" \
    > "$sorted_remote_file" || return 1

  original_line_count=$(lines "$remote_index_filenames_file")
  unique_line_count=$(lines "$sorted_remote_file")
  verbose_log "Remote index contains $unique_line_count unique filenames out of $original_line_count indexed files"

  if [ "$original_line_count" -ne "$unique_line_count" ]; then
    echo "Remote index contains non-unique files. Check $REMOTE_FILE_ARG to find which files aren't unique" >&2
  fi

  # find which filenames are unique to the remote
  # comm needs both inputs sorted with the same collation, hence LC_ALL=C
  # on every sort and on comm itself.
  sorted_local_file=$queue_tmp_dir/local_filenames.sorted
  LC_ALL=C sort -u < "$LOCAL_FILE_ARG" > "$sorted_local_file" || return 1

  remote_only_filenames_file=$queue_tmp_dir/remote_only_filenames.idx
  LC_ALL=C comm -23 "$sorted_remote_file" "$sorted_local_file" \
    > "$remote_only_filenames_file" || return 1
  verbose_log "Found $(lines "$remote_only_filenames_file") remote only files"

  # push matching files into queue
  # One pass over the remote index with an exact base-name lookup: names are
  # never treated as regular expressions and each entry is emitted once.
  new_entries_file=$queue_tmp_dir/new_entries.idx
  awk -v names="$remote_only_filenames_file" '
    BEGIN {
      while ((getline name < names) > 0) {
        wanted[name] = 1
      }
      close(names)
    }
    {
      name = $0
      sub(/.*\//, "", name)
      if (name != "" && (name in wanted)) {
        print
      }
    }
  ' < "$REMOTE_FILE_ARG" > "$new_entries_file" || return 1

  # The queue file is appended to, never truncated.
  cat "$new_entries_file" >> "$QUEUE_FILE_ARG" || return 1
  verbose_log "Added $(lines "$new_entries_file") to the queue"
}

#######################################
# Parse the command line, validate it and build the queue.
# Globals:   VERBOSE_FLAG, *_FILE_FLAG, *_FILE_ARG, queue_tmp_dir (written)
# Arguments: the script's command-line arguments
# Outputs:   usage for -h on stdout; validation errors on stderr
# Returns:   0 on success or -h, 1 on invalid usage or failure
#######################################
main() {
  # OPTIONS
  VERBOSE_FLAG=
  LOCAL_FILE_FLAG=
  REMOTE_FILE_FLAG=
  QUEUE_FILE_FLAG=
  LOCAL_FILE_ARG=
  REMOTE_FILE_ARG=
  QUEUE_FILE_ARG=

  OPTIND=1
  while getopts "hvl:r:q:c:" opt; do
    case "${opt}" in
      l)
        LOCAL_FILE_FLAG=1
        LOCAL_FILE_ARG="${OPTARG}"
        ;;
      r)
        REMOTE_FILE_FLAG=1
        REMOTE_FILE_ARG="${OPTARG}"
        ;;
      q)
        QUEUE_FILE_FLAG=1
        QUEUE_FILE_ARG="${OPTARG}"
        ;;
      v)
        VERBOSE_FLAG=1
        ;;
      c)
        # Still accepted so existing invocations keep working; the value is
        # not used by this stage.
        ;;
      h)
        echo "$USAGE"
        return 0
        ;;
      *)
        # getopts has already reported the offending option.
        echo "$USAGE" >&2
        return 1
        ;;
    esac
  done

  shift $((OPTIND - 1))

  if [ -z "$LOCAL_FILE_FLAG" ]; then
    echo "-l LOCAL_INDEX_FILE option is required" >&2
    return 1
  elif [ ! -f "$LOCAL_FILE_ARG" ]; then
    echo "local index file '$LOCAL_FILE_ARG' does not exist" >&2
    return 1
  fi

  if [ -z "$REMOTE_FILE_FLAG" ]; then
    echo "-r REMOTE_INDEX_FILE option is required" >&2
    return 1
  elif [ ! -f "$REMOTE_FILE_ARG" ]; then
    echo "remote index file '${REMOTE_FILE_ARG}' does not exist" >&2
    return 1
  fi

  if [ -z "$QUEUE_FILE_FLAG" ]; then
    echo "-q QUEUE_OUTPUT_FILE option is required" >&2
    return 1
  fi

  queue_tmp_dir=$(mktemp -d /tmp/ssync_queue_run.XXXXXX) || {
    queue_tmp_dir=
    echo "unable to create a temporary directory" >&2
    return 1
  }
  verbose_log "Writing temp files to $queue_tmp_dir"

  build_queue
  status=$?
  cleanup
  return "$status"
}

# Remove the scratch directory on every exit path, including signals.
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

main "$@"