diff options
| author | Steph Enders <steph@senders.io> | 2025-12-12 12:34:03 -0500 |
|---|---|---|
| committer | Steph Enders <steph@senders.io> | 2025-12-12 12:34:03 -0500 |
| commit | ca3d6f3f9401c1ab3e1017827bfac315b09db3cf (patch) | |
| tree | 2f3fca005b01334160538190b0f808c394ba65e1 /ssync-queue | |
| parent | 69fbbd87f895580ebb5fb2b58362ba6243bd1043 (diff) | |
Break ssync into multiple processes
Breaking ssync into 3 sub-processes:
1) ssync-index - indexes remote and local dirs
2) ssync-queue - generates queue of not-yet-fetched files
3) ssync-fetch - downloads the queue
These will ultimately be executed through ssync, which will allow for
unified config files and transfer locking.
The rewrite is being done in hopes of preventing "missing files"
during large queues and ensuring completeness.
The breakdown into multiple files should also help with narrowing the
logic and improving the process without interfering with the execution
and readability of the other stages.
This commit has complete subprocesses - though ssync-index needs
remediation to remove the config file - as we need predictable I/O to
be able to pass the index files into the queue process
Diffstat (limited to 'ssync-queue')
| -rwxr-xr-x | ssync-queue | 102 |
1 files changed, 102 insertions, 0 deletions
#!/usr/bin/env sh
#
# ssync-queue - build the queue of remote files that still need fetching.
#
# Every line of the remote index is reduced to its file name. Names that do
# not occur in the local index are "remote only", and every remote index line
# carrying such a name is appended to the queue file.
#
# NOTE(review): names are compared against whole lines of the local index, so
# the local index is presumably a list of bare file names - confirm against
# ssync-index.
#
# The script sticks to POSIX sh to match its shebang: no `function` keyword
# and no process substitution.

set -u

USAGE="ssync-queue [options] -l LOCAL_INDEX_FILE -r REMOTE_INDEX_FILE -q QUEUE_OUTPUT_FILE
  OPTIONS
    -l LOCAL_INDEX_FILE
       target local index file
    -r REMOTE_INDEX_FILE
       target remote index file
    -q QUEUE_OUTPUT_FILE
       queue output file
    -v verbose logging
    -h print this message"

# HELPER FUNCTIONS

# Print the arguments to stdout, but only when -v was given.
verbose_log() {
  if [ -n "$VERBOSE_FLAG" ]; then
    echo "$@"
  fi
}

# Print the number of lines in file $1.
lines() {
  # Feeding wc through stdin keeps the file name out of its output, and the
  # arithmetic expansion drops the padding some wc implementations emit.
  echo "$(( $(wc -l < "$1") ))"
}

# Print the arguments to stderr and exit with status 1.
die() {
  echo "$@" >&2
  exit 1
}

# OPTIONS

VERBOSE_FLAG=
LOCAL_FILE_FLAG=
REMOTE_FILE_FLAG=
QUEUE_FILE_FLAG=
LOCAL_FILE_ARG=
REMOTE_FILE_ARG=
QUEUE_FILE_ARG=

while getopts "hvl:r:q:c:" opt; do
  case "${opt}" in
    l)
      LOCAL_FILE_FLAG=1
      LOCAL_FILE_ARG="${OPTARG}"
      ;;
    r)
      REMOTE_FILE_FLAG=1
      REMOTE_FILE_ARG="${OPTARG}"
      ;;
    q)
      QUEUE_FILE_FLAG=1
      QUEUE_FILE_ARG="${OPTARG}"
      ;;
    v)
      VERBOSE_FLAG=1
      ;;
    c)
      # Still accepted so callers that pass -c keep working; the value is
      # not used by this script.
      ;;
    h)
      echo "$USAGE"
      exit 0
      ;;
    *)
      # getopts has already reported the offending option on stderr.
      echo "$USAGE" >&2
      exit 1
      ;;
  esac
done

shift $((OPTIND - 1))

if [ -z "$LOCAL_FILE_FLAG" ]; then
  die "-l LOCAL_INDEX_FILE option is required"
elif [ ! -f "$LOCAL_FILE_ARG" ]; then
  die "local index file '$LOCAL_FILE_ARG' does not exist"
fi

if [ -z "$REMOTE_FILE_FLAG" ]; then
  die "-r REMOTE_INDEX_FILE option is required"
elif [ ! -f "$REMOTE_FILE_ARG" ]; then
  die "remote index file '${REMOTE_FILE_ARG}' does not exist"
fi

if [ -z "$QUEUE_FILE_FLAG" ]; then
  die "-q QUEUE_OUTPUT_FILE option is required"
fi

queue_tmp_dir=$(mktemp -d /tmp/ssync_queue_run.XXXXXX) \
  || die "could not create a temporary directory"
# Remove the scratch files on every exit path. Signals are routed through
# `exit` because not every sh runs the EXIT trap when killed by a signal.
trap 'rm -rf -- "$queue_tmp_dir"' EXIT
trap 'exit 1' HUP INT TERM
verbose_log "Writing temp files to $queue_tmp_dir"

# get remote filenames
remote_index_filenames_file=$queue_tmp_dir/remote_filenames.idx
verbose_log "Writing remote index filenames to $remote_index_filenames_file"
# Strip trailing slashes, then everything up to the last slash, then drop
# lines that ended up empty. Unlike `xargs basename`, this leaves quotes and
# backslashes in file names untouched.
sed -e 's|/*$||' -e 's|.*/||' -e '/^$/d' < "$REMOTE_FILE_ARG" \
  > "$remote_index_filenames_file" \
  || die "could not read remote index file '${REMOTE_FILE_ARG}'"
original_line_count=$(lines "$remote_index_filenames_file")
unique_line_count=$(( $(sort -u "$remote_index_filenames_file" | wc -l) ))
verbose_log "Remote index contains $unique_line_count unique filenames out of $original_line_count indexed files"

if [ "$original_line_count" -ne "$unique_line_count" ]; then
  echo "Remote index contains non-unique files. Check $REMOTE_FILE_ARG to find which files aren't unique" >&2
fi

# find which filenames are unique to the remote
# comm needs sorted input; the sorted copies live in the scratch directory.
sorted_remote_file=$queue_tmp_dir/remote_filenames.sorted
sorted_local_file=$queue_tmp_dir/local_index.sorted
remote_only_filenames_file=$queue_tmp_dir/remote_only_filenames.idx
sort < "$remote_index_filenames_file" > "$sorted_remote_file" \
  || die "could not sort remote filenames"
sort < "$LOCAL_FILE_ARG" > "$sorted_local_file" \
  || die "could not read local index file '$LOCAL_FILE_ARG'"
comm -23 "$sorted_remote_file" "$sorted_local_file" \
  > "$remote_only_filenames_file" \
  || die "could not compare remote and local indexes"
verbose_log "Found $(lines "$remote_only_filenames_file") remote only files"

# push matching files into queue
# One pass over the remote index: a line is queued when its file name is an
# exact member of the remote-only set. Names are never used as regular
# expressions, and each remote index line is queued at most once.
queue_additions_file=$queue_tmp_dir/queue_additions.idx
awk -v wanted_file="$remote_only_filenames_file" '
  BEGIN {
    while ((getline wanted_name < wanted_file) > 0) {
      wanted[wanted_name] = 1
    }
    close(wanted_file)
  }
  {
    name = $0
    sub(/\/+$/, "", name)
    sub(/.*\//, "", name)
    if (name != "" && (name in wanted)) {
      print
    }
  }
' < "$REMOTE_FILE_ARG" > "$queue_additions_file" \
  || die "could not select queue entries from '${REMOTE_FILE_ARG}'"
cat "$queue_additions_file" >> "$QUEUE_FILE_ARG" \
  || die "could not write queue file '$QUEUE_FILE_ARG'"
# Count only the lines added now; the queue file may already have entries.
verbose_log "Added $(lines "$queue_additions_file") to the queue"