diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | README.md | 85 | ||||
| -rwxr-xr-x | ssync-fetch | 114 | ||||
| -rwxr-xr-x | ssync-index | 157 | ||||
| -rwxr-xr-x | ssync-queue | 102 |
5 files changed, 459 insertions, 0 deletions
@@ -1,2 +1,3 @@ creds ssync-cron +ssync.conf diff --git a/README.md b/README.md new file mode 100644 index 0000000..3b8c72f --- /dev/null +++ b/README.md @@ -0,0 +1,85 @@ +# ssync + +ssync is a suite of utilities that facilitate syncing a remote device with a local one regardless of file structure. For example, if your remote storage's structure and design differ from your local one, rsync wouldn't be able to easily fetch everything 1:1; so ssync works by indexing all of the files it hasn't fetched yet, then queueing them for download to a single local target dir; then you just have to do whatever you want locally, separately, to reorganize things. I generally use [automv](https://git.senders.io/senders/automv) for common/repeat fetches. + +## setup + +ssync works via three main modules: + +- `ssync-index` - a script that indexes a directory (run separately from remote and local target) +- `ssync-queue` - generates the queue for `-fetch` to fetch +- `ssync-fetch` - fetches off of the queue + +The optional `ssync` is bundled as a single executable that runs the typical (aka "my desired") use-case. + +The typical process is: + +1) have a cron execute `ssync` on the local/target system +2) it will run `index` against the remote and local systems, refreshing the index for anything new +3) it will then run `queue` which generates the queue of files yet-to-be-downloaded from the index, appending any files to the queue +4) finally run `fetch` to process through the queue + +## logic flow + +- (s) establish a lock +- (i) Generate the remote and local indexes +- (q) Compare remote to local, adding the left-side diff to the queue +- (f) iterate over the queue + - (f) check if the file exists + - (f) if not, download +- (s) report queue diff, process duration, and status +- (s) release lock + +### Notes on queue maintenance + +Rather than maintain a queue, each process will generate its own queue from the index diffs. 
Each queue is placed in a specified directory, defaulting to `$XDG_CACHE_DIR/ssync/queue/` (if that is not set, uses `/home/$USER/.config/` if it exists; otherwise creates and stores it in `/tmp/ssync/queue/`) + +Since it runs off of process-local indexes, the queue can be reaped between processes without incurring any potential data loss. + +### Index window + +So long as files on the local system are expected to persist longer than they do on the remote you'll always be safe. But the index window helps set a maximum lookback - so that any older files may be removed from the local system without being resynced. + +In a previous implementation of this, not starting from scratch each run led to cases where a set of sequential files had a missing file in between, like img-1, img-2, img-3, img-5, img-6, with img-4 missing. It was annoying and easy to miss. + +## Configuration + +ssync.conf + +```config +remote_host=HOST +remote_root_dir=/path/to/sync/root/ +keyfile=/path/to/key +local_root_dir=/path/to/local/sync/root/ + +index_window_s=86400 # 24 hours +index_dir=/path/to/index/dir # optional +queue_dir=/path/to/queue/dir # optional +lock_file=/path/to/desired/file.lock # optional +``` + +## commands + +``` +ssync [options] + OPTIONS + -c [CONFIG_FILE] optional config file to use + default: ~/.config/ssync/ssync.conf + -l [LOCK_FILE] optional lock file + default: from config + -q [QUEUE_DIR] optional queue dir + default: from config + -k [KEY_FILE] optional key file + default: from config +``` + +``` +ssync-index [options] CONFIG_FILE + REQUIRED + CONFIG_FILE config file to use + OPTIONS + -l local only (cannot be used in conjunction with -r) + -r remote only (cannot be used in conjunction with -l) + -o [OUTPUT_DIR] index output dir override + -k [KEY_FILE] key file override +``` diff --git a/ssync-fetch b/ssync-fetch new file mode 100755 index 0000000..2a79e64 --- /dev/null +++ b/ssync-fetch @@ -0,0 +1,114 @@ +#!/usr/bin/env sh + +USAGE="ssync-fetch [options] QUEUE_FILE DEST_DIR + 
OPTIONS + -r REMOTE_HOST + remote host to download from such as user@hostname + username can be omitted if identical to $USER + or if set in ssh_config + -k KEY_FILE + ssh-key file to use (needs to be non-interactive) + optional: will use default session key + or key set in ssh_config for REMOTE_HOST + -v verbose logging + -h print this message" + +# HELPER FUNCTIONS + +# Echo all arguments, but only when -v was given (VERBOSE_FLAG non-empty). +# Defined with POSIX syntax: the shebang is sh, where the 'function' keyword is not available. +verbose_log() { + if [ ! -z "$VERBOSE_FLAG" ]; then + echo "$@" + fi +} + +# OPTIONS + +KEY_FILE_FLAG= +KEY_FILE_ARG= +REMOTE_HOST_FLAG= +REMOTE_HOST_ARG= +VERBOSE_FLAG= + +while getopts "hvr:k:" opt; do + case "${opt}" in + h) echo "$USAGE" + exit 1 + ;; + v) VERBOSE_FLAG=1 + ;; + k) KEY_FILE_FLAG=1 + KEY_FILE_ARG="${OPTARG}" + ;; + c) CONCURRENCY_FLAG=1 + CONCURRENCY_ARG="${OPTARG}" + ;; + r) REMOTE_HOST_FLAG=1 + REMOTE_HOST_ARG="${OPTARG}" + ;; + esac +done + +shift $(($OPTIND -1)) + +if [ $# -ne 2 ]; then + echo "$USAGE" + exit 1 +fi + +QUEUE_FILE=$1 +DEST_DIR=$2 + +# VALIDATION + +if [ ! -f "$QUEUE_FILE" ]; then + echo "Queue file '$QUEUE_FILE' does not exist" + exit 1 +fi + +if [ ! -d "$DEST_DIR" ]; then + echo "Destination directory '$DEST_DIR' does not exist" + exit 1 +fi + +if [ -z "$REMOTE_HOST_FLAG" ]; then + echo "Remote host option -r required" + exit 1 +elif [ -z "$REMOTE_HOST_ARG" ]; then + echo "Invalid remote host '$REMOTE_HOST_ARG'" + exit 1 +fi + +# CONFIGURATIONS + +ssh_id_param="" +if [ ! -z "${KEY_FILE_FLAG}" ]; then + if [ ! -f "${KEY_FILE_ARG}" ]; then + echo "Identity file '${KEY_FILE_ARG}' does not exist" + exit 1 + fi + ssh_id_param="-i ${KEY_FILE_ARG}" +fi + +concurrent_param="" +if [ ! 
-z "${CONCURRENCY_FLAG}" ]; then + if [ "${CONCURRENCY_ARG}" -gt 0 ]; then + concurrent_param="-X nrequests=${CONCURRENCY_ARG}" + fi +fi +real_dest=$(realpath "$DEST_DIR") +tmp_dir=$(mktemp -d /tmp/ssync_fetch_run.XXXXXX) +ts=$(date +%s) +verbose_log "Writing temp files to ${tmp_dir} with timestamp ${ts}" + +# GENERATE BATCH +# one '@reget REMOTE_PATH DEST_DIR/' sftp command per queued path +batch_file=$tmp_dir/batch_${ts} +verbose_log "Converting the queue file to sftp batch file: ${batch_file}" +xargs -I{} echo "@reget {} ${real_dest}/" < "$QUEUE_FILE" >> "$batch_file" + +verbose_log "Beginning download" +verbose_log "sftp -N ${ssh_id_param} -b ${batch_file} ${REMOTE_HOST_ARG}" + +# ssh_id_param is intentionally unquoted: it is either empty or the two words '-i KEY_FILE' +sftp -N ${ssh_id_param} -b "${batch_file}" "${REMOTE_HOST_ARG}" + +verbose_log "ssync-fetch finished" + diff --git a/ssync-index b/ssync-index new file mode 100755 index 0000000..e19ee78 --- /dev/null +++ b/ssync-index @@ -0,0 +1,157 @@ +#!/usr/bin/env sh + +USAGE="ssync-index [options] [CONFIG_FILE] + OPTIONS + -k [KEY_FILE] override configured key file + -o [OUTPUT_DIR] override configured index output dir + -l local only + -r remote only + -v verbose logging + -h print this message" + +# HELPER METHODS +# POSIX function syntax is used because the shebang is sh. +# Echo $1, but only when -v was given. +verbose_log() { + if [ ! -z "$VERBOSE_FLAG" ]; then + echo "$1" + fi +} +# Print the number of lines in file $1 (count only, no filename, no padding). +lines() { + wc -l < "$1" | tr -d ' ' +} +# Create the default index directory and print its path. +get_default_index_dir() { + local cache_path=ssync/index/ + local cache_dir= + if [ ! 
-z "$XDG_CACHE_DIR" ]; then + cache_dir=$XDG_CACHE_DIR + else + cache_dir=/tmp/ + fi + + # cache_path is appended exactly once, here + mkdir -p "$cache_dir/$cache_path" + echo "$cache_dir/$cache_path" +} + +# GLOBAL VALUES +DEFAULT_INDEX_WINDOW=86400 +LOCAL_ONLY_FLAG= +REMOTE_ONLY_FLAG= +VERBOSE_FLAG= +KEY_FILE_FLAG= +KEY_FILE_ARG= +OUTPUT_DIR_FLAG= +OUTPUT_DIR_ARG= + +# OPTIONS PARSING +while getopts "hvlro:k:" opt; do + case "${opt}" in + h) echo "$USAGE" + exit 1 + ;; + o) OUTPUT_DIR_FLAG=1 + OUTPUT_DIR_ARG="$OPTARG" + ;; + k) KEY_FILE_FLAG=1 + KEY_FILE_ARG="$OPTARG" + ;; + l) LOCAL_ONLY_FLAG=1 + ;; + r) REMOTE_ONLY_FLAG=1 + ;; + v) VERBOSE_FLAG=1 + ;; + esac +done + +shift $(($OPTIND -1)) + +if [ $# -eq 0 ]; then + echo "$USAGE" + exit 1 + +fi + +CONFIG_FILE=$1 + +# CONFIG PARSING +if [ -z "$CONFIG_FILE" ]; then + echo "CONFIG_FILE is missing." + exit 1 +fi + +if [ ! -f "$CONFIG_FILE" ]; then + echo "$CONFIG_FILE does not exist." + exit 1 +fi + +# '.' is the POSIX spelling of 'source' +. "$CONFIG_FILE" + +# -k overrides the keyfile read from the config +if [ ! -z "$KEY_FILE_FLAG" ]; then + keyfile=$KEY_FILE_ARG +fi + +# CONFIG VALIDATION +if [ -z "$remote_host" ]; then + echo "Config is missing remote_host" + exit 1 +fi + +if [ -z "$remote_root_dir" ]; then + echo "Config is missing remote_root_dir" + exit 1 +fi + +if [ -z "$keyfile" ]; then + echo "Config is missing keyfile" + exit 1 +elif [ ! -f "$keyfile" ]; then + echo "Configured keyfile '$keyfile' does not exist" + exit 1 +fi + +if [ -z "$local_root_dir" ]; then + echo "Config is missing local_root_dir" + exit 1 +elif [ ! -d "$local_root_dir" ]; then + echo "Configured local_root_dir '$local_root_dir' does not exist" + exit 1 +fi + +# VALUE SETTING + +if [ -z "$index_window_s" ]; then + index_window_s=$DEFAULT_INDEX_WINDOW +fi + +local_index_file= +remote_index_file= +if [ ! -z "$OUTPUT_DIR_FLAG" ]; then + index_dir=$OUTPUT_DIR_ARG +fi + +if [ ! -z "$index_dir" ]; then + if [ ! 
-d "$index_dir" ]; then + echo "Configured index_dir '$index_dir' does not exist" + exit 1 + fi +else + index_dir=$(get_default_index_dir) +fi + +# RUN INDEXING + +ts=$(date -u +%s) +local_index_file=${index_dir}/${ts}_local.idx +remote_index_file=${index_dir}/${ts}_remote.idx +newermt=$(date -d "$index_window_s seconds ago" -u -Is) +verbose_log "Targetting files from $index_window_s seconds ago: $newermt" + +# index remote host relative to your fetch dir +# (full paths of remote files modified within the index window) +if [ -z "$LOCAL_ONLY_FLAG" ]; then + verbose_log "Generating remote index from ${remote_root_dir} to ${remote_index_file}" + ssh -i "$keyfile" "$remote_host" \ + "find ${remote_root_dir} -type f -newermt $newermt -exec realpath {} \;" > "$remote_index_file" + verbose_log "Indexed $(lines "$remote_index_file") remote files" +fi + +# index local filenames ONLY +if [ -z "$REMOTE_ONLY_FLAG" ]; then + verbose_log "Generate local index from ${local_root_dir} to ${local_index_file}" + find "${local_root_dir}" -type f -exec basename {} \; > "$local_index_file" + verbose_log "Indexed $(lines "$local_index_file") local files" +fi diff --git a/ssync-queue b/ssync-queue new file mode 100755 index 0000000..4f98731 --- /dev/null +++ b/ssync-queue @@ -0,0 +1,102 @@ +#!/usr/bin/env bash + +USAGE="ssync-queue [options] -l LOCAL_INDEX_FILE -r REMOTE_INDEX_FILE -q QUEUE_OUTPUT_FILE + OPTIONS + -l LOCAL_INDEX_FILE + target local index file + -r REMOTE_INDEX_FILE + target remote index file + -q QUEUE_OUTPUT_FILE + queue output file + -v verbose logging + -h print this message" + +# HELPER FUNCTIONS + +# Echo all arguments, but only when -v was given. +verbose_log() { + if [ ! 
-z "$VERBOSE_FLAG" ]; then + echo "$@" + fi +} +# Print the number of lines in file $1 (also works for a <(...) path). +lines() { + echo $(wc -l < "$1") +} + +# OPTIONS + +VERBOSE_FLAG= +LOCAL_FILE_FLAG= +REMOTE_FILE_FLAG= +QUEUE_FILE_FLAG= +CONFIG_FILE_FLAG= +LOCAL_FILE_ARG= +REMOTE_FILE_ARG= +QUEUE_FILE_ARG= + +while getopts "hvl:r:q:c:" opt; do + case "${opt}" in + l) LOCAL_FILE_FLAG=1 + LOCAL_FILE_ARG="${OPTARG}" + ;; + r) REMOTE_FILE_FLAG=1 + REMOTE_FILE_ARG="${OPTARG}" + ;; + q) QUEUE_FILE_FLAG=1 + QUEUE_FILE_ARG="${OPTARG}" + ;; + v) VERBOSE_FLAG=1 + ;; + h) echo "$USAGE" + exit 1 + ;; + esac +done + +shift $(($OPTIND -1)) + +if [ -z "$LOCAL_FILE_FLAG" ]; then + echo "-l LOCAL_INDEX_FILE option is required" + exit 1 +elif [ ! -f "$LOCAL_FILE_ARG" ]; then + echo "local index file '$LOCAL_FILE_ARG' does not exist" + exit 1 +fi + +if [ -z "$REMOTE_FILE_FLAG" ]; then + echo "-r REMOTE_INDEX_FILE option is required" + exit 1 +elif [ ! -f "$REMOTE_FILE_ARG" ]; then + echo "remote index file '${REMOTE_FILE_ARG}' does not exist" + exit 1 +fi + +if [ -z "$QUEUE_FILE_FLAG" ]; then + echo "-q QUEUE_OUTPUT_FILE option is required" + exit 1 +fi + +queue_tmp_dir=$(mktemp -d /tmp/ssync_queue_run.XXXXXX) +verbose_log "Writing temp files to $queue_tmp_dir" + +# get remote filenames +remote_index_filenames_file=$queue_tmp_dir/remote_filenames.idx +verbose_log "Writing remote index filenames to $remote_index_filenames_file" +xargs -I{} basename {} < "$REMOTE_FILE_ARG" > "$remote_index_filenames_file" +original_line_count=$(lines "$REMOTE_FILE_ARG") +unique_line_count=$(lines <(sort -u "$remote_index_filenames_file")) +verbose_log "Remote index contains $unique_line_count unique filenames out of $original_line_count indexed files" + +if [ "$original_line_count" != "$unique_line_count" ]; then + echo "Remote index contains non-unique files. 
Check $REMOTE_FILE_ARG to find which files aren't unique" >&2 +fi + +# find which filenames are unique to the remote +# sort -u on both sides: with duplicated names comm pairs lines one-to-one, so a name present twice remotely and once locally would otherwise still be reported as remote-only +remote_only_filenames_file=$queue_tmp_dir/remote_only_filenames.idx +comm -23 <(sort -u "$remote_index_filenames_file") <(sort -u "$LOCAL_FILE_ARG") \ + > "$remote_only_filenames_file" +verbose_log "Found $(lines "$remote_only_filenames_file") remote only files" + +# push matching files into queue +# each remote-only filename is mapped back to its full remote path(s) +xargs -I{} grep "^.*{}$" "$REMOTE_FILE_ARG" < "$remote_only_filenames_file" >> "$QUEUE_FILE_ARG" +verbose_log "Added $(lines "$QUEUE_FILE_ARG") to the queue" + |