From b3bb29fb13bf40b8bf6e5eefdbc214609b8f6042 Mon Sep 17 00:00:00 2001 From: Bill Date: Sat, 17 Apr 2021 10:30:46 -0400 Subject: Retention gemlog update --- docker-compose.yml | 1 + gemini/gemlog/2021-04-17-capsule-log-retention.gmi | 50 ++++++++++++++++++++++ gemini/gemlog/index.gmi | 1 + gemlog-staging/enjoying-music.gmi | 2 + stats/calc.sh | 6 +-- 5 files changed, 57 insertions(+), 3 deletions(-) create mode 100755 gemini/gemlog/2021-04-17-capsule-log-retention.gmi diff --git a/docker-compose.yml b/docker-compose.yml index b6e45dd..fe2fc92 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,6 +15,7 @@ services: build: capsule/ ports: - "1965:1965" + restart: unless-stopped volumes: - "${PWD}/volumes/log/jgs:/var/log/jgs" - "${PWD}/gemini:/var/gemini" diff --git a/gemini/gemlog/2021-04-17-capsule-log-retention.gmi b/gemini/gemlog/2021-04-17-capsule-log-retention.gmi new file mode 100755 index 0000000..6f7eab2 --- /dev/null +++ b/gemini/gemlog/2021-04-17-capsule-log-retention.gmi @@ -0,0 +1,50 @@ +# Capsule Log Retention + +I wrote previously that I had set up stats based on my server's access logs. I noted at the bottom that I was clearing my logs to keep somewhat compliant with user privacy. + +## Deleting logs older than 7 days + +I wrote a daily cronjob to remove any log lines older than 7 days from my access.log + +```retention.sh +#!/usr/bin/env bash + +set -e + +LOGFILE=$1 + +mindate=$(head -n1 $LOGFILE | cut -f1 | cut -d'T' -f1) +maxdate=$(date --date="-6 days" -u -Id) + +echo "Deleting log lines from ${mindate} to ${maxdate}" + +sed -i -E "/${mindate}/,/${maxdate}/d" $LOGFILE + +echo "Cleared logs" +``` + +### Date command + +I had no idea you could do relative dates! I am not surprised but it was nice to find out since it meant I could do this entirely in bash. + +## What this is doing + +I essentially take two dates: the first date in the file, and the inclusive date I want to remove logs for. 
Then, using the magic of sed, I remove all the lines that match between those dates. + +### Sed + +I do a lot of bash scripting for """productivity""". Sed is an amazing utility that I find allows me to do so much without needing to expand into other languages like awk or python. + +## Obfuscating any personal data + +The only "personal data" my access logs have are IPs. I am kicking around a good way to obfuscate them so I can still debug and trace requests, but for now keeping a week of logs should be compliant and hopefully personally reassuring. + +## Conclusion + +Honestly, I find myself writing small script files like this (though this is basically just a sed command) to do day-to-day tasks. For instance my 'todo' app for work, to track asks, essentially just echoes the args into a unique file in a directory. Then you can print the file contents to see what's open. Then I use their UID to mark them as done. + +So I was happy to be able to quickly write this up to clear old logs. Do you do log rotation? Or just delete the old files? + +=> /gemlog/ Gemlog +=> / Home + diff --git a/gemini/gemlog/index.gmi b/gemini/gemlog/index.gmi index 8c91adf..4cfecb4 100644 --- a/gemini/gemlog/index.gmi +++ b/gemini/gemlog/index.gmi @@ -4,6 +4,7 @@ Welcome to my gemlog. I post whenever I do something I feel is worth writing abo ## My posts +=> 2021-04-17-capsule-log-retention.gmi 2021-04-17 - Capsule Log Retention => 2021-04-15-capsule-stats.gmi 2021-04-15 - Capsule Stats => 2021-04-13-digital-hygiene-one-week-in.gmi 2021-04-13 - Digital Hygiene - One Week In => 2021-04-12-girl-2020-land-before-time.gmi 2021-04-12 - Girl (2020) and The Land Before Time diff --git a/gemlog-staging/enjoying-music.gmi b/gemlog-staging/enjoying-music.gmi index 1ab4cc2..07e0bd8 100644 --- a/gemlog-staging/enjoying-music.gmi +++ b/gemlog-staging/enjoying-music.gmi @@ -17,3 +17,5 @@ According to my Spotify Wrapped for the past 4 years I only listen to music. 
I e I use to listen to "Welcome to the Magic Tavern" which I REALLY enjoyed. However, I cannot pay attention to an "audio show" while doing OTHER things. So that limits how easily I can consume that type of content. I use to watch british panel shows while working at a job I was pretty much checked out of, but those are easy to "miss a joke in" because they moved on in a minute or two to a new story/joke. + +Since lockdown, I started listening to one podcast regularly, which is diff --git a/stats/calc.sh b/stats/calc.sh index 8734a18..4fe921c 100755 --- a/stats/calc.sh +++ b/stats/calc.sh @@ -16,17 +16,17 @@ echo -e " Total Reqs:\t"$(grep 'OUT' ${LOGFILE} | grep "${TODAY}" | wc -l) >> echo -e " Gemlog Reads:\t"$(grep 'IN' ${LOGFILE} | grep "${TODAY}" | grep "gemlog" | grep "gmi" | wc -l) >> $OUTFILE echo "Top 5 Gemlogs" >> $OUTFILE echo "--------------" >> $OUTFILE -grep "IN" ${LOGFILE} | grep "${TODAY}" | cut -f4 | grep "gemlog" | grep ".gmi" | sort | uniq -c | sort -rn | head -n5 >> $OUTFILE +grep "IN" ${LOGFILE} | grep "${TODAY}" | grep -v '172.105.81.166' | cut -f4 | grep "gemlog" | grep ".gmi" | sort | uniq -c | sort -rn | head -n5 >> $OUTFILE # Stats total EARLIEST=$(head -n1 $LOGFILE | cut -f1) echo "" >> $OUTFILE -echo -e " Stats since:\t$EARLIEST" >> $OUTFILE +echo -e " Stats since:\t$EARLIEST" >> $OUTFILE echo -e " Total Reqs:\t"$(grep 'OUT' ${LOGFILE} | wc -l) >> $OUTFILE echo -e " Gemlog Reads:\t"$(grep 'IN' ${LOGFILE} | grep "gemlog" | grep "gmi" | wc -l) >> $OUTFILE echo "Top 5 Gemlogs" >> $OUTFILE echo "--------------" >> $OUTFILE -grep "IN" ${LOGFILE} | cut -f4 | grep "gemlog" | grep ".gmi" | sort | uniq -c | sort -rn | head -n5 >> $OUTFILE +grep "IN" ${LOGFILE} | grep -v '172.105.81.166' | cut -f4 | grep "gemlog" | grep ".gmi" | sort | uniq -c | sort -rn | head -n5 >> $OUTFILE # print generating timestamp echo -e "\n// generated $(date -u -Is)" >> $OUTFILE -- cgit v1.2.3-54-g00ecf