"""Generate an Atom feed for Gemini (text/gemini) content in the current
directory.

Scans for world-readable ``.gmi`` / ``.gemini`` files, builds an Atom feed
with one entry per file (newest first), and writes it to disk.
"""

import argparse
import datetime
import glob
import os
import os.path
import stat
import urllib.parse

from feedgen.feed import FeedGenerator


def is_world_readable(filename):
    """
    Return True if the named file is world readable, otherwise return False.
    """
    st = os.stat(filename)
    return st.st_mode & stat.S_IROTH


def extract_first_heading(filename, default=""):
    """
    Open a file which is presumed to contain text/gemini content and return
    the contents of the first heading line (regardless of heading level).
    If no heading lines are found, return the specified default.
    """
    # text/gemini defaults to UTF-8; an explicit encoding avoids depending
    # on the locale of whatever machine the feed is generated on.
    with open(filename, encoding="utf-8") as fp:
        for line in fp:
            if line.startswith("#"):
                # lstrip("#") strips all leading heading markers at once.
                # (The previous char-by-char loop raised IndexError on a
                # bare "#" final line with no trailing newline, because the
                # string was consumed to "" before the loop test failed.)
                return line.lstrip("#").strip()
    return default


def get_feed_title():
    """
    If an index.gmi or index.gemini file exists and is world readable,
    return the content of the first heading line in the file, otherwise
    return a default feed title.
    """
    default = "Just another Gemini feed"
    for index_file in ("index.gmi", "index.gemini"):
        if os.path.exists(index_file) and is_world_readable(index_file):
            return extract_first_heading(index_file, default)
    return default


def find_files(n=10):
    """
    Return the n most recently created world readable files with extensions
    of .gmi or .gemini, as a list sorted from most to least recent.
    Index files (index.gmi / index.gemini) are excluded.
    """
    files = []
    for extension in ("gmi", "gemini"):
        files.extend(glob.glob("*.{}".format(extension)))
        index = "index.{}".format(extension)
        if index in files:
            files.remove(index)
    files = [f for f in files if is_world_readable(f)]
    # NOTE(review): on Unix, st_ctime is metadata-change time, not creation
    # time, so edits that change permissions/ownership can reorder entries.
    # Kept as-is to preserve existing feed behavior.
    files.sort(key=os.path.getctime, reverse=True)
    return files[:n]


def urljoin(base, url):
    """
    Return an absolute URL formed by combining the provided base and
    relative URLs.

    This is necessary because the various functions in Python's urllib to
    do this do not function as expected if the URL scheme is not
    recognised, which of course gemini:// is not. Thus, we need to do a
    little dance where we transform gemini URLs to https URLs, join them,
    and then undo the transformation.
    """
    base = urllib.parse.urlsplit(base)
    base = base._replace(scheme="https")
    base = urllib.parse.urlunsplit(base)
    joined = urllib.parse.urljoin(base, url)
    joined = urllib.parse.urlsplit(joined)
    joined = joined._replace(scheme="gemini")
    return urllib.parse.urlunsplit(joined)


def populate_entry_from_file(filename, base_url, entry):
    """
    Set the id, title, updated and link attributes of the provided
    FeedGenerator entry object according the contents of the named Gemini
    file and the base URL.
    """
    url = urljoin(base_url, filename)
    entry.guid(url)
    entry.link(href=url, rel="alternate")
    # Atom requires timezone-aware timestamps; use UTC explicitly.
    updated = os.path.getctime(filename)
    updated = datetime.datetime.fromtimestamp(updated, tz=datetime.timezone.utc)
    entry.updated(updated)
    # Fall back to the filename itself when the file has no heading.
    title = extract_first_heading(filename, filename)
    entry.title(title)


def main():
    """
    Parse command-line arguments, build the feed from local Gemini files,
    and write the Atom XML to the output file.
    """
    # Get default title from index page, if there is one
    feed_title = get_feed_title()

    # Parse arguments
    parser = argparse.ArgumentParser(
        description='Generate an Atom feed for Gemini content.')
    parser.add_argument('-a', '--author', dest='author', type=str,
                        help="feed author's name")
    parser.add_argument('-b', '--base', dest='base_url', type=str,
                        required=True,
                        help='base URL for feed and entries')
    parser.add_argument('-e', '--email', dest='email', type=str,
                        help="feed author's email address")
    parser.add_argument('-n', dest='n', type=int, default=10,
                        help='include N most recently created files in feed '
                             '(default 10)')
    parser.add_argument('-o', '--output', dest='output', type=str,
                        default="atom.xml",
                        help='output filename')
    parser.add_argument('-s', '--subtitle', dest='subtitle', type=str,
                        help='feed subtitle')
    parser.add_argument('-t', '--title', dest='title', type=str,
                        default=feed_title,
                        help='feed title')
    args = parser.parse_args()

    # Normalise base URL
    base_url = urllib.parse.urlsplit(args.base_url)
    if not base_url.netloc and base_url.path:
        # Handle a naked domain, which urlsplit will interpret as a local path
        base_url = base_url._replace(netloc=base_url.path, path="")
    base_url = base_url._replace(scheme="gemini")
    args.base_url = urllib.parse.urlunsplit(base_url)

    # Setup feed
    feed = FeedGenerator()
    feed.id(args.base_url)
    feed.title(args.title)
    if args.subtitle:
        feed.subtitle(args.subtitle)
    author = {}
    if args.author:
        author["name"] = args.author
    if args.email:
        author["email"] = args.email
    if author:
        feed.author(author)
    feed.link(href=args.base_url, rel='alternate')
    feed.link(href=urljoin(args.base_url, args.output), rel='self')

    # Add one entry per .gmi file
    files = find_files(args.n)
    if not files:
        print("No world-readable Gemini content found! :(")
        return
    for n, filename in enumerate(files):
        entry = feed.add_entry()
        populate_entry_from_file(filename, args.base_url, entry)
        print("Adding {} with title '{}'...".format(filename, entry.title()))
        if n == 0:
            # Files are sorted newest first, so the first entry's timestamp
            # doubles as the feed-level "updated" value.
            feed.updated(entry.updated())

    # Write file
    feed.atom_file(args.output, pretty=True)
    print("Wrote Atom feed to {}.".format(args.output))


if __name__ == "__main__":
    main()