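"""Generate an Atom feed for Gemini content.

Scans the current directory for .gmi, .gem and .gemini files
(excluding index files) and writes an Atom feed describing them.

Example invocation (the base URL is required; example.com is a
placeholder):

    python3 gemfeed.py -b gemini://example.com/ -t "My gemlog"
"""
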
import argparse
import datetime
import glob
import os.path
import urllib.parse

from feedgen.feed import FeedGenerator

def find_files():
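    """Return the Gemini source files in the current directory.

    Matches *.gmi, *.gem and *.gemini, excluding index files.
    """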
    files = []
    for extension in ("gmi", "gem", "gemini"):
        files.extend(glob.glob("*.{}".format(extension)))
        index = "index.{}".format(extension)
        if index in files:
            files.remove(index)
    return files

def populate_entry_from_file(filename, base_url, entry):
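    """Fill in an entry's GUID, link, timestamp and title from a file."""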
    url = urljoin(base_url, filename)
    entry.guid(url)
    entry.link(href=url, rel="alternate")
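    # Note: on Unix, os.path.getctime() returns the inode change time,
    # not the creation time, so metadata changes also bump this timestamp.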
    updated = os.path.getctime(filename)
    updated = datetime.datetime.fromtimestamp(updated, tz=datetime.timezone.utc)
    entry.updated(updated)
    # Use the first heading line as the entry title, falling back to
    # the filename if the file contains no heading.
    with open(filename) as fp:
        for line in fp:
            if line.startswith("#"):
                # Strip the leading #s and surrounding whitespace.
                title = line.lstrip("#").strip()
                break
        else:
            title = filename
    entry.title(title)

def urljoin(base, url):
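    """Join a gemini:// base URL with a (possibly relative) URL.

    urllib.parse.urljoin() does not resolve relative references against
    schemes it does not recognise, such as gemini, so temporarily swap
    the scheme for https and restore it afterwards.
    """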
    base = urllib.parse.urlsplit(base)
    base = base._replace(scheme="https")
    base = urllib.parse.urlunsplit(base)
    joined = urllib.parse.urljoin(base, url)
    joined = urllib.parse.urlsplit(joined)
    joined = joined._replace(scheme="gemini")
    return urllib.parse.urlunsplit(joined)

def main():
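    """Parse command-line arguments, build the feed and write it out."""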

    # Parse arguments
    parser = argparse.ArgumentParser(description='Generate an Atom feed for Gemini content.')
    parser.add_argument('-a', '--author', dest='author', type=str,
            help="feed author's name")
    parser.add_argument('-b', '--base', dest='base_url', type=str,
            required=True, help='base URL for feed and entries')
    parser.add_argument('-e', '--email', dest='email', type=str,
            help="feed author's email address")
    parser.add_argument('-o', '--output', dest='output', type=str,
            default="atom.xml", help='output filename')
    parser.add_argument('-s', '--subtitle', dest='subtitle', type=str,
            help='feed subtitle')
    parser.add_argument('-t', '--title', dest='title', type=str,
            default="A Gemini feed", help='feed title')
    args = parser.parse_args()

    # Normalise base URL
    base_url = urllib.parse.urlsplit(args.base_url)
    if not base_url.netloc and base_url.path:
        # Handle a naked domain, which urlsplit will interpret as a local path
        base_url = base_url._replace(netloc=base_url.path, path="")
    base_url = base_url._replace(scheme="gemini")
    args.base_url = urllib.parse.urlunsplit(base_url)

    # Set up feed
    feed = FeedGenerator()
    feed.id(args.base_url)
    feed.title(args.title)
    if args.subtitle:
        feed.subtitle(args.subtitle)
    author = {}
    if args.author:
        author["name"] = args.author
    if args.email:
        author["email"] = args.email
    if author:
        feed.author(author)
    feed.link(href=args.base_url, rel='alternate')
    feed.link(href=urljoin(args.base_url, args.output), rel='self')

    # Add one entry per .gmi/.gem/.gemini file
    files = find_files()
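    # Track the newest entry timestamp so the feed-level updated time
    # reflects the most recently changed file.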
    latest_update = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)
    for filename in files:
        entry = feed.add_entry()
        populate_entry_from_file(filename, args.base_url, entry)
        print("Adding {} with title '{}'...".format(filename, entry.title()))
        if entry.updated() > latest_update:
            latest_update = entry.updated()
    feed.updated(latest_update)

    # Write file
    feed.atom_file(args.output, pretty=True)
    print("Wrote Atom feed to {}.".format(args.output))

if __name__ == "__main__":
    main()