#!/usr/bin/env python3
"""Convert one HTML article page into a gemtext page for a Gemini capsule.

Usage: gemparse.py SOURCE DEST HTTP_URL

SOURCE    path of the HTML file to read; it must contain an <article>
          element with an <h2> heading (optionally wrapped in a <header>
          that may also hold a <p> subheading).
DEST      path of the gemtext file to write.
HTTP_URL  path appended to https://thewomaninmyeye.org/ for the
          "view on http" link at the bottom of the page.

Exit status: 0 on success, 1 when the page has no article or heading,
2 when too few arguments are given.

Origin: commit 43762acc52ea0bc976b38f26bc0baa1454311a17 by Steph Enders
(2023-06-27), "Create gemini capsule for website" -- per that commit
message the script is run from the make file to generate the capsule.
"""
import sys
import re


def strip(string):
    """Return *string* with leading whitespace removed from every line."""
    return "\n".join(line.lstrip() for line in string.split("\n"))


def _node_text(node):
    """Return the text of a parsed node, even when it has nested markup.

    ``.string`` is None for a tag with several children (or none at all);
    in that case fall back to ``get_text()`` instead of crashing.
    """
    text = node.string
    if text is None:
        text = node.get_text()
    return text


def _find_heading(article):
    """Return ``(heading, subheading)`` for *article*; either may be None.

    The heading is the <h2> inside the article's <header> when a header
    exists, otherwise the article's first <h2>. The subheading is the
    header's first <p>, if any.
    """
    header = article.header
    container = article if header is None else header

    h2 = container.h2
    heading = None if h2 is None else strip(_node_text(h2))

    subheading = None
    if header is not None and header.p is not None:
        subheading = strip(_node_text(header.p))
    return heading, subheading


def _render_paragraph(tag, links, out):
    """Write one <p> element to *out*, collecting link targets in *links*.

    Each <a> child is written as ``text[N]`` where N is the index its href
    receives in *links*.
    """
    if len(tag.contents) == 1:
        # A single child is treated as plain text.
        # NOTE(review): a paragraph whose only child is an <a> therefore
        # loses its link target -- confirm whether that is intended.
        print(strip(_node_text(tag)), file=out)
        return

    # Several children: text interleaved with inline tags; look for links.
    # NOTE(review): no newline is written after the last child, so the
    # line only ends if the paragraph's own text ends with one.
    for child in tag.contents:
        if child.name == "a":
            number = len(links)
            print("{}[{}]".format(strip(_node_text(child)), number),
                  end=" ", file=out)
            links.append(child.get("href"))
        elif child.string == "\n":
            # Keep bare line breaks as they are, without a trailing space.
            print(child.string, end="", file=out)
        else:
            print(strip(_node_text(child)), end=" ", file=out)


def convert(source, dest, http_url):
    """Convert the HTML file *source* to gemtext at *dest*.

    Returns 0 on success and 1 (after printing a message to stderr) when
    the page has no <article> or no heading. *dest* is only opened once
    the heading has been found, so a failed run leaves no empty output.
    """
    # Imported here so the helpers above stay importable without bs4.
    from bs4 import BeautifulSoup

    with open(source, "r") as source_f:
        html = BeautifulSoup(source_f, "html.parser")

    article = html.article
    if article is None:
        print("No article found", file=sys.stderr)
        return 1

    heading, subheading = _find_heading(article)
    if heading is None:
        print("No heading found", file=sys.stderr)
        return 1

    links = []
    with open(dest, "w") as dest_f:
        print("# {}\n".format(heading), file=dest_f)
        if subheading is not None:
            print(subheading, file=dest_f)

        # Only paragraphs that are direct children of the article are
        # rendered; every other node is skipped.
        for tag in article.children:
            if tag.name == "p":
                _render_paragraph(tag, links, dest_f)

        print("\n## Links\n", file=dest_f)
        for link_n, link in enumerate(links):
            # NOTE(review): the separator between "[N]" and the repeated
            # URL was ambiguous in the reviewed copy; a single space is
            # assumed -- confirm against the repository.
            print("=> {} [{}] {}".format(link, link_n, link), file=dest_f)

        print("=> https://thewomaninmyeye.org/{} view on http".format(http_url),
              file=dest_f)
        print("=> /log/ back", file=dest_f)
        print("=> / capsule", file=dest_f)
    return 0


def main(argv):
    """Run the converter for the command line *argv*; return the exit status."""
    if len(argv) < 4:
        prog = argv[0] if argv else "gemparse.py"
        print("usage: {} SOURCE DEST HTTP_URL".format(prog), file=sys.stderr)
        return 2
    source, dest, http_url = argv[1:4]
    return convert(source, dest, http_url)


if __name__ == "__main__":
    sys.exit(main(sys.argv))