summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Steph Enders <steph@senders.io> 2023-07-14 17:06:08 -0400
committer Steph Enders <steph@senders.io> 2023-07-14 17:06:08 -0400
commit 8178dd95210373495ffe9dd6ca333819eec882c1 (patch)
tree 1a33cb8530db4bc9610a09872a2a021eaf9c2ca2
parent a4b3b4995e8456a9702bcafe6058f099d0fe5434 (diff)
Support more tags in gemparse
Gemparse now supports parsing: h3; h4; ul, ol, li; a tags as anchors (<a name="" />); a tags as anchor refs (<a href="#.." ></a>).
-rwxr-xr-x gemparse.py 23
1 files changed, 17 insertions, 6 deletions
diff --git a/gemparse.py b/gemparse.py
index 54dd990..dd83c89 100755
--- a/gemparse.py
+++ b/gemparse.py
@@ -13,9 +13,16 @@ def handleNestedTag(tag, links, linkCnt, dest_f):
for child in tag.contents:
if child.name == "a":
href = child.get("href")
- print("{}[{}]".format(strip(child.string), linkCnt), end=" ", file=dest_f)
- links[linkCnt] = href
- linkCnt += 1
+ if href is not None:
+ if href.startswith("#"):
+ print(strip(child.string), end=" ", file=dest_f)
+ else:
+ print("{}[{}]".format(strip(child.string), linkCnt), end=" ", file=dest_f)
+ links[linkCnt] = href
+ linkCnt += 1
+ else:
+ # if it's not an href link then it's likely an anchor
+ pass
else:
if child.string == "\n":
print(strip(child.string), end="", file=dest_f)
@@ -59,14 +66,18 @@ for tag in html.article.children:
else:
print("\n", end="", file=dest_f)
handleNestedTag(tag, links, linkCnt, dest_f)
- if tag.name == "ul" or tag.name == "ol":
+ elif tag.name == "ul" or tag.name == "ol":
for child in tag.contents:
if child.name == "li":
print("*", end=" ", file=dest_f)
handleNestedTag(child, links, linkCnt, dest_f)
- if tag.name == "h3":
+ if tag.name == "figure":
+ if tag.blockquote is not None:
+ print(">", end=" ", file=dest_f)
+ handleNestedTag(tag.blockquote, links, linkCnt, dest_f)
+ elif tag.name == "h3":
print("\n## {}".format(strip(tag.string)), file=dest_f)
- if tag.name == "h4":
+ elif tag.name == "h4":
print("\n### {}".format(strip(tag.string)), file=dest_f)
print("\n## Links\n", file=dest_f)