diff options
-rwxr-xr-x | gemparse.py | 23 |
1 files changed, 17 insertions, 6 deletions
diff --git a/gemparse.py b/gemparse.py index 54dd990..dd83c89 100755 --- a/gemparse.py +++ b/gemparse.py @@ -13,9 +13,16 @@ def handleNestedTag(tag, links, linkCnt, dest_f): for child in tag.contents: if child.name == "a": href = child.get("href") - print("{}[{}]".format(strip(child.string), linkCnt), end=" ", file=dest_f) - links[linkCnt] = href - linkCnt += 1 + if href is not None: + if href.startswith("#"): + print(strip(child.string), end=" ", file=dest_f) + else: + print("{}[{}]".format(strip(child.string), linkCnt), end=" ", file=dest_f) + links[linkCnt] = href + linkCnt += 1 + else: + # it's not a href link then its likely an anchor + pass else: if child.string == "\n": print(strip(child.string), end="", file=dest_f) @@ -59,14 +66,18 @@ for tag in html.article.children: else: print("\n", end="", file=dest_f) handleNestedTag(tag, links, linkCnt, dest_f) - if tag.name == "ul" or tag.name == "ol": + elif tag.name == "ul" or tag.name == "ol": for child in tag.contents: if child.name == "li": print("*", end=" ", file=dest_f) handleNestedTag(child, links, linkCnt, dest_f) - if tag.name == "h3": + if tag.name == "figure": + if tag.blockquote is not None: + print(">", end=" ", file=dest_f) + handleNestedTag(tag.blockquote, links, linkCnt, dest_f) + elif tag.name == "h3": print("\n## {}".format(strip(tag.string)), file=dest_f) - if tag.name == "h4": + elif tag.name == "h4": print("\n### {}".format(strip(tag.string)), file=dest_f) print("\n## Links\n", file=dest_f) |