infi.clickhouse_orm/scripts/html_to_markdown_toc.py

32 lines
743 B
Python
Raw Normal View History

2020-02-08 13:38:23 +03:00
from html.parser import HTMLParser
2017-05-02 17:46:47 +03:00
import sys
# Tags treated as headers; the digit in the tag name drives the indentation.
HEADER_TAGS = ('h1', 'h2', 'h3')


class HeadersToMarkdownParser(HTMLParser):
    """Print one Markdown table-of-contents bullet per h1/h2/h3 header.

    Feed HTML to the parser with ``feed()``. Each time a header in
    ``HEADER_TAGS`` closes, a line of the form
    ``<indent>* [<text>](<sys.argv[1]>#<fragment>)`` is printed, where
    ``<fragment>`` is the header text lower-cased with spaces replaced by
    dashes, and ``<indent>`` is repeated once per header level.
    """

    # Tag name of the header currently being read, or None outside a header.
    inside = None
    # Text collected so far for the header currently being read.
    text = ''

    def handle_starttag(self, tag, attrs):
        """Remember the header tag so that its text gets collected."""
        if tag.lower() in HEADER_TAGS:
            self.inside = tag

    def handle_endtag(self, tag):
        """Emit the bullet for the header that just closed and reset state."""
        if tag.lower() not in HEADER_TAGS:
            return
        if self.inside is None:
            # Stray closing header tag with no matching opening tag: there is
            # nothing to emit, and indexing ``self.inside`` would raise
            # TypeError, so skip it.
            return
        # The level comes from the tag that opened the header ('h2' -> 2).
        indent = ' ' * int(self.inside[1])
        fragment = self.text.lower().replace(' ', '-')
        # sys.argv[1] is used as the link target that the fragment is
        # appended to.
        print('%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment))
        self.inside = None
        self.text = ''

    def handle_data(self, data):
        """Accumulate text, but only while inside a header."""
        if self.inside:
            self.text += data
2017-05-03 08:36:47 +03:00
# Read the whole HTML document from standard input and print its table of
# contents, followed by one empty line.
toc_parser = HeadersToMarkdownParser()
html_source = sys.stdin.read()
toc_parser.feed(html_source)
print('')