Better TOC generation

This commit is contained in:
Itai Shirav 2017-05-02 17:46:47 +03:00
parent d1a0fe7ee8
commit f1ab9b6179
5 changed files with 55 additions and 204 deletions

View File

@ -1,5 +1,4 @@
Table of Contents
=================
# Table of Contents
* [Overview](index.md#overview)
* [Installation](index.md#installation)
@ -29,8 +28,10 @@ Table of Contents
* [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields)
* [Table Engines](table_engines.md#table-engines)
* [Buffer Models](table_engines.md#buffer-models)
* [Data Replication](table_engines.md#data-replication)
* [Simple Engines](table_engines.md#simple-engines)
* [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family)
* [Data Replication](table_engines.md#data-replication)
* [Buffer Engine](table_engines.md#buffer-engine)
* [Schema Migrations](schema_migrations.md#schema-migrations)
* [Writing Migrations](schema_migrations.md#writing-migrations)
@ -40,3 +41,4 @@ Table of Contents
* [Partitions and Parts](system_models.md#partitions-and-parts)
* [Contributing](contributing.md#contributing)

View File

@ -2,14 +2,14 @@ This directory contains various scripts for use while developing.
generate_toc
------------
Generates the table of contents (toc.md)
Generates the table of contents (toc.md). Requires Pandoc.
Usage:
cd docs
../scripts/generate_toc.sh
gh-md-toc
---------
html_to_markdown_toc.py
-----------------------
Used by generate_toc.

View File

@ -1,13 +1,16 @@
# Rebuild toc.md from scratch: a two-line title followed by whatever
# gh-md-toc prints for the documentation pages, listed in reading order.
{
  echo "Table of Contents"
  echo "================="
  ../scripts/gh-md-toc \
    index.md \
    models_and_databases.md \
    querysets.md \
    field_types.md \
    table_engines.md \
    schema_migrations.md \
    system_models.md \
    contributing.md
} > toc.md
generate_one() {
  # Appends the TOC entries of one Markdown file to toc.md.
  #   $1 - path of the Markdown file
  # Pandoc renders the file to HTML; the helper script then extracts the
  # header tags from that HTML and is given the same path as its argument.
  local md_file=$1
  pandoc "$md_file" \
    | python "../scripts/html_to_markdown_toc.py" "$md_file" >> toc.md
}
# Start toc.md afresh with the title, then append the entries of every
# documentation page in reading order.
printf "# Table of Contents\n\n" > toc.md
for page in \
  "index.md" \
  "models_and_databases.md" \
  "querysets.md" \
  "field_types.md" \
  "table_engines.md" \
  "schema_migrations.md" \
  "system_models.md" \
  "contributing.md"; do
  generate_one "$page"
done

View File

@ -1,185 +0,0 @@
#!/usr/bin/env bash
#
# Source: https://github.com/ekalinin/github-markdown-toc
#
# Steps:
#
# 1. Download corresponding html file for some README.md:
# curl -s $1
#
# 2. Discard rows that do not contain the substring 'user-content-' (GitHub's markup):
# awk '/user-content-/ { ...
#
# 3.1 Get last number in each row like ' ... </span></a>sitemap.js</h1'.
# It's a level of the current header:
# substr($0, length($0), 1)
#
# 3.2 Get level from 3.1 and insert the corresponding number of spaces (3 per level) before '*':
# sprintf("%*s", substr($0, length($0), 1)*3, " ")
#
# 4. Find head's text and insert it inside "* [ ... ]":
# substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
#
# 5. Find anchor and insert it inside "(...)":
# substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8)
#
# Version of this script; printed by --version and in the help banner.
gh_toc_version='0.4.8'
# User-Agent string passed to curl/wget by the download helpers.
gh_user_agent="gh-md-toc v${gh_toc_version}"
#
# Download the HTML rendering of a README.md from its URL.
#
#
gh_toc_load() {
  # Fetches a URL and writes the response body to stdout.
  #   $1            - URL to download
  #   gh_user_agent - (global, read) value sent as the User-Agent
  # Uses curl when available, otherwise wget. Exits with status 1 when
  # neither tool is installed.
  local gh_url=$1

  if command -v curl >/dev/null 2>&1; then
    curl --user-agent "$gh_user_agent" -s "$gh_url"
  elif command -v wget >/dev/null 2>&1; then
    wget --user-agent="$gh_user_agent" -qO- "$gh_url"
  else
    # stdout of this function is piped into the TOC grabber, which would
    # swallow the message; report the problem on stderr instead.
    echo "Please, install 'curl' or 'wget' and try again." >&2
    exit 1
  fi
}
#
# Converts a local Markdown file into HTML using the GitHub API
#
# ➥ curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown
# <p>Hello world github/linguist#1 <strong>cool</strong>, and #1!</p>'"
gh_toc_md2html() {
  # Posts the contents of a local Markdown file to GitHub's raw Markdown
  # endpoint and writes curl's output to stdout.
  #   $1            - path of the Markdown file
  #   gh_user_agent - (global, read) value sent as the User-Agent
  local gh_file_md=$1
  local -a request=(
    -s
    --user-agent "$gh_user_agent"
    --data-binary "@${gh_file_md}"
    -H "Content-Type:text/plain"
    https://api.github.com/markdown/raw
  )

  curl "${request[@]}"
}
#
# Checks whether the passed string is a URL
#
gh_is_url() {
  # Prints "yes" when $1 is an http:// or https:// URL, otherwise "no".
  # The scheme separator is required so that a local file whose name merely
  # starts with "http" (e.g. http_api.md) is not mistaken for a URL.
  case $1 in
    http://* | https://*)
      echo "yes"
      ;;
    *)
      echo "no"
      ;;
  esac
}
#
# TOC generator
#
gh_toc() {
  # Prints the TOC of a single document.
  #   $1 - URL or local path of the document
  #   $2 - total number of documents being processed in this run
  # Exits with status 1 when no source is given.
  local gh_src=$1
  local gh_ttl_docs=$2
  # Prefix put in front of every anchor; stays empty for a lone document.
  local link_prefix=$gh_src

  if [[ -z $gh_src ]]; then
    echo "Please, enter URL or local path for a README.md"
    exit 1
  fi

  # The "Table of Contents" title is printed only for a single document.
  if [[ $gh_ttl_docs == "1" ]]; then
    printf '%s\n' "Table of Contents" "=================" ""
    link_prefix=""
  fi

  # URLs are downloaded; anything else is treated as a local file and
  # converted to HTML first.
  case $(gh_is_url "$gh_src") in
    yes)
      gh_toc_load "$gh_src" | gh_toc_grab "$link_prefix"
      ;;
    *)
      gh_toc_md2html "$gh_src" | gh_toc_grab "$link_prefix"
      ;;
  esac
}
#
# Grabber of the TOC from rendered html
#
# $1 — the source URL of the document.
# It is needed when the TOC is generated for multiple documents.
#
gh_toc_grab() {
# Reads rendered HTML on stdin and prints one Markdown bullet per header.
# $1 is prepended to every anchor taken from the header's href attribute.
#
# Stage 1: read the whole input into sed's pattern space and join any
# closing </hN> tag that starts a line onto the previous line.
# for example:
# was: The command <code>foo1</code>
# </h1>
# became: The command <code>foo1</code></h1>
sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' |
# Stage 2: keep only the matching part of each line, from the
# <a id="user-content-..."> anchor up to "</hN", so that every row ends
# with the header level digit.
grep -E -o '<a\s*id="user-content-[^"]*".*</h[1-6]' |
# Stage 3: remove code tags (no g flag: only the first <code> and the
# first </code> on each row are removed).
sed 's/<code>//' | sed 's/<\/code>//' |
# now all rows are like:
# <a id="user-content-..." href="..."><span ...></span></a> ... </h1
# Stage 4: format the result line
# * $0 — whole string
# * indentation: last character of the row (the level) times 3 spaces
# * title: the text between "a>" and "</h"
# * link: gh_url followed by the value of the href attribute
# The trailing sed turns '+' into a space and '%' into '\x', so that
# echo -e prints percent-encoded bytes as the characters they stand for.
echo -e "$(awk -v "gh_url=$1" '{
print sprintf("%*s", substr($0, length($0), 1)*3, " ") "* [" substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)"](" gh_url substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8) ")"}' | sed 'y/+/ /; s/%/\\x/g')"
}
#
# Returns filename only from full path or url
#
gh_toc_get_filename() {
  # Prints the last path component of $1 (everything after the final '/').
  # printf is used instead of echo so that a name such as "-n" or "-e" is
  # printed literally rather than being taken as an echo option.
  printf '%s\n' "${1##*/}"
}
#
# Option handlers
#
gh_toc_app() {
  # Parses the command line and runs the requested action.
  #   --help, or no arguments  print usage
  #   --version                print gh_toc_version
  #   -                        read Markdown from stdin and print its TOC
  #   src [src ...]            print a TOC for every URL or local path
  local app_name="gh-md-toc"

  if [ "$1" = '--help' ] || [ $# -eq 0 ]; then
    echo "GitHub TOC generator ($app_name): $gh_toc_version"
    echo ""
    echo "Usage:"
    echo " $app_name src [src] Create TOC for a README file (url or local path)"
    echo " $app_name - Create TOC for markdown from STDIN"
    echo " $app_name --help Show help"
    echo " $app_name --version Show version"
    return
  fi

  if [ "$1" = '--version' ]; then
    echo "$gh_toc_version"
    return
  fi

  if [ "$1" = "-" ]; then
    # Temp files go under $TMPDIR, falling back to /tmp; create the
    # directory when it does not exist yet.
    local tmp_root=${TMPDIR:-/tmp}
    if [ ! -d "$tmp_root" ]; then
      mkdir -p "$tmp_root"
    fi

    local gh_tmp_md
    gh_tmp_md=$(mktemp "$tmp_root/tmp.XXXXXX") || return 1

    # Copy stdin verbatim. A `while read` loop would strip leading
    # whitespace, interpret backslashes and drop a last line that has no
    # trailing newline.
    cat > "$gh_tmp_md"

    gh_toc_md2html "$gh_tmp_md" | gh_toc_grab ""
    local rc=$?

    # The temp file is only needed for the conversion above.
    rm -f -- "$gh_tmp_md"
    return "$rc"
  fi

  local md
  for md in "$@"; do
    echo ""
    gh_toc "$md" "$#"
  done
  #echo ""
  #echo "Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc)"
}
#
# Entry point: forward all command-line arguments to gh_toc_app unchanged.
#
gh_toc_app "$@"

View File

@ -0,0 +1,31 @@
from HTMLParser import HTMLParser
import sys
# Header tags that produce a TOC entry; any other tag is ignored.
HEADER_TAGS = ('h1', 'h2', 'h3')


class HeadersToMarkdownParser(HTMLParser):
    """Prints one Markdown bullet for every h1-h3 header in the fed HTML.

    Each bullet links to ``<sys.argv[1]>#<fragment>``, where the fragment is
    the lower-cased header text with spaces replaced by dashes.
    """

    # Name of the header tag currently open, or None outside a header.
    inside = None
    # Text collected so far for the header currently open.
    text = ''

    def handle_starttag(self, tag, attrs):
        """Remember the tag when a header opens."""
        if tag.lower() not in HEADER_TAGS:
            return
        self.inside = tag

    def handle_endtag(self, tag):
        """Print the entry for the header that just closed and reset state."""
        if tag.lower() not in HEADER_TAGS:
            return
        # The digit of the opening tag name ('h2' -> 2) sets the indent width.
        level = int(self.inside[1])
        anchor = '-'.join(self.text.lower().split(' '))
        entry = '%s* [%s](%s#%s)' % (' ' * level, self.text, sys.argv[1], anchor)
        print(entry)
        self.inside, self.text = None, ''

    def handle_data(self, data):
        """Append character data to the header text while inside a header."""
        if not self.inside:
            return
        self.text += data
# feed() is an instance method, so the parser has to be instantiated first;
# calling it on the class would pass the HTML string in place of `self`.
HeadersToMarkdownParser().feed(sys.stdin.read())
# Emit an empty line after this document's entries.
print('')