From f1ab9b6179996f3e3e0569c27ce31796238c6cc8 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 2 May 2017 17:46:47 +0300 Subject: [PATCH] Better TOC generation --- docs/toc.md | 10 +- scripts/README.md | 6 +- scripts/generate_toc.sh | 27 ++--- scripts/gh-md-toc | 185 -------------------------------- scripts/html_to_markdown_toc.py | 31 ++++++ 5 files changed, 55 insertions(+), 204 deletions(-) delete mode 100755 scripts/gh-md-toc create mode 100644 scripts/html_to_markdown_toc.py diff --git a/docs/toc.md b/docs/toc.md index 1848631..b339fd3 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -1,5 +1,4 @@ -Table of Contents -================= +# Table of Contents * [Overview](index.md#overview) * [Installation](index.md#installation) @@ -29,8 +28,10 @@ Table of Contents * [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields) * [Table Engines](table_engines.md#table-engines) - * [Buffer Models](table_engines.md#buffer-models) - * [Data Replication](table_engines.md#data-replication) + * [Simple Engines](table_engines.md#simple-engines) + * [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family) + * [Data Replication](table_engines.md#data-replication) + * [Buffer Engine](table_engines.md#buffer-engine) * [Schema Migrations](schema_migrations.md#schema-migrations) * [Writing Migrations](schema_migrations.md#writing-migrations) @@ -40,3 +41,4 @@ Table of Contents * [Partitions and Parts](system_models.md#partitions-and-parts) * [Contributing](contributing.md#contributing) + diff --git a/scripts/README.md b/scripts/README.md index f3e4096..5782dc9 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -2,14 +2,14 @@ This directory contains various scripts for use while developing. generate_toc ------------ -Generates the table of contents (toc.md) +Generates the table of contents (toc.md). Requires Pandoc. 
Usage: cd docs ../scripts/generate_toc.sh -gh-md-toc ---------- +html_to_markdown_toc.py +----------------------- Used by generate_toc. diff --git a/scripts/generate_toc.sh b/scripts/generate_toc.sh index 7b57403..1b1f1ed 100755 --- a/scripts/generate_toc.sh +++ b/scripts/generate_toc.sh @@ -1,13 +1,16 @@ -echo "Table of Contents" > toc.md -echo "=================" >> toc.md -../scripts/gh-md-toc \ - index.md \ - models_and_databases.md \ - querysets.md \ - field_types.md \ - table_engines.md \ - schema_migrations.md \ - system_models.md \ - contributing.md \ - >> toc.md +generate_one() { + # Converts Markdown to HTML using Pandoc, and then extracts the header tags + pandoc "$1" | python "../scripts/html_to_markdown_toc.py" "$1" >> toc.md +} + +printf "# Table of Contents\n\n" > toc.md + +generate_one "index.md" +generate_one "models_and_databases.md" +generate_one "querysets.md" +generate_one "field_types.md" +generate_one "table_engines.md" +generate_one "schema_migrations.md" +generate_one "system_models.md" +generate_one "contributing.md" diff --git a/scripts/gh-md-toc b/scripts/gh-md-toc deleted file mode 100755 index 158bc5f..0000000 --- a/scripts/gh-md-toc +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env bash - -# -# Source: https://github.com/ekalinin/github-markdown-toc -# -# Steps: -# -# 1. Download corresponding html file for some README.md: -# curl -s $1 -# -# 2. Discard rows where no substring 'user-content-' (github's markup): -# awk '/user-content-/ { ... -# -# 3.1 Get last number in each row like ' ... sitemap.js.*<\/h/)+2, RLENGTH-5) -# -# 5. Find anchor and insert it inside "(...)": -# substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8) -# - -gh_toc_version="0.4.8" - -gh_user_agent="gh-md-toc v$gh_toc_version" - -# -# Download rendered into html README.md by its url. 
-# -# -gh_toc_load() { - local gh_url=$1 - - if type curl &>/dev/null; then - curl --user-agent "$gh_user_agent" -s "$gh_url" - elif type wget &>/dev/null; then - wget --user-agent="$gh_user_agent" -qO- "$gh_url" - else - echo "Please, install 'curl' or 'wget' and try again." - exit 1 - fi -} - -# -# Converts local md file into html by GitHub -# -# ➥ curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown -#

Hello world github/linguist#1 cool, and #1!

'" -gh_toc_md2html() { - local gh_file_md=$1 - curl -s --user-agent "$gh_user_agent" \ - --data-binary @"$gh_file_md" -H "Content-Type:text/plain" \ - https://api.github.com/markdown/raw -} - -# -# Is passed string url -# -gh_is_url() { - if [[ $1 == https* || $1 == http* ]]; then - echo "yes" - else - echo "no" - fi -} - -# -# TOC generator -# -gh_toc(){ - local gh_src=$1 - local gh_src_copy=$1 - local gh_ttl_docs=$2 - - if [ "$gh_src" = "" ]; then - echo "Please, enter URL or local path for a README.md" - exit 1 - fi - - - # Show "TOC" string only if working with one document - if [ "$gh_ttl_docs" = "1" ]; then - - echo "Table of Contents" - echo "=================" - echo "" - gh_src_copy="" - - fi - - if [ "$(gh_is_url "$gh_src")" == "yes" ]; then - gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy" - else - gh_toc_md2html "$gh_src" | gh_toc_grab "$gh_src_copy" - fi -} - -# -# Grabber of the TOC from rendered html -# -# $1 — a source url of document. -# It's need if TOC is generated for multiple documents. -# -gh_toc_grab() { - # if closed is on the new line, then move it on the prev line - # for example: - # was: The command foo1 - # - # became: The command foo1 - sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' | - # find strings that corresponds to template - grep -E -o '//' | sed 's/<\/code>//' | - # now all rows are like: - # ... 
.*<\/h/)+2, RLENGTH-5)"](" gh_url substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8) ")"}' | sed 'y/+/ /; s/%/\\x/g')" -} - -# -# Returns filename only from full path or url -# -gh_toc_get_filename() { - echo "${1##*/}" -} - -# -# Options hendlers -# -gh_toc_app() { - local app_name="gh-md-toc" - - if [ "$1" = '--help' ] || [ $# -eq 0 ] ; then - echo "GitHub TOC generator ($app_name): $gh_toc_version" - echo "" - echo "Usage:" - echo " $app_name src [src] Create TOC for a README file (url or local path)" - echo " $app_name - Create TOC for markdown from STDIN" - echo " $app_name --help Show help" - echo " $app_name --version Show version" - return - fi - - if [ "$1" = '--version' ]; then - echo "$gh_toc_version" - return - fi - - if [ "$1" = "-" ]; then - if [ -z "$TMPDIR" ]; then - TMPDIR="/tmp" - elif [ -n "$TMPDIR" -a ! -d "$TMPDIR" ]; then - mkdir -p "$TMPDIR" - fi - local gh_tmp_md - gh_tmp_md=$(mktemp $TMPDIR/tmp.XXXXXX) - while read input; do - echo "$input" >> "$gh_tmp_md" - done - gh_toc_md2html "$gh_tmp_md" | gh_toc_grab "" - return - fi - - for md in "$@" - do - echo "" - gh_toc "$md" "$#" - done - - #echo "" - #echo "Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc)" -} - -# -# Entry point -# -gh_toc_app "$@" \ No newline at end of file diff --git a/scripts/html_to_markdown_toc.py b/scripts/html_to_markdown_toc.py new file mode 100644 index 0000000..494f32a --- /dev/null +++ b/scripts/html_to_markdown_toc.py @@ -0,0 +1,31 @@ +from HTMLParser import HTMLParser +import sys + + +HEADER_TAGS = ('h1', 'h2', 'h3') + + +class HeadersToMarkdownParser(HTMLParser): + + inside = None + text = '' + + def handle_starttag(self, tag, attrs): + if tag.lower() in HEADER_TAGS: + self.inside = tag + + def handle_endtag(self, tag): + if tag.lower() in HEADER_TAGS: + indent = ' ' * int(self.inside[1]) + fragment = self.text.lower().replace(' ', '-') + print '%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment) + self.inside = None + 
self.text = '' + + def handle_data(self, data): + if self.inside: + self.text += data + + +HeadersToMarkdownParser().feed(sys.stdin.read()) +print