diff --git a/docs/toc.md b/docs/toc.md index 1848631..b339fd3 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -1,5 +1,4 @@ -Table of Contents -================= +# Table of Contents * [Overview](index.md#overview) * [Installation](index.md#installation) @@ -29,8 +28,10 @@ Table of Contents * [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields) * [Table Engines](table_engines.md#table-engines) - * [Buffer Models](table_engines.md#buffer-models) - * [Data Replication](table_engines.md#data-replication) + * [Simple Engines](table_engines.md#simple-engines) + * [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family) + * [Data Replication](table_engines.md#data-replication) + * [Buffer Engine](table_engines.md#buffer-engine) * [Schema Migrations](schema_migrations.md#schema-migrations) * [Writing Migrations](schema_migrations.md#writing-migrations) @@ -40,3 +41,4 @@ Table of Contents * [Partitions and Parts](system_models.md#partitions-and-parts) * [Contributing](contributing.md#contributing) + diff --git a/scripts/README.md b/scripts/README.md index f3e4096..5782dc9 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -2,14 +2,14 @@ This directory contains various scripts for use while developing. generate_toc ------------ -Generates the table of contents (toc.md) +Generates the table of contents (toc.md). Requires Pandoc. Usage: cd docs ../scripts/generate_toc.sh -gh-md-toc ---------- +html_to_markdown_toc.py +----------------------- Used by generate_toc. 
diff --git a/scripts/generate_toc.sh b/scripts/generate_toc.sh index 7b57403..1b1f1ed 100755 --- a/scripts/generate_toc.sh +++ b/scripts/generate_toc.sh @@ -1,13 +1,16 @@ -echo "Table of Contents" > toc.md -echo "=================" >> toc.md -../scripts/gh-md-toc \ - index.md \ - models_and_databases.md \ - querysets.md \ - field_types.md \ - table_engines.md \ - schema_migrations.md \ - system_models.md \ - contributing.md \ - >> toc.md +generate_one() { + # Converts Markdown to HTML using Pandoc, and then extracts the header tags + pandoc "$1" | python "../scripts/html_to_markdown_toc.py" "$1" >> toc.md +} + +printf "# Table of Contents\n\n" > toc.md + +generate_one "index.md" +generate_one "models_and_databases.md" +generate_one "querysets.md" +generate_one "field_types.md" +generate_one "table_engines.md" +generate_one "schema_migrations.md" +generate_one "system_models.md" +generate_one "contributing.md" diff --git a/scripts/gh-md-toc b/scripts/gh-md-toc deleted file mode 100755 index 158bc5f..0000000 --- a/scripts/gh-md-toc +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env bash - -# -# Source: https://github.com/ekalinin/github-markdown-toc -# -# Steps: -# -# 1. Download corresponding html file for some README.md: -# curl -s $1 -# -# 2. Discard rows where no substring 'user-content-' (github's markup): -# awk '/user-content-/ { ... -# -# 3.1 Get last number in each row like ' ... sitemap.js.*<\/h/)+2, RLENGTH-5) -# -# 5. Find anchor and insert it inside "(...)": -# substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8) -# - -gh_toc_version="0.4.8" - -gh_user_agent="gh-md-toc v$gh_toc_version" - -# -# Download rendered into html README.md by its url. -# -# -gh_toc_load() { - local gh_url=$1 - - if type curl &>/dev/null; then - curl --user-agent "$gh_user_agent" -s "$gh_url" - elif type wget &>/dev/null; then - wget --user-agent="$gh_user_agent" -qO- "$gh_url" - else - echo "Please, install 'curl' or 'wget' and try again." 
- exit 1 - fi -} - -# -# Converts local md file into html by GitHub -# -# ➥ curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown -#
Hello world github/linguist#1 cool, and #1!
'" -gh_toc_md2html() { - local gh_file_md=$1 - curl -s --user-agent "$gh_user_agent" \ - --data-binary @"$gh_file_md" -H "Content-Type:text/plain" \ - https://api.github.com/markdown/raw -} - -# -# Is passed string url -# -gh_is_url() { - if [[ $1 == https* || $1 == http* ]]; then - echo "yes" - else - echo "no" - fi -} - -# -# TOC generator -# -gh_toc(){ - local gh_src=$1 - local gh_src_copy=$1 - local gh_ttl_docs=$2 - - if [ "$gh_src" = "" ]; then - echo "Please, enter URL or local path for a README.md" - exit 1 - fi - - - # Show "TOC" string only if working with one document - if [ "$gh_ttl_docs" = "1" ]; then - - echo "Table of Contents" - echo "=================" - echo "" - gh_src_copy="" - - fi - - if [ "$(gh_is_url "$gh_src")" == "yes" ]; then - gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy" - else - gh_toc_md2html "$gh_src" | gh_toc_grab "$gh_src_copy" - fi -} - -# -# Grabber of the TOC from rendered html -# -# $1 — a source url of document. -# It's need if TOC is generated for multiple documents. -# -gh_toc_grab() { - # if closedfoo1
- #
- # became: The command foo1
- sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' |
- # find strings that corresponds to template
- grep -E -o '//' | sed 's/<\/code>//' |
- # now all rows are like:
- # ... .*<\/h/)+2, RLENGTH-5)"](" gh_url substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8) ")"}' | sed 'y/+/ /; s/%/\\x/g')"
-}
-
-#
-# Returns filename only from full path or url
-#
-gh_toc_get_filename() {
- echo "${1##*/}"
-}
-
-#
-# Options hendlers
-#
-gh_toc_app() {
- local app_name="gh-md-toc"
-
- if [ "$1" = '--help' ] || [ $# -eq 0 ] ; then
- echo "GitHub TOC generator ($app_name): $gh_toc_version"
- echo ""
- echo "Usage:"
- echo " $app_name src [src] Create TOC for a README file (url or local path)"
- echo " $app_name - Create TOC for markdown from STDIN"
- echo " $app_name --help Show help"
- echo " $app_name --version Show version"
- return
- fi
-
- if [ "$1" = '--version' ]; then
- echo "$gh_toc_version"
- return
- fi
-
- if [ "$1" = "-" ]; then
- if [ -z "$TMPDIR" ]; then
- TMPDIR="/tmp"
- elif [ -n "$TMPDIR" -a ! -d "$TMPDIR" ]; then
- mkdir -p "$TMPDIR"
- fi
- local gh_tmp_md
- gh_tmp_md=$(mktemp $TMPDIR/tmp.XXXXXX)
- while read input; do
- echo "$input" >> "$gh_tmp_md"
- done
- gh_toc_md2html "$gh_tmp_md" | gh_toc_grab ""
- return
- fi
-
- for md in "$@"
- do
- echo ""
- gh_toc "$md" "$#"
- done
-
- #echo ""
- #echo "Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc)"
-}
-
-#
-# Entry point
-#
-gh_toc_app "$@"
\ No newline at end of file
diff --git a/scripts/html_to_markdown_toc.py b/scripts/html_to_markdown_toc.py
new file mode 100644
index 0000000..494f32a
--- /dev/null
+++ b/scripts/html_to_markdown_toc.py
@@ -0,0 +1,31 @@
+from HTMLParser import HTMLParser
+import sys
+
+
+HEADER_TAGS = ('h1', 'h2', 'h3')
+
+
+class HeadersToMarkdownParser(HTMLParser):
+    """Prints a Markdown list item linking to each h1/h2/h3 header it parses."""
+    inside = None  # name of the header tag currently open, or None
+    text = ''      # text collected so far inside the open header
+
+    def handle_starttag(self, tag, attrs):
+        if tag.lower() in HEADER_TAGS:
+            self.inside = tag  # start collecting this header's text
+
+    def handle_endtag(self, tag):
+        if tag.lower() in HEADER_TAGS and self.inside:  # ignore stray </hN>
+            indent = ' ' * int(self.inside[1])  # level digit: 'h2' -> 2
+            fragment = self.text.lower().replace(' ', '-')  # anchor name
+            print('%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment))
+            self.inside = None
+            self.text = ''
+
+    def handle_data(self, data):
+        if self.inside:  # text outside headers is not part of the TOC
+            self.text += data
+
+# Parse the HTML read from stdin; matching headers are printed as they close.
+HeadersToMarkdownParser().feed(sys.stdin.read())
+print('')  # empty line after this document's entries