From d51579ffe6b53f64c9b4c5b29fc01478d6f1e61d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 19 Oct 2015 12:56:00 +1100
Subject: [PATCH] * Pedantic edits to website/create_code_samples. Make it use plac for interface, remove unnecessary regex, ensure unicode is handled correctly under Python 2.

---
Review notes (text in this section is ignored when the patch is applied):
 * The sample name is now sliced with len(prefix) instead of the literal 6;
   "test_" is five characters long, so the literal dropped the first
   character of every sample name.
 * The indentation offset is computed with str.lstrip() instead of
   re.search(), because this patch removes "import re".
 * The post-image blob id on the "index" line predates these two fixes.

 website/create_code_samples | 114 ++++++++++++++++++++----------------
 1 file changed, 62 insertions(+), 52 deletions(-)

diff --git a/website/create_code_samples b/website/create_code_samples
index 5eb1b80c4..659a3d71c 100755
--- a/website/create_code_samples
+++ b/website/create_code_samples
@@ -1,8 +1,11 @@
 #!/usr/bin/env python
-import sys
-import re
+from __future__ import unicode_literals
+
 import os
 import ast
+import io
+
+import plac
 
 # cgi.escape is deprecated since py32
 try:
@@ -11,55 +14,62 @@ except ImportError:
     from cgi import escape
 
 
-src_dirname = sys.argv[1]
-dst_dirname = sys.argv[2]
-prefix = "test_"
+# e.g. python website/create_code_samples tests/website/ website/src/
+def main(src_dirname, dst_dirname):
+    prefix = "test_"
+
+    for filename in os.listdir(src_dirname):
+        if not filename.startswith(prefix):
+            continue
+        if not filename.endswith('.py'):
+            continue
+
+        # Remove test_ prefix and .py suffix
+        name = filename[len(prefix):-3]
+        with io.open(os.path.join(src_dirname, filename), 'r', encoding='utf8') as file_:
+            source = file_.readlines()
+        tree = ast.parse("".join(source))
+
+        for root in tree.body:
+            if isinstance(root, ast.FunctionDef) and root.name.startswith(prefix):
+
+                # only ast.expr and ast.stmt have line numbers, see:
+                # https://docs.python.org/2/library/ast.html#ast.AST.lineno
+                line_numbers = []
+
+                for node in ast.walk(root):
+                    if hasattr(node, "lineno"):
+                        line_numbers.append(node.lineno)
+
+                body = source[min(line_numbers)-1:max(line_numbers)]
+                while not body[0][0].isspace():
+                    body = body[1:]
+
+                # make sure we are inside an indented function body
+                assert all([l[0].isspace() for l in body])
+
+                offset = 0
+                for line in body:
+                    stripped = line.lstrip()
+                    if stripped:
+                        offset = len(line) - len(stripped)
+                        break
+
+                # remove indentation
+                assert offset > 0
+
+                for i in range(len(body)):
+                    body[i] = body[i][offset:] if len(body[i]) > offset else "\n"
+
+                # make sure empty lines contain a newline
+                assert all([l[-1] == "\n" for l in body])
+
+                code_filename = "%s.%s" % (name, root.name[len(prefix):])
+
+                with io.open(os.path.join(dst_dirname, code_filename),
+                             "w", encoding='utf8') as f:
+                    f.write(escape("".join(body)))
 
 
-for filename in os.listdir(src_dirname):
-    match = re.match(re.escape(prefix) + r"(.+)\.py$", filename)
-    if not match:
-        continue
-
-    name = match.group(1)
-    source = open(os.path.join(src_dirname, filename)).readlines()
-    tree = ast.parse("".join(source))
-
-    for root in tree.body:
-        if isinstance(root, ast.FunctionDef) and root.name.startswith(prefix):
-
-            # only ast.expr and ast.stmt have line numbers, see:
-            # https://docs.python.org/2/library/ast.html#ast.AST.lineno
-            line_numbers = []
-
-            for node in ast.walk(root):
-                if hasattr(node, "lineno"):
-                    line_numbers.append(node.lineno)
-
-            body = source[min(line_numbers)-1:max(line_numbers)]
-            while not body[0][0].isspace():
-                body = body[1:]
-
-            # make sure we are inside an indented function body
-            assert all([l[0].isspace() for l in body])
-
-            offset = 0
-            for line in body:
-                match = re.search(r"[^\s]", line)
-                if match:
-                    offset = match.start(0)
-                    break
-
-            # remove indentation
-            assert offset > 0
-
-            for i in range(len(body)):
-                body[i] = body[i][offset:] if len(body[i]) > offset else "\n"
-
-            # make sure empty lines contain a newline
-            assert all([l[-1] == "\n" for l in body])
-
-            code_filename = "%s.%s" % (name, root.name[len(prefix):])
-
-            with open(os.path.join(dst_dirname, code_filename), "w") as f:
-                f.write(escape("".join(body)))
+if __name__ == '__main__':
+    plac.call(main)