From d51579ffe6b53f64c9b4c5b29fc01478d6f1e61d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Mon, 19 Oct 2015 12:56:00 +1100
Subject: [PATCH] * Pedantic edits to website/create_code_samples. Make it use
 plac for interface, remove unnecessary regex, ensure unicode is handled
 correctly under Python 2.

---
 website/create_code_samples | 114 ++++++++++++++++++++----------------
 1 file changed, 62 insertions(+), 52 deletions(-)

diff --git a/website/create_code_samples b/website/create_code_samples
index 5eb1b80c4..659a3d71c 100755
--- a/website/create_code_samples
+++ b/website/create_code_samples
@@ -1,8 +1,11 @@
 #!/usr/bin/env python
-import sys
-import re
+from __future__ import unicode_literals
+
 import os
 import ast
+import io
+
+import plac
 
 # cgi.escape is deprecated since py32
 try:
@@ -11,55 +14,62 @@ except ImportError:
     from cgi import escape
 
 
-src_dirname = sys.argv[1]
-dst_dirname = sys.argv[2]
-prefix = "test_"
+# e.g. python website/create_code_samples tests/website/ website/src/
+def main(src_dirname, dst_dirname):
+    """Write the body of each test_* function to an HTML-escaped sample file.
+
+    Reads test_*.py files in src_dirname; writes <module>.<function> files
+    (names without the test_ prefix) into dst_dirname.
+    """
+    prefix = "test_"
+
+    for filename in os.listdir(src_dirname):
+        if not (filename.startswith(prefix) and filename.endswith('.py')):
+            continue
+
+        # Remove test_ prefix and .py suffix
+        name = filename[len(prefix):-len('.py')]
+        with io.open(os.path.join(src_dirname, filename), 'r', encoding='utf8') as file_:
+            source = file_.readlines()
+        tree = ast.parse("".join(source))
+
+        for root in tree.body:
+            if isinstance(root, ast.FunctionDef) and root.name.startswith(prefix):
+                # only ast.expr and ast.stmt have line numbers, see:
+                # https://docs.python.org/2/library/ast.html#ast.AST.lineno
+                line_numbers = [node.lineno for node in ast.walk(root)
+                                if hasattr(node, "lineno")]
+
+                body = source[min(line_numbers)-1:max(line_numbers)]
+                # drop leading unindented lines (e.g. the ``def`` line itself)
+                while not body[0][0].isspace():
+                    body = body[1:]
+
+                # make sure we are inside an indented function body
+                assert all([l[0].isspace() for l in body])
+
+                # indentation width is taken from the first non-blank line
+                offset = 0
+                for line in body:
+                    stripped = line.lstrip()
+                    if stripped:
+                        offset = len(line) - len(stripped)
+                        break
+
+                # remove indentation
+                assert offset > 0
+
+                body = [l[offset:] if len(l) > offset else "\n" for l in body]
+
+                # make sure empty lines contain a newline
+                assert all([l[-1] == "\n" for l in body])
+
+                code_filename = "%s.%s" % (name, root.name[len(prefix):])
+
+                with io.open(os.path.join(dst_dirname, code_filename),
+                             "w", encoding='utf8') as f:
+                    f.write(escape("".join(body)))
 
 
-for filename in os.listdir(src_dirname):
-    match = re.match(re.escape(prefix) + r"(.+)\.py$", filename)
-    if not match:
-        continue
-
-    name = match.group(1)
-    source = open(os.path.join(src_dirname, filename)).readlines()
-    tree = ast.parse("".join(source))
-
-    for root in tree.body:
-        if isinstance(root, ast.FunctionDef) and root.name.startswith(prefix):
-
-            # only ast.expr and ast.stmt have line numbers, see:
-            # https://docs.python.org/2/library/ast.html#ast.AST.lineno
-            line_numbers = []
-
-            for node in ast.walk(root):
-                if hasattr(node, "lineno"):
-                    line_numbers.append(node.lineno)
-
-            body = source[min(line_numbers)-1:max(line_numbers)]
-            while not body[0][0].isspace():
-                body = body[1:]
-
-            # make sure we are inside an indented function body
-            assert all([l[0].isspace() for l in body])
-
-            offset = 0
-            for line in body:
-                match = re.search(r"[^\s]", line)
-                if match:
-                    offset = match.start(0)
-                    break
-
-            # remove indentation
-            assert offset > 0
-
-            for i in range(len(body)):
-                body[i] = body[i][offset:] if len(body[i]) > offset else "\n"
-
-            # make sure empty lines contain a newline
-            assert all([l[-1] == "\n" for l in body])
-
-            code_filename = "%s.%s" % (name, root.name[len(prefix):])
-
-            with open(os.path.join(dst_dirname, code_filename), "w") as f:
-                f.write(escape("".join(body)))
+if __name__ == '__main__':
+    plac.call(main)