mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 13:11:03 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			77 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			77 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python
 | |
| from __future__ import unicode_literals
 | |
| 
 | |
| import os
 | |
| import ast
 | |
| import io
 | |
| import re
 | |
| 
 | |
| import plac
 | |
| 
 | |
| # cgi.escape is deprecated since py32
 | |
| try:
 | |
|     from html import escape
 | |
| except ImportError:
 | |
|     from cgi import escape
 | |
| 
 | |
| 
 | |
| # e.g. python website/create_code_samples tests/website/ website/src/
 | |
| def main(src_dirname, dst_dirname):
 | |
|     prefix = "test_"
 | |
|     
 | |
|     for filename in os.listdir(src_dirname):
 | |
|         if not filename.startswith('test_'):
 | |
|             continue
 | |
|         if not filename.endswith('.py'):
 | |
|             continue
 | |
|     
 | |
|         # Remove test_ prefix and .py suffix
 | |
|         name = filename[6:-3]
 | |
|         with io.open(os.path.join(src_dirname, filename), 'r', encoding='utf8') as file_:
 | |
|             source = file_.readlines()
 | |
|         tree = ast.parse("".join(source))
 | |
|     
 | |
|         for root in tree.body:
 | |
|             if isinstance(root, ast.FunctionDef) and root.name.startswith(prefix):
 | |
|     
 | |
|                 # only ast.expr and ast.stmt have line numbers, see:
 | |
|                 # https://docs.python.org/2/library/ast.html#ast.AST.lineno
 | |
|                 line_numbers = []
 | |
|     
 | |
|                 for node in ast.walk(root):
 | |
|                     if hasattr(node, "lineno"):
 | |
|                         line_numbers.append(node.lineno)
 | |
|     
 | |
|                 body = source[min(line_numbers)-1:max(line_numbers)]
 | |
|                 while not body[0][0].isspace():
 | |
|                     body = body[1:]
 | |
|     
 | |
|                 # make sure we are inside an indented function body
 | |
|                 assert all([l[0].isspace() for l in body])
 | |
|     
 | |
|                 offset = 0
 | |
|                 for line in body:
 | |
|                     match = re.search(r"[^\s]", line)
 | |
|                     if match:
 | |
|                         offset = match.start(0)
 | |
|                         break
 | |
|     
 | |
|                 # remove indentation
 | |
|                 assert offset > 0
 | |
|     
 | |
|                 for i in range(len(body)):
 | |
|                     body[i] = body[i][offset:] if len(body[i]) > offset else "\n"
 | |
|     
 | |
|                 # make sure empty lines contain a newline
 | |
|                 assert all([l[-1] == "\n" for l in body])
 | |
|     
 | |
|                 code_filename = "%s.%s" % (name, root.name[len(prefix):])
 | |
|     
 | |
|                 with io.open(os.path.join(dst_dirname, code_filename),
 | |
|                              "w", encoding='utf8') as f:
 | |
|                     f.write(escape("".join(body)))
 | |
| 
 | |
| 
 | |
| # Script entry point: when this file is executed directly, hand main to
 | |
| # plac.call (presumably mapping the command-line arguments onto main's
 | |
| # src_dirname / dst_dirname parameters -- confirm against the plac docs).
 | |
| if __name__ == '__main__':
 | |
|     plac.call(main)
 |