mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			66 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			66 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env python
 | 
						|
import sys
 | 
						|
import re
 | 
						|
import os
 | 
						|
import ast
 | 
						|
 | 
						|
# cgi.escape is deprecated since py32
 | 
						|
try:
 | 
						|
    from html import escape
 | 
						|
except ImportError:
 | 
						|
    from cgi import escape
 | 
						|
 | 
						|
 | 
						|
src_dirname = sys.argv[1]
 | 
						|
dst_dirname = sys.argv[2]
 | 
						|
prefix = "test_"
 | 
						|
 | 
						|
 | 
						|
for filename in os.listdir(src_dirname):
 | 
						|
    match = re.match(re.escape(prefix) + r"(.+)\.py$", filename)
 | 
						|
    if not match:
 | 
						|
        continue
 | 
						|
 | 
						|
    name = match.group(1)
 | 
						|
    source = open(os.path.join(src_dirname, filename)).readlines()
 | 
						|
    tree = ast.parse("".join(source))
 | 
						|
 | 
						|
    for root in tree.body:
 | 
						|
        if isinstance(root, ast.FunctionDef) and root.name.startswith(prefix):
 | 
						|
 | 
						|
            # only ast.expr and ast.stmt have line numbers, see:
 | 
						|
            # https://docs.python.org/2/library/ast.html#ast.AST.lineno
 | 
						|
            line_numbers = []
 | 
						|
 | 
						|
            for node in ast.walk(root):
 | 
						|
                if hasattr(node, "lineno"):
 | 
						|
                    line_numbers.append(node.lineno)
 | 
						|
 | 
						|
            body = source[min(line_numbers)-1:max(line_numbers)]
 | 
						|
            while not body[0][0].isspace():
 | 
						|
                body = body[1:]
 | 
						|
 | 
						|
            # make sure we are inside an indented function body
 | 
						|
            assert all([l[0].isspace() for l in body])
 | 
						|
 | 
						|
            offset = 0
 | 
						|
            for line in body:
 | 
						|
                match = re.search(r"[^\s]", line)
 | 
						|
                if match:
 | 
						|
                    offset = match.start(0)
 | 
						|
                    break
 | 
						|
 | 
						|
            # remove indentation
 | 
						|
            assert offset > 0
 | 
						|
 | 
						|
            for i in range(len(body)):
 | 
						|
                body[i] = body[i][offset:] if len(body[i]) > offset else "\n"
 | 
						|
 | 
						|
            # make sure empty lines contain a newline
 | 
						|
            assert all([l[-1] == "\n" for l in body])
 | 
						|
 | 
						|
            code_filename = "%s.%s" % (name, root.name[len(prefix):])
 | 
						|
 | 
						|
            with open(os.path.join(dst_dirname, code_filename), "w") as f:
 | 
						|
                f.write(escape("".join(body)))
 |