mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
		
						commit
						8a683a4494
					
				| 
						 | 
					@ -5,6 +5,7 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 | 
				
			||||||
from .tag_map import TAG_MAP
 | 
					from .tag_map import TAG_MAP
 | 
				
			||||||
from .stop_words import STOP_WORDS
 | 
					from .stop_words import STOP_WORDS
 | 
				
			||||||
from .lemmatizer import LOOKUP
 | 
					from .lemmatizer import LOOKUP
 | 
				
			||||||
 | 
					from .syntax_iterators import SYNTAX_ITERATORS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
 | 
					from ..tokenizer_exceptions import BASE_EXCEPTIONS
 | 
				
			||||||
from ..norm_exceptions import BASE_NORMS
 | 
					from ..norm_exceptions import BASE_NORMS
 | 
				
			||||||
| 
						 | 
					@ -22,6 +23,7 @@ class SpanishDefaults(Language.Defaults):
 | 
				
			||||||
    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
 | 
					    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
 | 
				
			||||||
    tag_map = dict(TAG_MAP)
 | 
					    tag_map = dict(TAG_MAP)
 | 
				
			||||||
    stop_words = set(STOP_WORDS)
 | 
					    stop_words = set(STOP_WORDS)
 | 
				
			||||||
 | 
					    sytax_iterators = dict(SYNTAX_ITERATORS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def create_lemmatizer(cls, nlp=None):
 | 
					    def create_lemmatizer(cls, nlp=None):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -478,7 +478,7 @@ def print_table(data, title=None):
 | 
				
			||||||
    if isinstance(data, dict):
 | 
					    if isinstance(data, dict):
 | 
				
			||||||
        data = list(data.items())
 | 
					        data = list(data.items())
 | 
				
			||||||
    tpl_row = '    {:<15}' * len(data[0])
 | 
					    tpl_row = '    {:<15}' * len(data[0])
 | 
				
			||||||
    table = '\n'.join([tpl_row.format(l, v) for l, v in data])
 | 
					    table = '\n'.join([tpl_row.format(l, unicode_(v)) for l, v in data])
 | 
				
			||||||
    if title:
 | 
					    if title:
 | 
				
			||||||
        print('\n    \033[93m{}\033[0m'.format(title))
 | 
					        print('\n    \033[93m{}\033[0m'.format(title))
 | 
				
			||||||
    print('\n{}\n'.format(table))
 | 
					    print('\n{}\n'.format(table))
 | 
				
			||||||
| 
						 | 
					@ -491,11 +491,12 @@ def print_markdown(data, title=None):
 | 
				
			||||||
    title (unicode or None): Title, will be rendered as headline 2.
 | 
					    title (unicode or None): Title, will be rendered as headline 2.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    def excl_value(value):
 | 
					    def excl_value(value):
 | 
				
			||||||
        return Path(value).exists() # contains path (personal info)
 | 
					        # contains path, i.e. personal info
 | 
				
			||||||
 | 
					        return isinstance(value, basestring_) and Path(value).exists()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if isinstance(data, dict):
 | 
					    if isinstance(data, dict):
 | 
				
			||||||
        data = list(data.items())
 | 
					        data = list(data.items())
 | 
				
			||||||
    markdown = ["* **{}:** {}".format(l, v) for l, v in data if not excl_value(v)]
 | 
					    markdown = ["* **{}:** {}".format(l, unicode_(v)) for l, v in data if not excl_value(v)]
 | 
				
			||||||
    if title:
 | 
					    if title:
 | 
				
			||||||
        print("\n## {}".format(title))
 | 
					        print("\n## {}".format(title))
 | 
				
			||||||
    print('\n{}\n'.format('\n'.join(markdown)))
 | 
					    print('\n{}\n'.format('\n'.join(markdown)))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -78,6 +78,14 @@ p
 | 
				
			||||||
            |  #[code like_num], which includes language-specific words like "ten"
 | 
					            |  #[code like_num], which includes language-specific words like "ten"
 | 
				
			||||||
            |  or "hundred".
 | 
					            |  or "hundred".
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    +row
 | 
				
			||||||
 | 
					        +cell #[strong Syntax iterators]
 | 
				
			||||||
 | 
					            |  #[+src(gh("spaCy", "spacy/lang/en/syntax_iterators.py")) syntax_iterators.py]
 | 
				
			||||||
 | 
					        +cell
 | 
				
			||||||
 | 
					            |  Functions that compute views of a #[code Doc] object based on its
 | 
				
			||||||
 | 
					            |  syntax. At the moment, only used for
 | 
				
			||||||
 | 
					            |  #[+a("/docs/usage/dependency-parse#noun-chunks") noun chunks].
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    +row
 | 
					    +row
 | 
				
			||||||
        +cell #[strong Lemmatizer]
 | 
					        +cell #[strong Lemmatizer]
 | 
				
			||||||
            |  #[+src(gh("spacy-dev-resources", "templates/new_language/lemmatizer.py")) lemmatizer.py]
 | 
					            |  #[+src(gh("spacy-dev-resources", "templates/new_language/lemmatizer.py")) lemmatizer.py]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user