mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
8a683a4494
|
@ -5,6 +5,7 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
|||
from .tag_map import TAG_MAP
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lemmatizer import LOOKUP
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ..norm_exceptions import BASE_NORMS
|
||||
|
@ -22,6 +23,7 @@ class SpanishDefaults(Language.Defaults):
|
|||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||
tag_map = dict(TAG_MAP)
|
||||
stop_words = set(STOP_WORDS)
|
||||
syntax_iterators = dict(SYNTAX_ITERATORS)
|
||||
|
||||
@classmethod
|
||||
def create_lemmatizer(cls, nlp=None):
|
||||
|
|
|
@ -478,7 +478,7 @@ def print_table(data, title=None):
|
|||
if isinstance(data, dict):
|
||||
data = list(data.items())
|
||||
tpl_row = ' {:<15}' * len(data[0])
|
||||
table = '\n'.join([tpl_row.format(l, v) for l, v in data])
|
||||
table = '\n'.join([tpl_row.format(l, unicode_(v)) for l, v in data])
|
||||
if title:
|
||||
print('\n \033[93m{}\033[0m'.format(title))
|
||||
print('\n{}\n'.format(table))
|
||||
|
@ -491,11 +491,12 @@ def print_markdown(data, title=None):
|
|||
title (unicode or None): Title, will be rendered as headline 2.
|
||||
"""
|
||||
def excl_value(value):
|
||||
return Path(value).exists() # contains path (personal info)
|
||||
# contains path, i.e. personal info
|
||||
return isinstance(value, basestring_) and Path(value).exists()
|
||||
|
||||
if isinstance(data, dict):
|
||||
data = list(data.items())
|
||||
markdown = ["* **{}:** {}".format(l, v) for l, v in data if not excl_value(v)]
|
||||
markdown = ["* **{}:** {}".format(l, unicode_(v)) for l, v in data if not excl_value(v)]
|
||||
if title:
|
||||
print("\n## {}".format(title))
|
||||
print('\n{}\n'.format('\n'.join(markdown)))
|
||||
|
|
|
@ -78,6 +78,14 @@ p
|
|||
| #[code like_num], which includes language-specific words like "ten"
|
||||
| or "hundred".
|
||||
|
||||
+row
|
||||
+cell #[strong Syntax iterators]
|
||||
| #[+src(gh("spaCy", "spacy/lang/en/syntax_iterators.py")) syntax_iterators.py]
|
||||
+cell
|
||||
| Functions that compute views of a #[code Doc] object based on its
|
||||
| syntax. At the moment, only used for
|
||||
| #[+a("/docs/usage/dependency-parse#noun-chunks") noun chunks].
|
||||
|
||||
+row
|
||||
+cell #[strong Lemmatizer]
|
||||
| #[+src(gh("spacy-dev-resources", "templates/new_language/lemmatizer.py")) lemmatizer.py]
|
||||
|
|
Loading…
Reference in New Issue
Block a user