mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-06 22:23:06 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
8a683a4494
|
@ -5,6 +5,7 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||||
from .tag_map import TAG_MAP
|
from .tag_map import TAG_MAP
|
||||||
from .stop_words import STOP_WORDS
|
from .stop_words import STOP_WORDS
|
||||||
from .lemmatizer import LOOKUP
|
from .lemmatizer import LOOKUP
|
||||||
|
from .syntax_iterators import SYNTAX_ITERATORS
|
||||||
|
|
||||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||||
from ..norm_exceptions import BASE_NORMS
|
from ..norm_exceptions import BASE_NORMS
|
||||||
|
@ -22,6 +23,7 @@ class SpanishDefaults(Language.Defaults):
|
||||||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||||
tag_map = dict(TAG_MAP)
|
tag_map = dict(TAG_MAP)
|
||||||
stop_words = set(STOP_WORDS)
|
stop_words = set(STOP_WORDS)
|
||||||
|
syntax_iterators = dict(SYNTAX_ITERATORS)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_lemmatizer(cls, nlp=None):
|
def create_lemmatizer(cls, nlp=None):
|
||||||
|
|
|
@ -478,7 +478,7 @@ def print_table(data, title=None):
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
data = list(data.items())
|
data = list(data.items())
|
||||||
tpl_row = ' {:<15}' * len(data[0])
|
tpl_row = ' {:<15}' * len(data[0])
|
||||||
table = '\n'.join([tpl_row.format(l, v) for l, v in data])
|
table = '\n'.join([tpl_row.format(l, unicode_(v)) for l, v in data])
|
||||||
if title:
|
if title:
|
||||||
print('\n \033[93m{}\033[0m'.format(title))
|
print('\n \033[93m{}\033[0m'.format(title))
|
||||||
print('\n{}\n'.format(table))
|
print('\n{}\n'.format(table))
|
||||||
|
@ -491,11 +491,12 @@ def print_markdown(data, title=None):
|
||||||
title (unicode or None): Title, will be rendered as headline 2.
|
title (unicode or None): Title, will be rendered as headline 2.
|
||||||
"""
|
"""
|
||||||
def excl_value(value):
|
def excl_value(value):
|
||||||
return Path(value).exists() # contains path (personal info)
|
# contains path, i.e. personal info
|
||||||
|
return isinstance(value, basestring_) and Path(value).exists()
|
||||||
|
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
data = list(data.items())
|
data = list(data.items())
|
||||||
markdown = ["* **{}:** {}".format(l, v) for l, v in data if not excl_value(v)]
|
markdown = ["* **{}:** {}".format(l, unicode_(v)) for l, v in data if not excl_value(v)]
|
||||||
if title:
|
if title:
|
||||||
print("\n## {}".format(title))
|
print("\n## {}".format(title))
|
||||||
print('\n{}\n'.format('\n'.join(markdown)))
|
print('\n{}\n'.format('\n'.join(markdown)))
|
||||||
|
|
|
@ -78,6 +78,14 @@ p
|
||||||
| #[code like_num], which includes language-specific words like "ten"
|
| #[code like_num], which includes language-specific words like "ten"
|
||||||
| or "hundred".
|
| or "hundred".
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[strong Syntax iterators]
|
||||||
|
| #[+src(gh("spaCy", "spacy/lang/en/syntax_iterators.py")) syntax_iterators.py]
|
||||||
|
+cell
|
||||||
|
| Functions that compute views of a #[code Doc] object based on its
|
||||||
|
| syntax. At the moment, only used for
|
||||||
|
| #[+a("/docs/usage/dependency-parse#noun-chunks") noun chunks].
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[strong Lemmatizer]
|
+cell #[strong Lemmatizer]
|
||||||
| #[+src(gh("spacy-dev-resources", "templates/new_language/lemmatizer.py")) lemmatizer.py]
|
| #[+src(gh("spacy-dev-resources", "templates/new_language/lemmatizer.py")) lemmatizer.py]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user