diff --git a/spacy/lang/es/__init__.py b/spacy/lang/es/__init__.py
index e20338b39..1e7f55be8 100644
--- a/spacy/lang/es/__init__.py
+++ b/spacy/lang/es/__init__.py
@@ -5,6 +5,7 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .tag_map import TAG_MAP
 from .stop_words import STOP_WORDS
 from .lemmatizer import LOOKUP
+from .syntax_iterators import SYNTAX_ITERATORS
 
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ..norm_exceptions import BASE_NORMS
@@ -22,6 +23,7 @@ class SpanishDefaults(Language.Defaults):
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
     tag_map = dict(TAG_MAP)
     stop_words = set(STOP_WORDS)
+    syntax_iterators = dict(SYNTAX_ITERATORS)
 
     @classmethod
     def create_lemmatizer(cls, nlp=None):
diff --git a/spacy/util.py b/spacy/util.py
index 9216edee8..cb1aec4c3 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -478,7 +478,7 @@ def print_table(data, title=None):
     if isinstance(data, dict):
         data = list(data.items())
     tpl_row = ' {:<15}' * len(data[0])
-    table = '\n'.join([tpl_row.format(l, v) for l, v in data])
+    table = '\n'.join([tpl_row.format(l, unicode_(v)) for l, v in data])
     if title:
         print('\n \033[93m{}\033[0m'.format(title))
     print('\n{}\n'.format(table))
@@ -491,11 +491,12 @@ def print_markdown(data, title=None):
     title (unicode or None): Title, will be rendered as headline 2.
     """
     def excl_value(value):
-        return Path(value).exists() # contains path (personal info)
+        # contains path, i.e. personal info
+        return isinstance(value, basestring_) and Path(value).exists()
 
     if isinstance(data, dict):
         data = list(data.items())
-    markdown = ["* **{}:** {}".format(l, v) for l, v in data if not excl_value(v)]
+    markdown = ["* **{}:** {}".format(l, unicode_(v)) for l, v in data if not excl_value(v)]
     if title:
         print("\n## {}".format(title))
     print('\n{}\n'.format('\n'.join(markdown)))
diff --git a/website/docs/usage/_spacy-101/_language-data.jade b/website/docs/usage/_spacy-101/_language-data.jade
index c70bb5c7a..aaca10ebb 100644
--- a/website/docs/usage/_spacy-101/_language-data.jade
+++ b/website/docs/usage/_spacy-101/_language-data.jade
@@ -78,6 +78,14 @@ p
             | #[code like_num], which includes language-specific words like "ten"
             | or "hundred".
 
+    +row
+        +cell #[strong Syntax iterators]
+            | #[+src(gh("spaCy", "spacy/lang/en/syntax_iterators.py")) syntax_iterators.py]
+        +cell
+            | Functions that compute views of a #[code Doc] object based on its
+            | syntax. At the moment, only used for
+            | #[+a("/docs/usage/dependency-parse#noun-chunks") noun chunks].
+
     +row
         +cell #[strong Lemmatizer]
             | #[+src(gh("spacy-dev-resources", "templates/new_language/lemmatizer.py")) lemmatizer.py]
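For context on what the new `SYNTAX_ITERATORS` entry wires up, the sketch below shows roughly what a `syntax_iterators.py` module of this era provides: each iterator receives a `Doc` or `Span` and yields `(start, end, label)` triples that back properties such as `Doc.noun_chunks`. This is an illustrative assumption, not the Spanish module added by this commit; the dependency labels (`nsubj`, `dobj`, `ROOT`) and the chunking logic are simplified placeholders.

```python
# Minimal sketch of a syntax_iterators.py module (illustrative only; not the
# actual Spanish implementation introduced by this diff).
from spacy.symbols import NOUN, PROPN, PRON


def noun_chunks(obj):
    """Yield base noun phrases as (start, end, label) offsets into the Doc."""
    doc = obj.doc                        # works for both Doc and Span input
    np_label = doc.vocab.strings['NP']   # label ID attached to the yielded spans
    # Dependency labels that mark a token as the head of a noun chunk.
    # The exact set is language-specific; these are placeholders.
    np_deps = set(doc.vocab.strings[dep] for dep in ('nsubj', 'dobj', 'ROOT'))
    for word in obj:
        if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps:
            # Span from the leftmost token of the word's subtree to the word itself.
            yield word.left_edge.i, word.i + 1, np_label


# Picked up through the syntax_iterators attribute on Language.Defaults.
SYNTAX_ITERATORS = {
    'noun_chunks': noun_chunks
}
```

Once such a mapping is registered on a language's `Defaults`, iterating over `doc.noun_chunks` on a parsed `Doc` uses it under the hood, which is what the new documentation row in `_language-data.jade` refers to.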