Merge branch 'master' of https://github.com/explosion/spaCy into feature/better-faster-matcher

This commit is contained in:
Matthew Honnibal 2018-02-22 21:47:10 +01:00
commit 331904fa9c
7 changed files with 1639 additions and 5 deletions

View File

@@ -13,5 +13,5 @@ regex==2017.4.5
ftfy>=4.4.2,<5.0.0 ftfy>=4.4.2,<5.0.0
pytest>=3.0.6,<4.0.0 pytest>=3.0.6,<4.0.0
mock>=2.0.0,<3.0.0 mock>=2.0.0,<3.0.0
msgpack-python msgpack-python==0.5.4
msgpack-numpy==0.4.1 msgpack-numpy==0.4.1

View File

@@ -197,7 +197,7 @@ def setup_package():
'requests>=2.13.0,<3.0.0', 'requests>=2.13.0,<3.0.0',
'regex==2017.4.5', 'regex==2017.4.5',
'ftfy>=4.4.2,<5.0.0', 'ftfy>=4.4.2,<5.0.0',
'msgpack-python', 'msgpack-python==0.5.4',
'msgpack-numpy==0.4.1'], 'msgpack-numpy==0.4.1'],
classifiers=[ classifiers=[
'Development Status :: 5 - Production/Stable', 'Development Status :: 5 - Production/Stable',

View File

@@ -131,7 +131,7 @@ def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False):
'NumValue', 'PartType', 'Polite', 'StyleVariant', 'NumValue', 'PartType', 'Polite', 'StyleVariant',
'PronType', 'AdjType', 'Person', 'Variant', 'AdpType', 'PronType', 'AdjType', 'Person', 'Variant', 'AdpType',
'Reflex', 'Negative', 'Mood', 'Aspect', 'Case', 'Reflex', 'Negative', 'Mood', 'Aspect', 'Case',
'Polarity', 'Animacy' # U20 'Polarity', 'PrepCase', 'Animacy' # U20
] ]
for key in morph_keys: for key in morph_keys:
if key in stringy_attrs: if key in stringy_attrs:

View File

@@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .tag_map import TAG_MAP
from .stop_words import STOP_WORDS from .stop_words import STOP_WORDS
from ..tokenizer_exceptions import BASE_EXCEPTIONS from ..tokenizer_exceptions import BASE_EXCEPTIONS
@@ -17,6 +18,7 @@ class PolishDefaults(Language.Defaults):
lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
stop_words = STOP_WORDS stop_words = STOP_WORDS
tag_map = TAG_MAP
class Polish(Language): class Polish(Language):

1628
spacy/lang/pl/tag_map.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -47,7 +47,9 @@ cdef class Morphology:
cdef enum univ_morph_t: cdef enum univ_morph_t:
NIL = 0 NIL = 0
Animacy_anim = symbols.Animacy_anim Animacy_anim = symbols.Animacy_anim
Animacy_inam Animacy_inan
Animacy_hum
Animacy_nhum
Aspect_freq Aspect_freq
Aspect_imp Aspect_imp
Aspect_mod Aspect_mod

View File

@@ -184,7 +184,9 @@ cdef class Morphology:
IDS = { IDS = {
"Animacy_anim": Animacy_anim, "Animacy_anim": Animacy_anim,
"Animacy_inam": Animacy_inam, "Animacy_inan": Animacy_inan,
"Animacy_hum": Animacy_hum, # U20
"Animacy_nhum": Animacy_nhum,
"Aspect_freq": Aspect_freq, "Aspect_freq": Aspect_freq,
"Aspect_imp": Aspect_imp, "Aspect_imp": Aspect_imp,
"Aspect_mod": Aspect_mod, "Aspect_mod": Aspect_mod,