mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Fix lemmatizer is_base_form for python2.7 (#5734)
* Fix lemmatizer init args for python2.7 * Move English is_base_form to a class method * Skip test pickling PhraseMatcher for python2
This commit is contained in:
parent
923affd091
commit
0a62098c5f
|
@ -18,7 +18,26 @@ def _return_en(_):
|
|||
return "en"
|
||||
|
||||
|
||||
def en_is_base_form(univ_pos, morphology=None):
|
||||
class EnglishDefaults(Language.Defaults):
|
||||
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
|
||||
lex_attr_getters.update(LEX_ATTRS)
|
||||
lex_attr_getters[LANG] = _return_en
|
||||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||
tag_map = TAG_MAP
|
||||
stop_words = STOP_WORDS
|
||||
morph_rules = MORPH_RULES
|
||||
syntax_iterators = SYNTAX_ITERATORS
|
||||
single_orth_variants = [
|
||||
{"tags": ["NFP"], "variants": ["…", "..."]},
|
||||
{"tags": [":"], "variants": ["-", "—", "–", "--", "---", "——"]},
|
||||
]
|
||||
paired_orth_variants = [
|
||||
{"tags": ["``", "''"], "variants": [("'", "'"), ("‘", "’")]},
|
||||
{"tags": ["``", "''"], "variants": [('"', '"'), ("“", "”")]},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def is_base_form(cls, univ_pos, morphology=None):
|
||||
"""
|
||||
Check whether we're dealing with an uninflected paradigm, so we can
|
||||
avoid lemmatization entirely.
|
||||
|
@ -53,26 +72,6 @@ def en_is_base_form(univ_pos, morphology=None):
|
|||
return False
|
||||
|
||||
|
||||
class EnglishDefaults(Language.Defaults):
|
||||
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
|
||||
lex_attr_getters.update(LEX_ATTRS)
|
||||
lex_attr_getters[LANG] = _return_en
|
||||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||
tag_map = TAG_MAP
|
||||
stop_words = STOP_WORDS
|
||||
morph_rules = MORPH_RULES
|
||||
is_base_form = en_is_base_form
|
||||
syntax_iterators = SYNTAX_ITERATORS
|
||||
single_orth_variants = [
|
||||
{"tags": ["NFP"], "variants": ["…", "..."]},
|
||||
{"tags": [":"], "variants": ["-", "—", "–", "--", "---", "——"]},
|
||||
]
|
||||
paired_orth_variants = [
|
||||
{"tags": ["``", "''"], "variants": [("'", "'"), ("‘", "’")]},
|
||||
{"tags": ["``", "''"], "variants": [('"', '"'), ("“", "”")]},
|
||||
]
|
||||
|
||||
|
||||
class English(Language):
|
||||
lang = "en"
|
||||
Defaults = EnglishDefaults
|
||||
|
|
|
@ -21,7 +21,7 @@ class Lemmatizer(object):
|
|||
def load(cls, *args, **kwargs):
|
||||
raise NotImplementedError(Errors.E172)
|
||||
|
||||
def __init__(self, lookups, *args, is_base_form=None, **kwargs):
|
||||
def __init__(self, lookups, is_base_form=None, *args, **kwargs):
|
||||
"""Initialize a Lemmatizer.
|
||||
|
||||
lookups (Lookups): The lookups object containing the (optional) tables
|
||||
|
|
|
@ -121,6 +121,7 @@ def test_issue3248_1():
|
|||
assert len(matcher) == 2
|
||||
|
||||
|
||||
@pytest.mark.skipif(is_python2, reason="Can't pickle instancemethod for is_base_form")
|
||||
def test_issue3248_2():
|
||||
"""Test that the PhraseMatcher can be pickled correctly."""
|
||||
nlp = English()
|
||||
|
|
Loading…
Reference in New Issue
Block a user