Fix lemmatizer is_base_form for python2.7 (#5734)

* Fix lemmatizer init args for python2.7
* Move English is_base_form to a class method
* Skip test pickling PhraseMatcher for python2
2025-11-09 20:38:06 +03:00 · 2020-07-09 22:11:24 +02:00 · 2020-07-09 22:11:24 +02:00 · 0a62098c5f
commit 0a62098c5f
parent 923affd091
3 changed files with 37 additions and 37 deletions
--- a/spacy/lang/en/__init__.py
+++ b/spacy/lang/en/__init__.py
@@ -18,7 +18,26 @@ def _return_en(_):
    return "en"


-def en_is_base_form(univ_pos, morphology=None):
+class EnglishDefaults(Language.Defaults):
+    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
+    lex_attr_getters.update(LEX_ATTRS)
+    lex_attr_getters[LANG] = _return_en
+    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
+    tag_map = TAG_MAP
+    stop_words = STOP_WORDS
+    morph_rules = MORPH_RULES
+    syntax_iterators = SYNTAX_ITERATORS
+    single_orth_variants = [
+        {"tags": ["NFP"], "variants": ["…", "..."]},
+        {"tags": [":"], "variants": ["-", "—", "–", "--", "---", "——"]},
+    ]
+    paired_orth_variants = [
+        {"tags": ["``", "''"], "variants": [("'", "'"), ("‘", "’")]},
+        {"tags": ["``", "''"], "variants": [('"', '"'), ("“", "”")]},
+    ]
+
+    @classmethod
+    def is_base_form(cls, univ_pos, morphology=None):
        """
        Check whether we're dealing with an uninflected paradigm, so we can
        avoid lemmatization entirely.
@@ -53,26 +72,6 @@ def en_is_base_form(univ_pos, morphology=None):
            return False


-class EnglishDefaults(Language.Defaults):
-    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
-    lex_attr_getters.update(LEX_ATTRS)
-    lex_attr_getters[LANG] = _return_en
-    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    tag_map = TAG_MAP
-    stop_words = STOP_WORDS
-    morph_rules = MORPH_RULES
-    is_base_form = en_is_base_form
-    syntax_iterators = SYNTAX_ITERATORS
-    single_orth_variants = [
-        {"tags": ["NFP"], "variants": ["…", "..."]},
-        {"tags": [":"], "variants": ["-", "—", "–", "--", "---", "——"]},
-    ]
-    paired_orth_variants = [
-        {"tags": ["``", "''"], "variants": [("'", "'"), ("‘", "’")]},
-        {"tags": ["``", "''"], "variants": [('"', '"'), ("“", "”")]},
-    ]
-
-
 class English(Language):
    lang = "en"
    Defaults = EnglishDefaults
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -21,7 +21,7 @@ class Lemmatizer(object):
    def load(cls, *args, **kwargs):
        raise NotImplementedError(Errors.E172)

-    def __init__(self, lookups, *args, is_base_form=None, **kwargs):
+    def __init__(self, lookups, is_base_form=None, *args, **kwargs):
        """Initialize a Lemmatizer.

        lookups (Lookups): The lookups object containing the (optional) tables
--- a/spacy/tests/regression/test_issue3001-3500.py
+++ b/spacy/tests/regression/test_issue3001-3500.py
@@ -121,6 +121,7 @@ def test_issue3248_1():
    assert len(matcher) == 2


+@pytest.mark.skipif(is_python2, reason="Can't pickle instancemethod for is_base_form")
 def test_issue3248_2():
    """Test that the PhraseMatcher can be pickled correctly."""
    nlp = English()