From 0a62098c5f0e0abe640a76776ddf6ea7094e2c23 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Thu, 9 Jul 2020 22:11:24 +0200
Subject: [PATCH] Fix lemmatizer is_base_form for python2.7 (#5734)

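* Fix Lemmatizer.__init__ signature for Python 2.7: a keyword-only argument
  after *args is Python 3-only syntax, so is_base_form now precedes *args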

* Move English is_base_form from the module-level en_is_base_form function
  to a classmethod on EnglishDefaults

* Skip the PhraseMatcher pickling regression test (test_issue3248_2) on
  Python 2, where the is_base_form instancemethod cannot be pickled
---
 spacy/lang/en/__init__.py                     | 71 +++++++++----------
 spacy/lemmatizer.py                           |  2 +-
 spacy/tests/regression/test_issue3001-3500.py |  1 +
 3 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/spacy/lang/en/__init__.py b/spacy/lang/en/__init__.py
index d52f3dfd8..f58ae4a4e 100644
--- a/spacy/lang/en/__init__.py
+++ b/spacy/lang/en/__init__.py
@@ -18,41 +18,6 @@ def _return_en(_):
     return "en"
 
 
-def en_is_base_form(univ_pos, morphology=None):
-    """
-    Check whether we're dealing with an uninflected paradigm, so we can
-    avoid lemmatization entirely.
-
-    univ_pos (unicode / int): The token's universal part-of-speech tag.
-    morphology (dict): The token's morphological features following the
-        Universal Dependencies scheme.
-    """
-    if morphology is None:
-        morphology = {}
-    if univ_pos == "noun" and morphology.get("Number") == "sing":
-        return True
-    elif univ_pos == "verb" and morphology.get("VerbForm") == "inf":
-        return True
-    # This maps 'VBP' to base form -- probably just need 'IS_BASE'
-    # morphology
-    elif univ_pos == "verb" and (
-        morphology.get("VerbForm") == "fin"
-        and morphology.get("Tense") == "pres"
-        and morphology.get("Number") is None
-    ):
-        return True
-    elif univ_pos == "adj" and morphology.get("Degree") == "pos":
-        return True
-    elif morphology.get("VerbForm") == "inf":
-        return True
-    elif morphology.get("VerbForm") == "none":
-        return True
-    elif morphology.get("Degree") == "pos":
-        return True
-    else:
-        return False
-
-
 class EnglishDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters.update(LEX_ATTRS)
@@ -61,7 +26,6 @@ class EnglishDefaults(Language.Defaults):
     tag_map = TAG_MAP
     stop_words = STOP_WORDS
     morph_rules = MORPH_RULES
-    is_base_form = en_is_base_form
     syntax_iterators = SYNTAX_ITERATORS
     single_orth_variants = [
         {"tags": ["NFP"], "variants": ["…", "..."]},
@@ -72,6 +36,41 @@ class EnglishDefaults(Language.Defaults):
         {"tags": ["``", "''"], "variants": [('"', '"'), ("“", "”")]},
     ]
 
+    @classmethod
+    def is_base_form(cls, univ_pos, morphology=None):
+        """
+        Check whether we're dealing with an uninflected paradigm, so we can
+        avoid lemmatization entirely.
+
+        univ_pos (unicode / int): The token's universal part-of-speech tag.
+        morphology (dict): The token's morphological features following the
+            Universal Dependencies scheme.
+        """
+        if morphology is None:
+            morphology = {}
+        if univ_pos == "noun" and morphology.get("Number") == "sing":
+            return True
+        elif univ_pos == "verb" and morphology.get("VerbForm") == "inf":
+            return True
+        # This maps 'VBP' to base form -- probably just need 'IS_BASE'
+        # morphology
+        elif univ_pos == "verb" and (
+            morphology.get("VerbForm") == "fin"
+            and morphology.get("Tense") == "pres"
+            and morphology.get("Number") is None
+        ):
+            return True
+        elif univ_pos == "adj" and morphology.get("Degree") == "pos":
+            return True
+        elif morphology.get("VerbForm") == "inf":
+            return True
+        elif morphology.get("VerbForm") == "none":
+            return True
+        elif morphology.get("Degree") == "pos":
+            return True
+        else:
+            return False
+
 
 class English(Language):
     lang = "en"
diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index f72eae128..8b2375257 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -21,7 +21,7 @@ class Lemmatizer(object):
     def load(cls, *args, **kwargs):
         raise NotImplementedError(Errors.E172)
 
-    def __init__(self, lookups, *args, is_base_form=None, **kwargs):
+    def __init__(self, lookups, is_base_form=None, *args, **kwargs):
         """Initialize a Lemmatizer.
 
         lookups (Lookups): The lookups object containing the (optional) tables
diff --git a/spacy/tests/regression/test_issue3001-3500.py b/spacy/tests/regression/test_issue3001-3500.py
index effbebb92..a10225390 100644
--- a/spacy/tests/regression/test_issue3001-3500.py
+++ b/spacy/tests/regression/test_issue3001-3500.py
@@ -121,6 +121,7 @@ def test_issue3248_1():
     assert len(matcher) == 2
 
 
+@pytest.mark.skipif(is_python2, reason="Can't pickle instancemethod for is_base_form")
 def test_issue3248_2():
     """Test that the PhraseMatcher can be pickled correctly."""
     nlp = English()