mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge pull request #1389 from mdcclv/lemmatizer_obey_exceptions
Lemmatizer obey exceptions
This commit is contained in:
		
						commit
						dea81f113d
					
				| 
						 | 
				
			
			@ -78,15 +78,16 @@ def lemmatize(string, index, exceptions, rules):
 | 
			
		|||
    #    forms.append(string)
 | 
			
		||||
    forms.extend(exceptions.get(string, []))
 | 
			
		||||
    oov_forms = []
 | 
			
		||||
    for old, new in rules:
 | 
			
		||||
        if string.endswith(old):
 | 
			
		||||
            form = string[:len(string) - len(old)] + new
 | 
			
		||||
            if not form:
 | 
			
		||||
                pass
 | 
			
		||||
            elif form in index or not form.isalpha():
 | 
			
		||||
                forms.append(form)
 | 
			
		||||
            else:
 | 
			
		||||
                oov_forms.append(form)
 | 
			
		||||
    if not forms:
 | 
			
		||||
        for old, new in rules:
 | 
			
		||||
            if string.endswith(old):
 | 
			
		||||
                form = string[:len(string) - len(old)] + new
 | 
			
		||||
                if not form:
 | 
			
		||||
                    pass
 | 
			
		||||
                elif form in index or not form.isalpha():
 | 
			
		||||
                    forms.append(form)
 | 
			
		||||
                else:
 | 
			
		||||
                    oov_forms.append(form)
 | 
			
		||||
    if not forms:
 | 
			
		||||
        forms.extend(oov_forms)
 | 
			
		||||
    if not forms:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										22
									
								
								spacy/tests/regression/test_issue1387.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								spacy/tests/regression/test_issue1387.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,22 @@
 | 
			
		|||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from ...symbols import POS, VERB, VerbForm_part
 | 
			
		||||
from ...vocab import Vocab
 | 
			
		||||
from ...lemmatizer import Lemmatizer
 | 
			
		||||
from ..util import get_doc
 | 
			
		||||
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
def test_issue1387():
    """Check that a lemmatizer exception takes precedence over suffix rules.

    The "ing" -> "" rule would turn "coping" into "cop", which is present in
    the verb index, while the exception table maps "coping" to "cope". The
    token's lemma is expected to be the exception entry, "cope".
    """
    # Lemmatizer tables: known verb forms, explicit exceptions, suffix rules.
    verb_index = {"verb": ("cope","cop")}
    verb_exceptions = {"verb": {"coping": ("cope",)}}
    verb_rules = {"verb": [["ing", ""]]}
    vbg_tag_map = {'VBG': {POS: VERB, VerbForm_part: True}}

    vocab = Vocab(
        lemmatizer=Lemmatizer(verb_index, verb_exceptions, verb_rules),
        tag_map=vbg_tag_map,
    )
    doc = get_doc(vocab, ["coping"])

    # The lemma is checked only after the tag has been assigned.
    token = doc[0]
    token.tag_ = 'VBG'

    assert token.text == "coping"
    assert token.lemma_ == "cope"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -47,3 +47,20 @@ def test_tagger_lemmatizer_lemma_assignment(EN):
 | 
			
		|||
    assert all(t.lemma_ == '' for t in doc)
 | 
			
		||||
    EN.tagger(doc)
 | 
			
		||||
    assert all(t.lemma_ != '' for t in doc)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from ...symbols import POS, VERB, VerbForm_part
 | 
			
		||||
from ...vocab import Vocab
 | 
			
		||||
from ...lemmatizer import Lemmatizer
 | 
			
		||||
from ..util import get_doc
 | 
			
		||||
def test_tagger_lemmatizer_exceptions():
    """Verify the exception table wins over rule-derived lemmas.

    With the single rule "ing" -> "", "coping" would reduce to "cop" (a form
    listed in the index); the exception entry maps it to "cope" instead, and
    that is the lemma the test expects once the token is tagged 'VBG'.
    """
    tag_map = {'VBG': {POS: VERB, VerbForm_part: True}}
    # Positional arguments to Lemmatizer: index, exceptions, rules.
    lemmatizer = Lemmatizer(
        {"verb": ("cope","cop")},
        {"verb": {"coping": ("cope",)}},
        {"verb": [["ing", ""]]},
    )
    doc = get_doc(Vocab(lemmatizer=lemmatizer, tag_map=tag_map), ["coping"])

    first = doc[0]
    first.tag_ = 'VBG'

    observed = (first.text, first.lemma_)
    assert observed[0] == "coping"
    assert observed[1] == "cope"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user