mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
		
						commit
						1e10383e1b
					
				| 
						 | 
					@ -70,11 +70,14 @@ def lemmatize(string, index, exceptions, rules):
 | 
				
			||||||
    #if string in index:
 | 
					    #if string in index:
 | 
				
			||||||
    #    forms.append(string)
 | 
					    #    forms.append(string)
 | 
				
			||||||
    forms.extend(exceptions.get(string, []))
 | 
					    forms.extend(exceptions.get(string, []))
 | 
				
			||||||
 | 
					    oov_forms = []
 | 
				
			||||||
    for old, new in rules:
 | 
					    for old, new in rules:
 | 
				
			||||||
        if string.endswith(old):
 | 
					        if string.endswith(old):
 | 
				
			||||||
            form = string[:len(string) - len(old)] + new
 | 
					            form = string[:len(string) - len(old)] + new
 | 
				
			||||||
            if form in index or not form.isalpha():
 | 
					            if form in index or not form.isalpha():
 | 
				
			||||||
                forms.append(form)
 | 
					                forms.append(form)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                oov_forms.append(form)
 | 
				
			||||||
    if not forms:
 | 
					    if not forms:
 | 
				
			||||||
        forms.append(string)
 | 
					        forms.extend(oov_forms)
 | 
				
			||||||
    return set(forms)
 | 
					    return set(forms)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										10
									
								
								spacy/tests/regression/test_issue781.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								spacy/tests/regression/test_issue781.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,10 @@
 | 
				
			||||||
 | 
					# coding: utf-8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import pytest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Note: "chromosomes" worked prior to the bug fix
 | 
				
			||||||
 | 
					@pytest.mark.parametrize('word,lemmas', [("chromosomes", ["chromosome"]), ("endosomes", ["endosome"]), ("colocalizes", ["colocalize", "colocaliz"])])
 | 
				
			||||||
 | 
					def test_issue781(path, lemmatizer, word, lemmas):
 | 
				
			||||||
 | 
					    assert lemmatizer(word, 'noun', morphology={'number': 'plur'}) == set(lemmas)
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user