mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 05:01:02 +03:00 
			
		
		
		
	Block lemmatization of base-form adjectives
Fixes the check that determines whether an adjective is a base form (as opposed to a comparative or superlative), so that base-form adjectives are not lemmatized; e.g. "inner" must not be lemmatized to "inn". Closes #912.
This commit is contained in:
		
							parent
							
								
									97814f8da6
								
							
						
					
					
						commit
						4454c1b23f
					
				|  | @ -7,6 +7,8 @@ import ujson as json | ||||||
| from .en.lemmatizer import INDEX, EXC, RULES | from .en.lemmatizer import INDEX, EXC, RULES | ||||||
| from .symbols import POS, NOUN, VERB, ADJ, PUNCT | from .symbols import POS, NOUN, VERB, ADJ, PUNCT | ||||||
| from .symbols import VerbForm_inf, VerbForm_none | from .symbols import VerbForm_inf, VerbForm_none | ||||||
|  | from .symbols import Number_sing | ||||||
|  | from .symbols import Degree_pos | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Lemmatizer(object): | class Lemmatizer(object): | ||||||
|  | @ -42,6 +44,7 @@ class Lemmatizer(object): | ||||||
|     def is_base_form(self, univ_pos, morphology=None): |     def is_base_form(self, univ_pos, morphology=None): | ||||||
|         '''Check whether we're dealing with an uninflected paradigm, so we can |         '''Check whether we're dealing with an uninflected paradigm, so we can | ||||||
|         avoid lemmatization entirely.''' |         avoid lemmatization entirely.''' | ||||||
|  |         print("Is base form?", univ_pos, morphology) | ||||||
|         morphology = {} if morphology is None else morphology |         morphology = {} if morphology is None else morphology | ||||||
|         others = [key for key in morphology if key not in (POS, 'number', 'pos', 'verbform')] |         others = [key for key in morphology if key not in (POS, 'number', 'pos', 'verbform')] | ||||||
|         true_morph_key = morphology.get('morph', 0) |         true_morph_key = morphology.get('morph', 0) | ||||||
|  | @ -49,7 +52,10 @@ class Lemmatizer(object): | ||||||
|             return True |             return True | ||||||
|         elif univ_pos == 'verb' and morphology.get('verbform') == 'inf' and not others: |         elif univ_pos == 'verb' and morphology.get('verbform') == 'inf' and not others: | ||||||
|             return True |             return True | ||||||
|         elif true_morph_key in (VerbForm_inf, VerbForm_none): |         elif univ_pos == 'adj' and morphology.get('Degree') == 'pos': | ||||||
|  |             return True | ||||||
|  |         elif true_morph_key in \ | ||||||
|  |             (VerbForm_inf, VerbForm_none, Number_sing, Degree_pos): | ||||||
|             return True |             return True | ||||||
|         else: |         else: | ||||||
|             return False |             return False | ||||||
|  |  | ||||||
							
								
								
									
										14
									
								
								spacy/tests/regression/test_issue912.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								spacy/tests/regression/test_issue912.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,14 @@ | ||||||
|  | # coding: utf8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  | 
 | ||||||
|  | import pytest | ||||||
|  | from ...tokens import Doc | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @pytest.mark.parametrize('text,tag,lemma', [("inner", "JJ", "inner")]) | ||||||
|  | def test_issue912(en_vocab, text, tag, lemma): | ||||||
|  |     '''Test base-forms of adjectives are preserved.''' | ||||||
|  |     doc = Doc(en_vocab, words=[text]) | ||||||
|  |     doc[0].tag_ = tag | ||||||
|  |     assert doc[0].lemma_ == lemma | ||||||
|  | 
 | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user