mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-01 16:37:45 +03:00 
			
		
		
		
	Merge pull request #6503 from adrianeboyd/feature/lemmatizer-rule-warning-pos
Warn on empty POS for the rule-based lemmatizer
This commit is contained in:
		
						commit
						dfaef27f90
					
				|  | @ -119,6 +119,10 @@ class Warnings: | |||
|             "call the {matcher} on each Doc object.") | ||||
|     W107 = ("The property `Doc.{prop}` is deprecated. Use " | ||||
|             "`Doc.has_annotation(\"{attr}\")` instead.") | ||||
|     W108 = ("The rule-based lemmatizer did not find POS annotation for the " | ||||
|             "token '{text}'. Check that your pipeline includes components that " | ||||
|             "assign token.pos, typically 'tagger'+'attribute_ruler' or " | ||||
|             "'morphologizer'.") | ||||
| 
 | ||||
| 
 | ||||
| @add_codes | ||||
|  |  | |||
|  | @ -4,7 +4,7 @@ from thinc.api import Model | |||
| from pathlib import Path | ||||
| 
 | ||||
| from .pipe import Pipe | ||||
| from ..errors import Errors | ||||
| from ..errors import Errors, Warnings | ||||
| from ..language import Language | ||||
| from ..training import Example | ||||
| from ..lookups import Lookups, load_lookups | ||||
|  | @ -197,6 +197,8 @@ class Lemmatizer(Pipe): | |||
|         string = token.text | ||||
|         univ_pos = token.pos_.lower() | ||||
|         if univ_pos in ("", "eol", "space"): | ||||
|             if univ_pos == "": | ||||
|                 logger.warn(Warnings.W108.format(text=string)) | ||||
|             return [string.lower()] | ||||
|         # See Issue #435 for example of where this logic is required. | ||||
|         if self.is_base_form(token): | ||||
|  |  | |||
|  | @ -1,4 +1,6 @@ | |||
| import pytest | ||||
| import logging | ||||
| import mock | ||||
| from spacy import util, registry | ||||
| from spacy.lang.en import English | ||||
| from spacy.lookups import Lookups | ||||
|  | @ -54,9 +56,18 @@ def test_lemmatizer_config(nlp): | |||
|     lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "rule"}) | ||||
|     nlp.initialize() | ||||
| 
 | ||||
|     # warning if no POS assigned | ||||
|     doc = nlp.make_doc("coping") | ||||
|     logger = logging.getLogger("spacy") | ||||
|     with mock.patch.object(logger, "warn") as mock_warn: | ||||
|         doc = lemmatizer(doc) | ||||
|         mock_warn.assert_called_once() | ||||
| 
 | ||||
|     # works with POS | ||||
|     doc = nlp.make_doc("coping") | ||||
|     doc[0].pos_ = "VERB" | ||||
|     assert doc[0].lemma_ == "" | ||||
|     doc[0].pos_ = "VERB" | ||||
|     doc = lemmatizer(doc) | ||||
|     doc = lemmatizer(doc) | ||||
|     assert doc[0].text == "coping" | ||||
|     assert doc[0].lemma_ == "cope" | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user