mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 05:31:15 +03:00 
			
		
		
		
	💫 Allow Span to take text label (#3031)
Fixes #3027.

* Allow `Span.__init__` to take unicode values for the `label` argument.
* Allow `Span.label_` to be writeable.

- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [ ] My changes don't require a change to the documentation, or if they do, I've added all required information.
This commit is contained in:
		
							parent
							
								
									11a29af751
								
							
						
					
					
						commit
						2c2db0c492
					
				|  | @ -3,7 +3,7 @@ from __future__ import unicode_literals | |||
| 
 | ||||
| import pytest | ||||
| from spacy.attrs import ORTH, LENGTH | ||||
| from spacy.tokens import Doc | ||||
| from spacy.tokens import Doc, Span | ||||
| from spacy.vocab import Vocab | ||||
| 
 | ||||
| from ..util import get_doc | ||||
|  | @ -154,6 +154,17 @@ def test_span_as_doc(doc): | |||
|     assert span.text == span_doc.text.strip() | ||||
| 
 | ||||
| 
 | ||||
| def test_span_string_label(doc): | ||||
|     span = Span(doc, 0, 1, label='hello') | ||||
|     assert span.label_ == 'hello' | ||||
|     assert span.label == doc.vocab.strings['hello'] | ||||
| 
 | ||||
| def test_span_string_set_label(doc): | ||||
|     span = Span(doc, 0, 1) | ||||
|     span.label_ = 'hello' | ||||
|     assert span.label_ == 'hello' | ||||
|     assert span.label == doc.vocab.strings['hello'] | ||||
| 
 | ||||
| def test_span_ents_property(doc): | ||||
|     """Test span.ents for the """ | ||||
|     doc.ents = [ | ||||
|  |  | |||
|  | @ -15,7 +15,7 @@ from ..parts_of_speech cimport univ_pos_t | |||
| from ..util import normalize_slice | ||||
| from ..attrs cimport IS_PUNCT, IS_SPACE | ||||
| from ..lexeme cimport Lexeme | ||||
| from ..compat import is_config | ||||
| from ..compat import is_config, basestring_ | ||||
| from ..errors import Errors, TempErrors, Warnings, user_warning, models_warning | ||||
| from .underscore import Underscore, get_ext_args | ||||
| 
 | ||||
|  | @ -42,7 +42,7 @@ cdef class Span: | |||
|             raise ValueError(Errors.E046.format(name=name)) | ||||
|         return Underscore.span_extensions.pop(name) | ||||
| 
 | ||||
|     def __cinit__(self, Doc doc, int start, int end, attr_t label=0, | ||||
|     def __cinit__(self, Doc doc, int start, int end, label=0, | ||||
|                   vector=None, vector_norm=None): | ||||
|         """Create a `Span` object from the slice `doc[start : end]`. | ||||
| 
 | ||||
|  | @ -64,6 +64,8 @@ cdef class Span: | |||
|             self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1]) | ||||
|         else: | ||||
|             self.end_char = 0 | ||||
|         if isinstance(label, basestring_): | ||||
|             label = doc.vocab.strings.add(label) | ||||
|         if label not in doc.vocab.strings: | ||||
|             raise ValueError(Errors.E084.format(label=label)) | ||||
|         self.label = label | ||||
|  | @ -601,6 +603,8 @@ cdef class Span: | |||
|         """RETURNS (unicode): The span's label.""" | ||||
|         def __get__(self): | ||||
|             return self.doc.vocab.strings[self.label] | ||||
|         def __set__(self, unicode label_): | ||||
|             self.label = self.doc.vocab.strings.add(label_) | ||||
| 
 | ||||
| 
 | ||||
| cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user