mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-29 23:17:59 +03:00 
			
		
		
		
	Add Token.lex
This commit is contained in:
		
							parent
							
								
									933a7cf8d1
								
							
						
					
					
						commit
						c099f6eece
					
				|  | @ -2,6 +2,7 @@ import pytest | |||
| import numpy | ||||
| from spacy.tokens import Doc, Span | ||||
| from spacy.vocab import Vocab | ||||
| from spacy.lexeme import Lexeme | ||||
| from spacy.lang.en import English | ||||
| from spacy.attrs import ENT_TYPE, ENT_IOB, SENT_START, HEAD, DEP, MORPH | ||||
| 
 | ||||
|  | @ -389,3 +390,11 @@ def test_doc_lang(en_vocab): | |||
|     assert doc.lang == en_vocab.strings["en"] | ||||
|     assert doc[0].lang_ == "en" | ||||
|     assert doc[0].lang == en_vocab.strings["en"] | ||||
| 
 | ||||
| 
 | ||||
| def test_token_lexeme(en_vocab): | ||||
|     """Test that tokens expose their lexeme.""" | ||||
|     token = Doc(en_vocab, words=["Hello", "world"])[0] | ||||
|     assert isinstance(token.lex, Lexeme) | ||||
|     assert token.lex.text == token.text | ||||
|     assert en_vocab[token.orth] == token.lex | ||||
|  |  | |||
|  | @ -226,6 +226,11 @@ cdef class Token: | |||
|             cdef hash_t key = self.vocab.morphology.add(features) | ||||
|             self.c.morph = key | ||||
| 
 | ||||
|     @property | ||||
|     def lex(self): | ||||
|         """Look up this token's lexeme in `self.vocab`. | ||||
|         RETURNS (Lexeme): The underlying lexeme. | ||||
|         """ | ||||
|         # Index the vocab with the orth value stored at self.c.lex. | ||||
|         return self.vocab[self.c.lex.orth] | ||||
| 
 | ||||
|     @property | ||||
|     def lex_id(self): | ||||
|         """RETURNS (int): Sequential ID of the token's lexical type.""" | ||||
|  |  | |||
|  | @ -393,8 +393,9 @@ The L2 norm of the token's vector representation. | |||
| ## Attributes {#attributes} | ||||
| 
 | ||||
| | Name                                         | Type                    | Description                                                                                                                                                                                                                                                    | | ||||
| | -------------------------------------------- | --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | -------------------------------------------- | ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `doc`                                        | `Doc`                   | The parent document.                                                                                                                                                                                                                                           | | ||||
| | `lex` <Tag variant="new">3</Tag>             | [`Lexeme`](/api/lexeme) | The underlying lexeme.                                                                                                                                                                                                                                         | | ||||
| | `sent` <Tag variant="new">2.0.12</Tag>       | `Span`                  | The sentence span that this token is a part of.                                                                                                                                                                                                                | | ||||
| | `text`                                       | str                     | Verbatim text content.                                                                                                                                                                                                                                         | | ||||
| | `text_with_ws`                               | str                     | Text content, with trailing space character if present.                                                                                                                                                                                                        | | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user