mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Add Token.lex
This commit is contained in:
parent
933a7cf8d1
commit
c099f6eece
|
@ -2,6 +2,7 @@ import pytest
|
|||
import numpy
|
||||
from spacy.tokens import Doc, Span
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.lexeme import Lexeme
|
||||
from spacy.lang.en import English
|
||||
from spacy.attrs import ENT_TYPE, ENT_IOB, SENT_START, HEAD, DEP, MORPH
|
||||
|
||||
|
@ -389,3 +390,11 @@ def test_doc_lang(en_vocab):
|
|||
assert doc.lang == en_vocab.strings["en"]
|
||||
assert doc[0].lang_ == "en"
|
||||
assert doc[0].lang == en_vocab.strings["en"]
|
||||
|
||||
|
||||
def test_token_lexeme(en_vocab):
    """Check that `Token.lex` yields the vocab's `Lexeme` for the token.

    The first token of a two-word doc must expose a `Lexeme` whose text
    matches the token text and which compares equal to the entry obtained
    by indexing the vocab with the token's `orth`.
    """
    words = ["Hello", "world"]
    doc = Doc(en_vocab, words=words)
    first = doc[0]
    # `lex` is read afresh for every assertion, as each access is a lookup.
    assert isinstance(first.lex, Lexeme)
    assert first.lex.text == first.text
    assert en_vocab[first.orth] == first.lex
|
||||
|
|
|
@ -226,6 +226,11 @@ cdef class Token:
|
|||
cdef hash_t key = self.vocab.morphology.add(features)
|
||||
self.c.morph = key
|
||||
|
||||
@property
def lex(self):
    """Fetch the vocab entry keyed by the `orth` of this token's lexeme.

    RETURNS (Lexeme): The underlying lexeme.
    """
    # Read the key first, then index the shared vocab with it.
    orth = self.c.lex.orth
    return self.vocab[orth]
|
||||
|
||||
@property
|
||||
def lex_id(self):
|
||||
"""RETURNS (int): Sequential ID of the token's lexical type."""
|
||||
|
|
|
@ -393,8 +393,9 @@ The L2 norm of the token's vector representation.
|
|||
## Attributes {#attributes}
|
||||
|
||||
| Name | Type | Description |
|
||||
| -------------------------------------------- | --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| -------------------------------------------- | ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `doc` | `Doc` | The parent document. |
|
||||
| `lex` <Tag variant="new">3</Tag> | [`Lexeme`](/api/lexeme) | The underlying lexeme. |
|
||||
| `sent` <Tag variant="new">2.0.12</Tag> | `Span` | The sentence span that this token is a part of. |
|
||||
| `text` | str | Verbatim text content. |
|
||||
| `text_with_ws` | str | Text content, with trailing space character if present. |
|
||||
|
|
Loading…
Reference in New Issue
Block a user