title |
teaser |
next |
menu |
Cython Structs |
C-language objects that let you group variables together |
/api/cython-classes |
|
TokenC
Cython data container for the Token
object.
Example
token = &doc.c[3]
token_ptr = &doc.c[3]
Name |
Type |
Description |
lex |
const LexemeC* |
A pointer to the lexeme for the token. |
morph |
uint64_t |
An ID allowing lookup of morphological attributes. |
pos |
univ_pos_t |
Coarse-grained part-of-speech tag. |
spacy |
bint |
A binary value indicating whether the token has trailing whitespace. |
tag |
attr_t |
Fine-grained part-of-speech tag. |
idx |
int |
The character offset of the token within the parent document. |
lemma |
attr_t |
Base form of the token, with no inflectional suffixes. |
sense |
attr_t |
Space for storing a word sense ID, currently unused. |
head |
int |
Offset of the syntactic parent relative to the token. |
dep |
attr_t |
Syntactic dependency relation. |
l_kids |
uint32_t |
Number of left children. |
r_kids |
uint32_t |
Number of right children. |
l_edge |
uint32_t |
Offset of the leftmost token of this token's syntactic descendants. |
r_edge |
uint32_t |
Offset of the rightmost token of this token's syntactic descendants. |
sent_start |
int |
Ternary value indicating whether the token is the first word of a sentence. 0 indicates a missing value, -1 indicates False and 1 indicates True. The default value, 0, is interpreted as no sentence break. Sentence boundary detectors will usually set 0 for all tokens except tokens that follow a sentence boundary. |
ent_iob |
int |
IOB code of named entity tag. 0 indicates a missing value, 1 indicates I, 2 indicates O and 3 indicates B. |
ent_type |
attr_t |
Named entity type. |
ent_id |
attr_t |
ID of the entity the token is an instance of, if any. Currently not used, but potentially useful for coreference resolution. |
Token.get_struct_attr
Get the value of an attribute from the TokenC
struct by attribute ID.
Example
from spacy.attrs cimport IS_ALPHA
from spacy.tokens cimport Token
is_alpha = Token.get_struct_attr(&doc.c[3], IS_ALPHA)
Name |
Type |
Description |
token |
const TokenC* |
A pointer to a TokenC struct. |
feat_name |
attr_id_t |
The ID of the attribute to look up. The attributes are enumerated in spacy.attrs. |
RETURNS |
attr_t |
The value of the attribute. |
Token.set_struct_attr
Set the value of an attribute of the TokenC
struct by attribute ID.
Example
from spacy.attrs cimport TAG
from spacy.tokens cimport Token
token = &doc.c[3]
Token.set_struct_attr(token, TAG, 0)
Name |
Type |
Description |
token |
TokenC* |
A pointer to a TokenC struct. |
feat_name |
attr_id_t |
The ID of the attribute to set. The attributes are enumerated in spacy.attrs. |
value |
attr_t |
The value to set. |
token_by_start
Find a token in a TokenC*
array by the offset of its first character.
Example
from spacy.tokens.doc cimport Doc, token_by_start
from spacy.vocab cimport Vocab
doc = Doc(Vocab(), words=[u'hello', u'world'])
assert token_by_start(doc.c, doc.length, 6) == 1
assert token_by_start(doc.c, doc.length, 4) == -1
Name |
Type |
Description |
tokens |
const TokenC* |
A TokenC* array. |
length |
int |
The number of tokens in the array. |
start_char |
int |
The start index to search for. |
RETURNS |
int |
The index of the token in the array or -1 if not found. |
token_by_end
Find a token in a TokenC*
array by the offset of its final character.
Example
from spacy.tokens.doc cimport Doc, token_by_end
from spacy.vocab cimport Vocab
doc = Doc(Vocab(), words=[u'hello', u'world'])
assert token_by_end(doc.c, doc.length, 5) == 0
assert token_by_end(doc.c, doc.length, 1) == -1
Name |
Type |
Description |
tokens |
const TokenC* |
A TokenC* array. |
length |
int |
The number of tokens in the array. |
end_char |
int |
The end index to search for. |
RETURNS |
int |
The index of the token in the array or -1 if not found. |
set_children_from_heads
Set attributes that allow lookup of syntactic children on a TokenC*
array.
This function must be called after making changes to the TokenC.head
attribute, in order to make the parse tree navigation consistent.
Example
from spacy.tokens.doc cimport Doc, set_children_from_heads
from spacy.vocab cimport Vocab
doc = Doc(Vocab(), words=[u'Baileys', u'from', u'a', u'shoe'])
doc.c[0].head = 0
doc.c[1].head = -1
doc.c[2].head = 1
doc.c[3].head = -2
set_children_from_heads(doc.c, doc.length)
assert doc.c[3].l_kids == 1
Name |
Type |
Description |
tokens |
TokenC* |
A TokenC* array. |
length |
int |
The number of tokens in the array. |
LexemeC
Struct holding information about a lexical type. LexemeC
structs are usually
owned by the Vocab,
and accessed through a read-only pointer on the TokenC
struct.
Example
lex = doc.c[3].lex
Name |
Type |
Description |
flags |
flags_t |
Bit-field for binary lexical flag values. |
id |
attr_t |
Usually used to map lexemes to rows in a matrix, e.g. for word vectors. Does not need to be unique, so currently misnamed. |
length |
attr_t |
Number of unicode characters in the lexeme. |
orth |
attr_t |
ID of the verbatim text content. |
lower |
attr_t |
ID of the lowercase form of the lexeme. |
norm |
attr_t |
ID of the lexeme's norm, i.e. a normalized form of the text. |
shape |
attr_t |
Transform of the lexeme's string, to show orthographic features. |
prefix |
attr_t |
Length-N substring from the start of the lexeme. Defaults to N=1. |
suffix |
attr_t |
Length-N substring from the end of the lexeme. Defaults to N=3. |
cluster |
attr_t |
Brown cluster ID. |
prob |
float |
Smoothed log probability estimate of the lexeme's word type (context-independent entry in the vocabulary). |
sentiment |
float |
A scalar value indicating positivity or negativity. |
Lexeme.get_struct_attr
Get the value of an attribute from the LexemeC
struct by attribute ID.
Example
from spacy.attrs cimport IS_ALPHA
from spacy.lexeme cimport Lexeme
lexeme = doc.c[3].lex
is_alpha = Lexeme.get_struct_attr(lexeme, IS_ALPHA)
Name |
Type |
Description |
lex |
const LexemeC* |
A pointer to a LexemeC struct. |
feat_name |
attr_id_t |
The ID of the attribute to look up. The attributes are enumerated in spacy.attrs. |
RETURNS |
attr_t |
The value of the attribute. |
Lexeme.set_struct_attr
Set the value of an attribute of the LexemeC
struct by attribute ID.
Example
from spacy.attrs cimport NORM
from spacy.lexeme cimport Lexeme
lexeme = doc.c[3].lex
Lexeme.set_struct_attr(lexeme, NORM, lexeme.lower)
Name |
Type |
Description |
lex |
const LexemeC* |
A pointer to a LexemeC struct. |
feat_name |
attr_id_t |
The ID of the attribute to set. The attributes are enumerated in spacy.attrs. |
value |
attr_t |
The value to set. |
Lexeme.c_check_flag
Check the value of a binary flag attribute.
Example
from spacy.attrs cimport IS_STOP
from spacy.lexeme cimport Lexeme
lexeme = doc.c[3].lex
is_stop = Lexeme.c_check_flag(lexeme, IS_STOP)
Name |
Type |
Description |
lexeme |
const LexemeC* |
A pointer to a LexemeC struct. |
flag_id |
attr_id_t |
The ID of the flag to look up. The flag IDs are enumerated in spacy.attrs. |
RETURNS |
bint |
The boolean value of the flag. |
Lexeme.c_set_flag
Set the value of a binary flag attribute.
Example
from spacy.attrs cimport IS_STOP
from spacy.lexeme cimport Lexeme
lexeme = doc.c[3].lex
Lexeme.c_set_flag(lexeme, IS_STOP, 0)
Name |
Type |
Description |
lexeme |
const LexemeC* |
A pointer to a LexemeC struct. |
flag_id |
attr_id_t |
The ID of the flag to set. The flag IDs are enumerated in spacy.attrs. |
value |
bint |
The value to set. |