mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-14 19:46:26 +03:00
89 lines
2.4 KiB
Plaintext
89 lines
2.4 KiB
Plaintext
|
//- 💫 DOCS > API > CYTHON > CLASSES > VOCAB
|
||
|
|
||
|
p
|
||
|
| A Cython class providing access and methods for a vocabulary and other
|
||
|
| data shared across a language.
|
||
|
|
||
|
+infobox
|
||
|
| This section documents the extra C-level attributes and methods that
|
||
|
| can't be accessed from Python. For the Python documentation, see
|
||
|
| #[+api("vocab") #[code Vocab]].
|
||
|
|
||
|
+h(3, "vocab_attributes") Attributes
|
||
|
|
||
|
+table(["Name", "Type", "Description"])
|
||
|
+row
|
||
|
+cell #[code mem]
|
||
|
+cell #[code cymem.Pool]
|
||
|
+cell
|
||
|
| A memory pool. Allocated memory will be freed once the
|
||
|
| #[code Vocab] object is garbage collected.
|
||
|
|
||
|
+row
|
||
|
+cell #[code strings]
|
||
|
+cell #[code StringStore]
|
||
|
+cell
|
||
|
| A #[code StringStore] that maps strings to hash values and vice
|
||
|
| versa.
|
||
|
|
||
|
+row
|
||
|
+cell #[code length]
|
||
|
+cell #[code int]
|
||
|
+cell The number of entries in the vocabulary.
|
||
|
|
||
|
+h(3, "vocab_get") Vocab.get
|
||
|
+tag method
|
||
|
|
||
|
p
|
||
|
| Retrieve a #[+api("cython-structs#lexemec") #[code LexemeC*]] pointer
|
||
|
| from the vocabulary.
|
||
|
|
||
|
+aside-code("Example").
|
||
|
lexeme = vocab.get(vocab.mem, u'hello')
|
||
|
|
||
|
+table(["Name", "Type", "Description"])
|
||
|
+row
|
||
|
+cell #[code mem]
|
||
|
+cell #[code cymem.Pool]
|
||
|
+cell
|
||
|
| A memory pool. Allocated memory will be freed once the
|
||
|
| #[code Vocab] object is garbage collected.
|
||
|
|
||
|
+row
|
||
|
+cell #[code string]
|
||
|
+cell #[code unicode]
|
||
|
+cell The string of the word to look up.
|
||
|
|
||
|
+row("foot")
|
||
|
+cell returns
|
||
|
+cell #[code const LexemeC*]
|
||
|
+cell The lexeme in the vocabulary.
|
||
|
|
||
|
+h(3, "vocab_get_by_orth") Vocab.get_by_orth
|
||
|
+tag method
|
||
|
|
||
|
p
|
||
|
| Retrieve a #[+api("cython-structs#lexemec") #[code LexemeC*]] pointer
|
||
|
| from the vocabulary, looked up by the ID of its verbatim text content.
|
||
|
|
||
|
+aside-code("Example").
|
||
|
lexeme = vocab.get_by_orth(vocab.mem, doc[0].lex.norm)
|
||
|
|
||
|
+table(["Name", "Type", "Description"])
|
||
|
+row
|
||
|
+cell #[code mem]
|
||
|
+cell #[code cymem.Pool]
|
||
|
+cell
|
||
|
| A memory pool. Allocated memory will be freed once the
|
||
|
| #[code Vocab] object is garbage collected.
|
||
|
|
||
|
+row
|
||
|
+cell #[code orth]
|
||
|
+cell #[+abbr("uint64_t") #[code attr_t]]
|
||
|
+cell ID of the verbatim text content.
|
||
|
|
||
|
+row("foot")
|
||
|
+cell returns
|
||
|
+cell #[code const LexemeC*]
|
||
|
+cell The lexeme in the vocabulary.
|