mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	## Description This PR adds the most relevant documentation of spaCy's Cython API. (Todo for when we publish this: rewrite `/api/#section-cython` and `/api/#cython` to `/api/cython#conventions`.) ### Types of change docs ## Checklist <!--- Before you submit the PR, go over this checklist and make sure you can tick off all the boxes. [] -> [x] --> - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [x] My changes don't require a change to the documentation, or if they do, I've added all required information.
		
			
				
	
	
		
			89 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			89 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| //- 💫 DOCS > API > CYTHON > CLASSES > VOCAB
 | |
| 
 | |
| p
 | |
|     |  A Cython class providing access to, and methods for, a vocabulary and other
 | |
|     |  data shared across a language.
 | |
| 
 | |
| +infobox
 | |
|     |  This section documents the extra C-level attributes and methods that
 | |
|     |  can't be accessed from Python. For the Python documentation, see
 | |
|     |  #[+api("vocab") #[code Vocab]].
 | |
| 
 | |
| +h(3, "vocab_attributes") Attributes
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code mem]
 | |
|         +cell #[code cymem.Pool]
 | |
|         +cell
 | |
|             |  A memory pool. Allocated memory will be freed once the
 | |
|             |  #[code Vocab] object is garbage collected.
 | |
| 
 | |
|     +row
 | |
|         +cell #[code strings]
 | |
|         +cell #[code StringStore]
 | |
|         +cell
 | |
|             |  A #[code StringStore] that maps strings to hash values and vice
 | |
|             |  versa.
 | |
| 
 | |
|     +row
 | |
|         +cell #[code length]
 | |
|         +cell #[code int]
 | |
|         +cell The number of entries in the vocabulary.
 | |
| 
 | |
| +h(3, "vocab_get") Vocab.get
 | |
|     +tag method
 | |
| 
 | |
| p
 | |
|     |  Retrieve a #[+api("cython-structs#lexemec") #[code LexemeC*]] pointer
 | |
|     |  from the vocabulary.
 | |
| 
 | |
| +aside-code("Example").
 | |
|     lexeme = vocab.get(vocab.mem, u'hello')
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code mem]
 | |
|         +cell #[code cymem.Pool]
 | |
|         +cell
 | |
|             |  A memory pool. Allocated memory will be freed once the
 | |
|             |  #[code Vocab] object is garbage collected.
 | |
| 
 | |
|     +row
 | |
|         +cell #[code string]
 | |
|         +cell #[code unicode]
 | |
|         +cell The string of the word to look up.
 | |
| 
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell #[code const LexemeC*]
 | |
|         +cell The lexeme in the vocabulary.
 | |
| 
 | |
| +h(3, "vocab_get_by_orth") Vocab.get_by_orth
 | |
|     +tag method
 | |
| 
 | |
| p
 | |
|     |  Retrieve a #[+api("cython-structs#lexemec") #[code LexemeC*]] pointer
 | |
|     |  from the vocabulary, looked up by the ID of its verbatim text content.
 | |
| 
 | |
| +aside-code("Example").
 | |
|     lexeme = vocab.get_by_orth(vocab.mem, doc[0].lex.orth)
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code mem]
 | |
|         +cell #[code cymem.Pool]
 | |
|         +cell
 | |
|             |  A memory pool. Allocated memory will be freed once the
 | |
|             |  #[code Vocab] object is garbage collected.
 | |
| 
 | |
|     +row
 | |
|         +cell #[code orth]
 | |
|         +cell #[+abbr("uint64_t") #[code attr_t]]
 | |
|         +cell ID of the verbatim text content.
 | |
| 
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell #[code const LexemeC*]
 | |
|         +cell The lexeme in the vocabulary.
 |