mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	## Description This PR adds the most relevant documentation of spaCy's Cython API. (Todo for when we publish this: rewrite `/api/#section-cython` and `/api/#cython` to `/api/cython#conventions`.) ### Types of change docs ## Checklist <!--- Before you submit the PR, go over this checklist and make sure you can tick off all the boxes. [] -> [x] --> - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [x] My changes don't require a change to the documentation, or if they do, I've added all required information.
		
			
				
	
	
		
			89 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			89 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
//- 💫 DOCS > API > CYTHON > CLASSES > VOCAB
 | 
						|
 | 
						|
p
 | 
						|
    |  A Cython class providing access and methods for a vocabulary and other
 | 
						|
    |  data shared across a language.
 | 
						|
 | 
						|
+infobox
 | 
						|
    |  This section documents the extra C-level attributes and methods that
 | 
						|
    |  can't be accessed from Python. For the Python documentation, see
 | 
						|
    |  #[+api("vocab") #[code Vocab]].
 | 
						|
 | 
						|
+h(3, "vocab_attributes") Attributes
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code mem]
 | 
						|
        +cell #[code cymem.Pool]
 | 
						|
        +cell
 | 
						|
            |  A memory pool. Allocated memory will be freed once the
 | 
						|
            |  #[code Vocab] object is garbage collected.
 | 
						|
 | 
						|
    +row
 | 
						|
        +cell #[code strings]
 | 
						|
        +cell #[code StringStore]
 | 
						|
        +cell
 | 
						|
            |  A #[code StringStore] that maps strings to hash values and vice
 | 
						|
            |  versa.
 | 
						|
 | 
						|
    +row
 | 
						|
        +cell #[code length]
 | 
						|
        +cell #[code int]
 | 
						|
        +cell The number of entries in the vocabulary.
 | 
						|
 | 
						|
+h(3, "vocab_get") Vocab.get
 | 
						|
    +tag method
 | 
						|
 | 
						|
p
 | 
						|
    |  Retrieve a #[+api("cython-structs#lexemec") #[code LexemeC*]] pointer
 | 
						|
    |  from the vocabulary, looked up by its string.
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    lexeme = vocab.get(vocab.mem, u'hello')
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code mem]
 | 
						|
        +cell #[code cymem.Pool]
 | 
						|
        +cell
 | 
						|
            |  A memory pool. Allocated memory will be freed once the
 | 
						|
            |  #[code Vocab] object is garbage collected.
 | 
						|
 | 
						|
    +row
 | 
						|
        +cell #[code string]
 | 
						|
        +cell #[code unicode]
 | 
						|
        +cell The string of the word to look up.
 | 
						|
 | 
						|
    +row("foot")
 | 
						|
        +cell returns
 | 
						|
        +cell #[code const LexemeC*]
 | 
						|
        +cell The lexeme in the vocabulary.
 | 
						|
 | 
						|
+h(3, "vocab_get_by_orth") Vocab.get_by_orth
 | 
						|
    +tag method
 | 
						|
 | 
						|
p
 | 
						|
    |  Retrieve a #[+api("cython-structs#lexemec") #[code LexemeC*]] pointer
 | 
						|
    |  from the vocabulary, looked up by its orth ID.
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    lexeme = vocab.get_by_orth(vocab.mem, doc[0].lex.norm)
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code mem]
 | 
						|
        +cell #[code cymem.Pool]
 | 
						|
        +cell
 | 
						|
            |  A memory pool. Allocated memory will be freed once the
 | 
						|
            |  #[code Vocab] object is garbage collected.
 | 
						|
 | 
						|
    +row
 | 
						|
        +cell #[code orth]
 | 
						|
        +cell #[+abbr("uint64_t") #[code attr_t]]
 | 
						|
        +cell ID of the verbatim text content.
 | 
						|
 | 
						|
    +row("foot")
 | 
						|
        +cell returns
 | 
						|
        +cell #[code const LexemeC*]
 | 
						|
        +cell The lexeme in the vocabulary.
 |