mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	## Description This PR adds the most relevant documentation of spaCy's Cython API. (Todo for when we publish this: rewrite `/api/#section-cython` and `/api/#cython` to `/api/cython#conventions`.) ### Types of change docs ## Checklist <!--- Before you submit the PR, go over this checklist and make sure you can tick off all the boxes. [] -> [x] --> - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [x] My changes don't require a change to the documentation, or if they do, I've added all required information.
		
			
				
	
	
		
			72 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			72 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
//- 💫 DOCS > API > CYTHON > CLASSES > DOC
 | 
						|
 | 
						|
p
 | 
						|
    |  The #[code Doc] object holds an array of
 | 
						|
    |  #[+api("cython-structs#tokenc") #[code TokenC]] structs.
 | 
						|
 | 
						|
+infobox
 | 
						|
    |  This section documents the extra C-level attributes and methods that
 | 
						|
    |  can't be accessed from Python. For the Python documentation, see
 | 
						|
    |  #[+api("doc") #[code Doc]].
 | 
						|
 | 
						|
+h(3, "doc_attributes") Attributes
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code mem]
 | 
						|
        +cell #[code cymem.Pool]
 | 
						|
        +cell
 | 
						|
            |  A memory pool. Allocated memory will be freed once the
 | 
						|
            |  #[code Doc] object is garbage collected.
 | 
						|
 | 
						|
    +row
 | 
						|
        +cell #[code vocab]
 | 
						|
        +cell #[code Vocab]
 | 
						|
        +cell A reference to the shared #[code Vocab] object.
 | 
						|
 | 
						|
    +row
 | 
						|
        +cell #[code c]
 | 
						|
        +cell #[code TokenC*]
 | 
						|
        +cell
 | 
						|
            |  A pointer to the first element of the array of
 | 
						|
            |  #[+api("cython-structs#tokenc") #[code TokenC]] structs.
 | 
						|
 | 
						|
    +row
 | 
						|
        +cell #[code length]
 | 
						|
        +cell #[code int]
 | 
						|
        +cell The number of tokens in the document.
 | 
						|
 | 
						|
    +row
 | 
						|
        +cell #[code max_length]
 | 
						|
        +cell #[code int]
 | 
						|
        +cell The underlying size of the #[code Doc.c] array.
 | 
						|
 | 
						|
+h(3, "doc_push_back") Doc.push_back
 | 
						|
    +tag method
 | 
						|
 | 
						|
p
 | 
						|
    |  Append a token to the #[code Doc]. The token can be provided as a
 | 
						|
    |  #[+api("cython-structs#lexemec") #[code LexemeC]] or
 | 
						|
    |  #[+api("cython-structs#tokenc") #[code TokenC]] pointer, using Cython's
 | 
						|
    |  #[+a("https://cython.readthedocs.io/en/latest/src/userguide/fusedtypes.html") fused types].
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    from spacy.tokens cimport Doc
 | 
						|
    from spacy.vocab cimport Vocab
 | 
						|
 | 
						|
    doc = Doc(Vocab())
 | 
						|
    lexeme = doc.vocab.get(doc.vocab.mem, u'hello')
 | 
						|
    doc.push_back(lexeme, True)
 | 
						|
    assert doc.text == u'hello '
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code lex_or_tok]
 | 
						|
        +cell #[code LexemeOrToken]
 | 
						|
        +cell The word to append to the #[code Doc].
 | 
						|
 | 
						|
    +row
 | 
						|
        +cell #[code has_space]
 | 
						|
        +cell #[code bint]
 | 
						|
        +cell Whether the word has trailing whitespace.
 |