mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Update docstrings and API docs for Lexeme
This commit is contained in:
		
							parent
							
								
									7ed8a92ed1
								
							
						
					
					
						commit
						27de0834b2
					
				|  | @ -30,17 +30,14 @@ memset(&EMPTY_LEXEME, 0, sizeof(LexemeC)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| cdef class Lexeme: | cdef class Lexeme: | ||||||
|     """ |     """An entry in the vocabulary. A `Lexeme` has no string context – it's a | ||||||
|     An entry in the vocabulary.  A Lexeme has no string context --- it's a |  | ||||||
|     word-type, as opposed to a word token.  It therefore has no part-of-speech |     word-type, as opposed to a word token.  It therefore has no part-of-speech | ||||||
|     tag, dependency parse, or lemma (lemmatization depends on the part-of-speech |     tag, dependency parse, or lemma (lemmatization depends on the part-of-speech | ||||||
|     tag). |     tag). | ||||||
|     """ |     """ | ||||||
|     def __init__(self, Vocab vocab, int orth): |     def __init__(self, Vocab vocab, int orth): | ||||||
|         """ |         """Create a Lexeme object. | ||||||
|         Create a Lexeme object. |  | ||||||
| 
 | 
 | ||||||
|         Arguments: |  | ||||||
|         vocab (Vocab): The parent vocabulary. |         vocab (Vocab): The parent vocabulary. | ||||||
|         orth (int): The orth id of the lexeme. |         orth (int): The orth id of the lexeme. | ||||||
|         Returns (Lexeme): The newly constructed object. |         Returns (Lexeme): The newly constructed object. | ||||||
|  | @ -82,35 +79,28 @@ cdef class Lexeme: | ||||||
|         return self.c.orth |         return self.c.orth | ||||||
| 
 | 
 | ||||||
|     def set_flag(self, attr_id_t flag_id, bint value): |     def set_flag(self, attr_id_t flag_id, bint value): | ||||||
|         """ |         """Change the value of a boolean flag. | ||||||
|         Change the value of a boolean flag. |  | ||||||
| 
 | 
 | ||||||
|         Arguments: |  | ||||||
|         flag_id (int): The attribute ID of the flag to set. |         flag_id (int): The attribute ID of the flag to set. | ||||||
|         value (bool): The new value of the flag. |         value (bool): The new value of the flag. | ||||||
|         """ |         """ | ||||||
|         Lexeme.c_set_flag(self.c, flag_id, value) |         Lexeme.c_set_flag(self.c, flag_id, value) | ||||||
| 
 | 
 | ||||||
|     def check_flag(self, attr_id_t flag_id): |     def check_flag(self, attr_id_t flag_id): | ||||||
|         """ |         """Check the value of a boolean flag. | ||||||
|         Check the value of a boolean flag. |  | ||||||
| 
 | 
 | ||||||
|         Arguments: |  | ||||||
|         flag_id (int): The attribute ID of the flag to query. |         flag_id (int): The attribute ID of the flag to query. | ||||||
|         Returns (bool): The value of the flag. |         RETURNS (bool): The value of the flag. | ||||||
|         """ |         """ | ||||||
|         return True if Lexeme.c_check_flag(self.c, flag_id) else False |         return True if Lexeme.c_check_flag(self.c, flag_id) else False | ||||||
| 
 | 
 | ||||||
|     def similarity(self, other): |     def similarity(self, other): | ||||||
|         """ |         """Compute a semantic similarity estimate. Defaults to cosine over | ||||||
|         Compute a semantic similarity estimate. Defaults to cosine over vectors. |         vectors. | ||||||
| 
 | 
 | ||||||
|         Arguments: |         other (object): The object to compare with. By default, accepts `Doc`, | ||||||
|             other: |             `Span`, `Token` and `Lexeme` objects. | ||||||
|                 The object to compare with. By default, accepts Doc, Span, |         RETURNS (float): A scalar similarity score. Higher is more similar. | ||||||
|                 Token and Lexeme objects. |  | ||||||
|         Returns: |  | ||||||
|             score (float): A scalar similarity score. Higher is more similar. |  | ||||||
|         """ |         """ | ||||||
|         if self.vector_norm == 0 or other.vector_norm == 0: |         if self.vector_norm == 0 or other.vector_norm == 0: | ||||||
|             return 0.0 |             return 0.0 | ||||||
|  | @ -140,6 +130,11 @@ cdef class Lexeme: | ||||||
|         self.orth = self.c.orth |         self.orth = self.c.orth | ||||||
| 
 | 
 | ||||||
|     property has_vector: |     property has_vector: | ||||||
|  |         """A boolean value indicating whether a word vector is associated with | ||||||
|  |         the object. | ||||||
|  | 
 | ||||||
|  |         RETURNS (bool): Whether a word vector is associated with the object. | ||||||
|  |         """ | ||||||
|         def __get__(self): |         def __get__(self): | ||||||
|             cdef int i |             cdef int i | ||||||
|             for i in range(self.vocab.vectors_length): |             for i in range(self.vocab.vectors_length): | ||||||
|  | @ -149,6 +144,10 @@ cdef class Lexeme: | ||||||
|                 return False |                 return False | ||||||
| 
 | 
 | ||||||
|     property vector_norm: |     property vector_norm: | ||||||
|  |         """The L2 norm of the lexeme's vector representation. | ||||||
|  | 
 | ||||||
|  |         RETURNS (float): The L2 norm of the vector representation. | ||||||
|  |         """ | ||||||
|         def __get__(self): |         def __get__(self): | ||||||
|             return self.c.l2_norm |             return self.c.l2_norm | ||||||
| 
 | 
 | ||||||
|  | @ -156,6 +155,11 @@ cdef class Lexeme: | ||||||
|             self.c.l2_norm = value |             self.c.l2_norm = value | ||||||
| 
 | 
 | ||||||
|     property vector: |     property vector: | ||||||
|  |         """A real-valued meaning representation. | ||||||
|  | 
 | ||||||
|  |         RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array | ||||||
|  |             representing the lexeme's semantics. | ||||||
|  |         """ | ||||||
|         def __get__(self): |         def __get__(self): | ||||||
|             cdef int length = self.vocab.vectors_length |             cdef int length = self.vocab.vectors_length | ||||||
|             if length == 0: |             if length == 0: | ||||||
|  | @ -196,6 +200,14 @@ cdef class Lexeme: | ||||||
|         def __get__(self): |         def __get__(self): | ||||||
|             return self.vocab.strings[self.c.orth] |             return self.vocab.strings[self.c.orth] | ||||||
| 
 | 
 | ||||||
|  |     property text: | ||||||
|  |         """A unicode representation of the lexeme text. | ||||||
|  | 
 | ||||||
|  |         RETURNS (unicode): The original verbatim text of the lexeme. | ||||||
|  |         """ | ||||||
|  |         def __get__(self): | ||||||
|  |             return self.orth_ | ||||||
|  | 
 | ||||||
|     property lower: |     property lower: | ||||||
|         def __get__(self): return self.c.lower |         def __get__(self): return self.c.lower | ||||||
|         def __set__(self, int x): self.c.lower = x |         def __set__(self, int x): self.c.lower = x | ||||||
|  |  | ||||||
|  | @ -2,7 +2,154 @@ | ||||||
| 
 | 
 | ||||||
| include ../../_includes/_mixins | include ../../_includes/_mixins | ||||||
| 
 | 
 | ||||||
| p An entry in the vocabulary. | p | ||||||
|  |     |  An entry in the vocabulary. A #[code Lexeme] has no string context – it's | ||||||
|  |     |  a word-type, as opposed to a word token. It therefore has no | ||||||
|  |     |  part-of-speech tag, dependency parse, or lemma (if lemmatization depends | ||||||
|  |     |  on the part-of-speech tag). | ||||||
|  | 
 | ||||||
|  | +h(2, "init") Lexeme.__init__ | ||||||
|  |     +tag method | ||||||
|  | 
 | ||||||
|  | p Create a #[code Lexeme] object. | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code vocab] | ||||||
|  |         +cell #[code Vocab] | ||||||
|  |         +cell The parent vocabulary. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code orth] | ||||||
|  |         +cell int | ||||||
|  |         +cell The orth id of the lexeme. | ||||||
|  | 
 | ||||||
|  |     +footrow | ||||||
|  |         +cell returns | ||||||
|  |         +cell #[code Lexeme] | ||||||
|  |         +cell The newly constructed object. | ||||||
|  | 
 | ||||||
|  | +h(2, "set_flag") Lexeme.set_flag | ||||||
|  |     +tag method | ||||||
|  | 
 | ||||||
|  | p Change the value of a boolean flag. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     COOL_FLAG = nlp.vocab.add_flag(lambda text: False) | ||||||
|  |     nlp.vocab[u'spaCy'].set_flag(COOL_FLAG, True) | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code flag_id] | ||||||
|  |         +cell int | ||||||
|  |         +cell The attribute ID of the flag to set. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code value] | ||||||
|  |         +cell bool | ||||||
|  |         +cell The new value of the flag. | ||||||
|  | 
 | ||||||
|  | +h(2, "check_flag") Lexeme.check_flag | ||||||
|  |     +tag method | ||||||
|  | 
 | ||||||
|  | p Check the value of a boolean flag. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     is_my_library = lambda text: text in ['spaCy', 'Thinc'] | ||||||
|  |     MY_LIBRARY = nlp.vocab.add_flag(is_my_library) | ||||||
|  |     assert nlp.vocab[u'spaCy'].check_flag(MY_LIBRARY) == True | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code flag_id] | ||||||
|  |         +cell int | ||||||
|  |         +cell The attribute ID of the flag to query. | ||||||
|  | 
 | ||||||
|  |     +footrow | ||||||
|  |         +cell returns | ||||||
|  |         +cell bool | ||||||
|  |         +cell The value of the flag. | ||||||
|  | 
 | ||||||
|  | +h(2, "similarity") Lexeme.similarity | ||||||
|  |     +tag method | ||||||
|  |     +tag-model("vectors") | ||||||
|  | 
 | ||||||
|  | p Compute a semantic similarity estimate. Defaults to cosine over vectors. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     apple = nlp.vocab[u'apple'] | ||||||
|  |     orange = nlp.vocab[u'orange'] | ||||||
|  |     apple_orange = apple.similarity(orange) | ||||||
|  |     orange_apple = orange.similarity(apple) | ||||||
|  |     assert apple_orange == orange_apple | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code other] | ||||||
|  |         +cell - | ||||||
|  |         +cell | ||||||
|  |             |  The object to compare with. By default, accepts #[code Doc], | ||||||
|  |             |  #[code Span], #[code Token] and #[code Lexeme] objects. | ||||||
|  | 
 | ||||||
|  |     +footrow | ||||||
|  |         +cell returns | ||||||
|  |         +cell float | ||||||
|  |         +cell A scalar similarity score. Higher is more similar. | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | +h(2, "has_vector") Lexeme.has_vector | ||||||
|  |     +tag property | ||||||
|  |     +tag-model("vectors") | ||||||
|  | 
 | ||||||
|  | p | ||||||
|  |     |  A boolean value indicating whether a word vector is associated with the | ||||||
|  |     |  lexeme. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     apple = nlp.vocab[u'apple'] | ||||||
|  |     assert apple.has_vector | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +footrow | ||||||
|  |         +cell returns | ||||||
|  |         +cell bool | ||||||
|  |         +cell Whether the lexeme has vector data attached. | ||||||
|  | 
 | ||||||
|  | +h(2, "vector") Lexeme.vector | ||||||
|  |     +tag property | ||||||
|  |     +tag-model("vectors") | ||||||
|  | 
 | ||||||
|  | p A real-valued meaning representation. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     apple = nlp.vocab[u'apple'] | ||||||
|  |     assert apple.vector.dtype == 'float32' | ||||||
|  |     assert apple.vector.shape == (300,) | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +footrow | ||||||
|  |         +cell returns | ||||||
|  |         +cell #[code numpy.ndarray[ndim=1, dtype='float32']] | ||||||
|  |         +cell A 1D numpy array representing the lexeme's semantics. | ||||||
|  | 
 | ||||||
|  | +h(2, "vector_norm") Lexeme.vector_norm | ||||||
|  |     +tag property | ||||||
|  |     +tag-model("vectors") | ||||||
|  | 
 | ||||||
|  | p The L2 norm of the lexeme's vector representation. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     apple = nlp.vocab[u'apple'] | ||||||
|  |     pasta = nlp.vocab[u'pasta'] | ||||||
|  |     apple.vector_norm # 7.1346845626831055 | ||||||
|  |     pasta.vector_norm # 7.759851932525635 | ||||||
|  |     assert apple.vector_norm != pasta.vector_norm | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +footrow | ||||||
|  |         +cell returns | ||||||
|  |         +cell float | ||||||
|  |         +cell The L2 norm of the vector representation. | ||||||
| 
 | 
 | ||||||
| +h(2, "attributes") Attributes | +h(2, "attributes") Attributes | ||||||
| 
 | 
 | ||||||
|  | @ -12,6 +159,16 @@ p An entry in the vocabulary. | ||||||
|         +cell #[code Vocab] |         +cell #[code Vocab] | ||||||
|         +cell |         +cell | ||||||
| 
 | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code text] | ||||||
|  |         +cell unicode | ||||||
|  |         +cell Verbatim text content. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code lex_id] | ||||||
|  |         +cell int | ||||||
|  |         +cell ID of the lexeme's lexical type. | ||||||
|  | 
 | ||||||
|     +row |     +row | ||||||
|         +cell #[code lower] |         +cell #[code lower] | ||||||
|         +cell int |         +cell int | ||||||
|  | @ -124,116 +281,9 @@ p An entry in the vocabulary. | ||||||
|     +row |     +row | ||||||
|         +cell #[code prob] |         +cell #[code prob] | ||||||
|         +cell float |         +cell float | ||||||
|         +cell Smoothed log probability estimate of token's type. |         +cell Smoothed log probability estimate of lexeme's type. | ||||||
| 
 | 
 | ||||||
|     +row |     +row | ||||||
|         +cell #[code sentiment] |         +cell #[code sentiment] | ||||||
|         +cell float |         +cell float | ||||||
|         +cell A scalar value indicating the positivity or negativity of the token. |         +cell A scalar value indicating the positivity or negativity of the lexeme. | ||||||
|     +row |  | ||||||
|         +cell #[code lex_id] |  | ||||||
|         +cell int |  | ||||||
|         +cell ID of the token's lexical type. |  | ||||||
| 
 |  | ||||||
|     +row |  | ||||||
|         +cell #[code text] |  | ||||||
|         +cell unicode |  | ||||||
|         +cell Verbatim text content. |  | ||||||
| 
 |  | ||||||
| +h(2, "init") Lexeme.__init__ |  | ||||||
|     +tag method |  | ||||||
| 
 |  | ||||||
| p Create a #[code Lexeme] object. |  | ||||||
| 
 |  | ||||||
| +table(["Name", "Type", "Description"]) |  | ||||||
|     +row |  | ||||||
|         +cell #[code vocab] |  | ||||||
|         +cell #[code Vocab] |  | ||||||
|         +cell The parent vocabulary. |  | ||||||
| 
 |  | ||||||
|     +row |  | ||||||
|         +cell #[code orth] |  | ||||||
|         +cell int |  | ||||||
|         +cell The orth id of the lexeme. |  | ||||||
| 
 |  | ||||||
|     +footrow |  | ||||||
|         +cell returns |  | ||||||
|         +cell #[code Lexeme] |  | ||||||
|         +cell The newly constructed object. |  | ||||||
| 
 |  | ||||||
| +h(2, "set_flag") Lexeme.set_flag |  | ||||||
|     +tag method |  | ||||||
| 
 |  | ||||||
| p Change the value of a boolean flag. |  | ||||||
| 
 |  | ||||||
| +table(["Name", "Type", "Description"]) |  | ||||||
|     +row |  | ||||||
|         +cell #[code flag_id] |  | ||||||
|         +cell int |  | ||||||
|         +cell The attribute ID of the flag to set. |  | ||||||
| 
 |  | ||||||
|     +row |  | ||||||
|         +cell #[code value] |  | ||||||
|         +cell bool |  | ||||||
|         +cell The new value of the flag. |  | ||||||
| 
 |  | ||||||
|     +footrow |  | ||||||
|         +cell returns |  | ||||||
|         +cell #[code None] |  | ||||||
|         +cell - |  | ||||||
| 
 |  | ||||||
| +h(2, "check_flag") Lexeme.check_flag |  | ||||||
|     +tag method |  | ||||||
| 
 |  | ||||||
| p Check the value of a boolean flag. |  | ||||||
| 
 |  | ||||||
| +table(["Name", "Type", "Description"]) |  | ||||||
|     +row |  | ||||||
|         +cell #[code flag_id] |  | ||||||
|         +cell int |  | ||||||
|         +cell The attribute ID of the flag to query. |  | ||||||
| 
 |  | ||||||
|     +footrow |  | ||||||
|         +cell returns |  | ||||||
|         +cell bool |  | ||||||
|         +cell The value of the flag. |  | ||||||
| 
 |  | ||||||
| +h(2, "similarity") Lexeme.similarity |  | ||||||
|     +tag method |  | ||||||
| 
 |  | ||||||
| p Compute a semantic similarity estimate. Defaults to cosine over vectors. |  | ||||||
| 
 |  | ||||||
| +table(["Name", "Type", "Description"]) |  | ||||||
|     +row |  | ||||||
|         +cell #[code other] |  | ||||||
|         +cell - |  | ||||||
|         +cell |  | ||||||
|             |  The object to compare with. By default, accepts #[code Doc], |  | ||||||
|             |  #[code Span], #[code Token] and #[code Lexeme] objects. |  | ||||||
| 
 |  | ||||||
|     +footrow |  | ||||||
|         +cell returns |  | ||||||
|         +cell float |  | ||||||
|         +cell A scalar similarity score. Higher is more similar. |  | ||||||
| 
 |  | ||||||
| +h(2, "vector") Lexeme.vector |  | ||||||
|     +tag property |  | ||||||
| 
 |  | ||||||
| p A real-valued meaning representation. |  | ||||||
| 
 |  | ||||||
| +table(["Name", "Type", "Description"]) |  | ||||||
|     +footrow |  | ||||||
|         +cell returns |  | ||||||
|         +cell #[code numpy.ndarray[ndim=1, dtype='float32']] |  | ||||||
|         +cell A real-valued meaning representation. |  | ||||||
| 
 |  | ||||||
| +h(2, "has_vector") Lexeme.has_vector |  | ||||||
|     +tag property |  | ||||||
| 
 |  | ||||||
| p A boolean value indicating whether a word vector is associated with the object. |  | ||||||
| 
 |  | ||||||
| +table(["Name", "Type", "Description"]) |  | ||||||
|     +footrow |  | ||||||
|         +cell returns |  | ||||||
|         +cell bool |  | ||||||
|         +cell Whether a word vector is associated with the object. |  | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user