diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index c8e0f2965..3564b6e42 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -163,7 +163,7 @@ cdef class Lexeme: self.vocab.set_vector(self.c.orth, vector) property rank: - """RETURNS (str): Sequential ID of the lexemes's lexical type, used + """RETURNS (int): Sequential ID of the lexeme's lexical type, used to index into tables, e.g. for word vectors.""" def __get__(self): return self.c.id @@ -205,7 +205,7 @@ cdef class Lexeme: self.c.lower = x property norm: - """RETURNS (uint64): The lexemes's norm, i.e. a normalised form of the + """RETURNS (uint64): The lexeme's norm, i.e. a normalised form of the lexeme text. """ def __get__(self): @@ -288,7 +288,7 @@ cdef class Lexeme: self.c.lower = self.vocab.strings.add(x) property norm_: - """RETURNS (str): The lexemes's norm, i.e. a normalised form of the + """RETURNS (str): The lexeme's norm, i.e. a normalised form of the lexeme text. """ def __get__(self): diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 605108a7b..3fcfda691 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -329,7 +329,7 @@ cdef class Token: @property def shape(self): """RETURNS (uint64): ID of the token's shape, a transform of the - tokens's string, to show orthographic features (e.g. "Xxxx", "dd"). + token's string, to show orthographic features (e.g. "Xxxx", "dd"). """ return self.c.lex.shape @@ -825,7 +825,7 @@ cdef class Token: @property def shape_(self): - """RETURNS (str): Transform of the tokens's string, to show + """RETURNS (str): Transform of the token's string, to show orthographic features. For example, "Xxxx" or "dd". """ return self.vocab.strings[self.c.lex.shape] diff --git a/website/docs/api/lexeme.md b/website/docs/api/lexeme.md index c99f19482..c5d4b7544 100644 --- a/website/docs/api/lexeme.md +++ b/website/docs/api/lexeme.md @@ -127,14 +127,14 @@ The L2 norm of the lexeme's vector representation. | `text` | Verbatim text content. 
~~str~~ | | `orth` | ID of the verbatim text content. ~~int~~ | | `orth_` | Verbatim text content (identical to `Lexeme.text`). Exists mostly for consistency with the other attributes. ~~str~~ | -| `rank` | Sequential ID of the lexemes's lexical type, used to index into tables, e.g. for word vectors. ~~int~~ | +| `rank` | Sequential ID of the lexeme's lexical type, used to index into tables, e.g. for word vectors. ~~int~~ | | `flags` | Container of the lexeme's binary flags. ~~int~~ | -| `norm` | The lexemes's norm, i.e. a normalized form of the lexeme text. ~~int~~ | -| `norm_` | The lexemes's norm, i.e. a normalized form of the lexeme text. ~~str~~ | +| `norm` | The lexeme's norm, i.e. a normalized form of the lexeme text. ~~int~~ | +| `norm_` | The lexeme's norm, i.e. a normalized form of the lexeme text. ~~str~~ | | `lower` | Lowercase form of the word. ~~int~~ | | `lower_` | Lowercase form of the word. ~~str~~ | -| `shape` | Transform of the words's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. ~~int~~ | -| `shape_` | Transform of the word's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. ~~str~~ | +| `shape` | Transform of the word's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. ~~int~~ | +| `shape_` | Transform of the word's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. 
For example,`"Xxxx"`or`"dd"`. ~~str~~ | | `prefix` | Length-N substring from the start of the word. Defaults to `N=1`. ~~int~~ | | `prefix_` | Length-N substring from the start of the word. Defaults to `N=1`. ~~str~~ | | `suffix` | Length-N substring from the end of the word. Defaults to `N=3`. ~~int~~ | diff --git a/website/docs/api/token.md b/website/docs/api/token.md index ecf7bcc8e..c272b1fce 100644 --- a/website/docs/api/token.md +++ b/website/docs/api/token.md @@ -431,7 +431,7 @@ The L2 norm of the token's vector representation. | `orth` | ID of the verbatim text content. ~~int~~ | | `orth_` | Verbatim text content (identical to `Token.text`). Exists mostly for consistency with the other attributes. ~~str~~ | | `vocab` | The vocab object of the parent `Doc`. ~~vocab~~ | -| `tensor` 2.1.7 | The tokens's slice of the parent `Doc`'s tensor. ~~numpy.ndarray~~ | +| `tensor` 2.1.7 | The token's slice of the parent `Doc`'s tensor. ~~numpy.ndarray~~ | | `head` | The syntactic parent, or "governor", of this token. ~~Token~~ | | `left_edge` | The leftmost token of this token's syntactic descendants. ~~Token~~ | | `right_edge` | The rightmost token of this token's syntactic descendants. ~~Token~~ | @@ -450,8 +450,8 @@ The L2 norm of the token's vector representation. | `norm_` | The token's norm, i.e. a normalized form of the token text. Can be set in the language's [tokenizer exceptions](/usage/linguistic-features#language-data). ~~str~~ | | `lower` | Lowercase form of the token. ~~int~~ | | `lower_` | Lowercase form of the token text. Equivalent to `Token.text.lower()`. ~~str~~ | -| `shape` | Transform of the tokens's string to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. ~~int~~ | -| `shape_` | Transform of the tokens's string to show orthographic features. 
Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. ~~str~~ | +| `shape` | Transform of the token's string to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. ~~int~~ | +| `shape_` | Transform of the token's string to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. ~~str~~ | | `prefix` | Hash value of a length-N substring from the start of the token. Defaults to `N=1`. ~~int~~ | | `prefix_` | A length-N substring from the start of the token. Defaults to `N=1`. ~~str~~ | | `suffix` | Hash value of a length-N substring from the end of the token. Defaults to `N=3`. ~~int~~ | diff --git a/website/docs/usage/models.md b/website/docs/usage/models.md index c661c8f15..6b5b33c30 100644 --- a/website/docs/usage/models.md +++ b/website/docs/usage/models.md @@ -382,7 +382,7 @@ doc = nlp("This is a sentence.") You can use the [`info`](/api/cli#info) command or [`spacy.info()`](/api/top-level#spacy.info) method to print a pipeline -packages's meta data before loading it. Each `Language` object with a loaded +package's meta data before loading it. Each `Language` object with a loaded pipeline also exposes the pipeline's meta data as the attribute `meta`. For example, `nlp.meta['version']` will return the package version.