mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
parent
cbc2cee2c8
commit
573e543e4a
|
@ -375,7 +375,7 @@ cdef class Lexeme:
|
||||||
Lexeme.c_set_flag(self.c, IS_STOP, x)
|
Lexeme.c_set_flag(self.c, IS_STOP, x)
|
||||||
|
|
||||||
property is_alpha:
|
property is_alpha:
|
||||||
"""RETURNS (bool): Whether the lexeme consists of alphanumeric
|
"""RETURNS (bool): Whether the lexeme consists of alphabetic
|
||||||
characters. Equivalent to `lexeme.text.isalpha()`.
|
characters. Equivalent to `lexeme.text.isalpha()`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
|
|
|
@ -111,7 +111,7 @@ TOKEN_PATTERN_SCHEMA = {
|
||||||
"$ref": "#/definitions/integer_value",
|
"$ref": "#/definitions/integer_value",
|
||||||
},
|
},
|
||||||
"IS_ALPHA": {
|
"IS_ALPHA": {
|
||||||
"title": "Token consists of alphanumeric characters",
|
"title": "Token consists of alphabetic characters",
|
||||||
"$ref": "#/definitions/boolean_value",
|
"$ref": "#/definitions/boolean_value",
|
||||||
},
|
},
|
||||||
"IS_ASCII": {
|
"IS_ASCII": {
|
||||||
|
|
|
@ -163,7 +163,7 @@ rule-based matching are:
|
||||||
| `TEXT` <Tag variant="new">2.1</Tag> | unicode | The exact verbatim text of a token. |
|
| `TEXT` <Tag variant="new">2.1</Tag> | unicode | The exact verbatim text of a token. |
|
||||||
| `LOWER` | unicode | The lowercase form of the token text. |
|
| `LOWER` | unicode | The lowercase form of the token text. |
|
||||||
| `LENGTH` | int | The length of the token text. |
|
| `LENGTH` | int | The length of the token text. |
|
||||||
| `IS_ALPHA`, `IS_ASCII`, `IS_DIGIT` | bool | Token text consists of alphanumeric characters, ASCII characters, digits. |
|
| `IS_ALPHA`, `IS_ASCII`, `IS_DIGIT` | bool | Token text consists of alphabetic characters, ASCII characters, digits. |
|
||||||
| `IS_LOWER`, `IS_UPPER`, `IS_TITLE` | bool | Token text is in lowercase, uppercase, titlecase. |
|
| `IS_LOWER`, `IS_UPPER`, `IS_TITLE` | bool | Token text is in lowercase, uppercase, titlecase. |
|
||||||
| `IS_PUNCT`, `IS_SPACE`, `IS_STOP` | bool | Token is punctuation, whitespace, stop word. |
|
| `IS_PUNCT`, `IS_SPACE`, `IS_STOP` | bool | Token is punctuation, whitespace, stop word. |
|
||||||
| `LIKE_NUM`, `LIKE_URL`, `LIKE_EMAIL` | bool | Token text resembles a number, URL, email. |
|
| `LIKE_NUM`, `LIKE_URL`, `LIKE_EMAIL` | bool | Token text resembles a number, URL, email. |
|
||||||
|
|
|
@ -573,7 +573,7 @@ apple = doc[0]
|
||||||
print("Coarse-grained POS tag", apple.pos_, apple.pos)
|
print("Coarse-grained POS tag", apple.pos_, apple.pos)
|
||||||
print("Fine-grained POS tag", apple.tag_, apple.tag)
|
print("Fine-grained POS tag", apple.tag_, apple.tag)
|
||||||
print("Word shape", apple.shape_, apple.shape)
|
print("Word shape", apple.shape_, apple.shape)
|
||||||
print("Alphanumeric characters?", apple.is_alpha)
|
print("Alphabetic characters?", apple.is_alpha)
|
||||||
print("Punctuation mark?", apple.is_punct)
|
print("Punctuation mark?", apple.is_punct)
|
||||||
|
|
||||||
billion = doc[10]
|
billion = doc[10]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user