mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
4a615cacd2
* Consolidate and freeze symbols

  Instead of having symbol values defined in three potentially conflicting places (`spacy.attrs`, `spacy.parts_of_speech`, `spacy.symbols`), define all symbols in `spacy.symbols` and reference those values in `spacy.attrs` and `spacy.parts_of_speech`.

  Remove deprecated and placeholder symbols from `spacy.attrs.IDS`.

  Make `spacy.attrs.NAMES` and `spacy.symbols.NAMES` reverse dicts rather than lists in order to support future use of hash values in `attr_id_t`.

  Minor changes:

* Use `uint64_t` for attrs in `Doc.to_array` to support future use of hash values
* Remove unneeded attrs filter for error message in `Doc.to_array`
* Remove unused attr `SENT_END`
* Handle dynamic size of attr_id_t in Doc.to_array
* Undo added warnings
* Refactor to make Doc.to_array more similar to Doc.from_array
* Improve refactoring
50 lines
1.2 KiB
Cython
50 lines
1.2 KiB
Cython
from . cimport symbols
|
|
|
|
# Attribute identifiers.
#
# Apart from NULL_ATTR, which is fixed at 0, every member takes its numeric
# value from the same-named constant in the cimported `symbols` module, so
# the values are defined in exactly one place and cannot drift apart here.
cdef enum attr_id_t:
    NULL_ATTR = 0

    # IS_* / LIKE_* members.
    IS_ALPHA = symbols.IS_ALPHA
    IS_ASCII = symbols.IS_ASCII
    IS_DIGIT = symbols.IS_DIGIT
    IS_LOWER = symbols.IS_LOWER
    IS_PUNCT = symbols.IS_PUNCT
    IS_SPACE = symbols.IS_SPACE
    IS_TITLE = symbols.IS_TITLE
    IS_UPPER = symbols.IS_UPPER
    LIKE_URL = symbols.LIKE_URL
    LIKE_NUM = symbols.LIKE_NUM
    LIKE_EMAIL = symbols.LIKE_EMAIL
    IS_STOP = symbols.IS_STOP
    IS_BRACKET = symbols.IS_BRACKET
    IS_QUOTE = symbols.IS_QUOTE
    IS_LEFT_PUNCT = symbols.IS_LEFT_PUNCT
    IS_RIGHT_PUNCT = symbols.IS_RIGHT_PUNCT
    IS_CURRENCY = symbols.IS_CURRENCY

    # ID through SUFFIX.
    ID = symbols.ID
    ORTH = symbols.ORTH
    LOWER = symbols.LOWER
    NORM = symbols.NORM
    SHAPE = symbols.SHAPE
    PREFIX = symbols.PREFIX
    SUFFIX = symbols.SUFFIX

    # LENGTH through PROB.
    LENGTH = symbols.LENGTH
    CLUSTER = symbols.CLUSTER
    LEMMA = symbols.LEMMA
    POS = symbols.POS
    TAG = symbols.TAG
    DEP = symbols.DEP
    ENT_IOB = symbols.ENT_IOB
    ENT_TYPE = symbols.ENT_TYPE
    HEAD = symbols.HEAD
    SENT_START = symbols.SENT_START
    SPACY = symbols.SPACY
    PROB = symbols.PROB

    # LANG through ENT_ID.
    LANG = symbols.LANG
    ENT_KB_ID = symbols.ENT_KB_ID
    MORPH = symbols.MORPH
    ENT_ID = symbols.ENT_ID

    # IDX sits in its own group, as in the original layout.
    IDX = symbols.IDX