# cython: profile=False from .errors import Errors IOB_STRINGS = ("", "I", "O", "B") IDS = { "": NULL_ATTR, "IS_ALPHA": IS_ALPHA, "IS_ASCII": IS_ASCII, "IS_DIGIT": IS_DIGIT, "IS_LOWER": IS_LOWER, "IS_PUNCT": IS_PUNCT, "IS_SPACE": IS_SPACE, "IS_TITLE": IS_TITLE, "IS_UPPER": IS_UPPER, "LIKE_URL": LIKE_URL, "LIKE_NUM": LIKE_NUM, "LIKE_EMAIL": LIKE_EMAIL, "IS_STOP": IS_STOP, "IS_BRACKET": IS_BRACKET, "IS_QUOTE": IS_QUOTE, "IS_LEFT_PUNCT": IS_LEFT_PUNCT, "IS_RIGHT_PUNCT": IS_RIGHT_PUNCT, "IS_CURRENCY": IS_CURRENCY, "ID": ID, "ORTH": ORTH, "LOWER": LOWER, "NORM": NORM, "SHAPE": SHAPE, "PREFIX": PREFIX, "SUFFIX": SUFFIX, "LENGTH": LENGTH, "LEMMA": LEMMA, "POS": POS, "TAG": TAG, "DEP": DEP, "ENT_IOB": ENT_IOB, "ENT_TYPE": ENT_TYPE, "ENT_ID": ENT_ID, "ENT_KB_ID": ENT_KB_ID, "HEAD": HEAD, "SENT_START": SENT_START, "SPACY": SPACY, "LANG": LANG, "MORPH": MORPH, "IDX": IDX, } NAMES = {v: k for k, v in IDS.items()} locals().update(IDS) def intify_attrs(stringy_attrs, strings_map=None): """ Normalize a dictionary of attributes, converting them to ints. stringy_attrs (dict): Dictionary keyed by attribute string names. Values can be ints or strings. strings_map (StringStore): Defaults to None. If provided, encodes string values into ints. RETURNS (dict): Attributes dictionary with keys and optionally values converted to ints. """ inty_attrs = {} for name, value in stringy_attrs.items(): int_key = intify_attr(name) if int_key is not None: if int_key == ENT_IOB: if value in IOB_STRINGS: value = IOB_STRINGS.index(value) elif isinstance(value, str): raise ValueError(Errors.E1025.format(value=value)) if strings_map is not None and isinstance(value, str): if hasattr(strings_map, "add"): value = strings_map.add(value) else: value = strings_map[value] inty_attrs[int_key] = value return inty_attrs def intify_attr(name): """ Normalize an attribute name, converting it to int. stringy_attr (string): Attribute string name. Can also be int (will then be left unchanged) RETURNS (int): int representation of the attribute, or None if it couldn't be converted. """ if isinstance(name, int): return name elif name in IDS: return IDS[name] elif name.upper() in IDS: return IDS[name.upper()] return None