import copy import functools from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union from ..errors import Errors if TYPE_CHECKING: from .doc import Doc from .span import Span from .token import Token class Underscore: mutable_types = (dict, list, set) doc_extensions: Dict[Any, Any] = {} span_extensions: Dict[Any, Any] = {} token_extensions: Dict[Any, Any] = {} _extensions: Dict[str, Any] _obj: Union["Doc", "Span", "Token"] _start: Optional[int] _end: Optional[int] def __init__( self, extensions: Dict[str, Any], obj: Union["Doc", "Span", "Token"], start: Optional[int] = None, end: Optional[int] = None, label: int = 0, kb_id: int = 0, span_id: int = 0, ): object.__setattr__(self, "_extensions", extensions) object.__setattr__(self, "_obj", obj) # Assumption is that for doc values, _start and _end will both be None # Span will set non-None values for _start and _end # Token will have _start be non-None, _end be None # This lets us key everything into the doc.user_data dictionary, # (see _get_key), and lets us use a single Underscore class. object.__setattr__(self, "_doc", obj.doc) object.__setattr__(self, "_start", start) object.__setattr__(self, "_end", end) # We used to check if obj is a span, however, this introduces an # import cycle between the span and underscore modeles. So we # do a structural type check instead. if hasattr(obj, "id") and hasattr(obj, "label") and hasattr(obj, "kb_id"): object.__setattr__(self, "_label", label) object.__setattr__(self, "_kb_id", kb_id) object.__setattr__(self, "_span_id", span_id) def __dir__(self) -> List[str]: # Hack to enable autocomplete on custom extensions extensions = list(self._extensions.keys()) return ["set", "get", "has"] + extensions def __getattr__(self, name: str) -> Any: if name not in self._extensions: raise AttributeError(Errors.E046.format(name=name)) default, method, getter, setter = self._extensions[name] if getter is not None: return getter(self._obj) elif method is not None: method_partial = functools.partial(method, self._obj) # Hack to port over docstrings of the original function # See https://stackoverflow.com/q/27362727/6400719 method_docstring = method.__doc__ or "" method_docstring_prefix = ( "This method is a partial function and its first argument " "(the object it's called on) will be filled automatically. " ) method_partial.__doc__ = method_docstring_prefix + method_docstring return method_partial else: key = self._get_key(name) if key in self._doc.user_data: return self._doc.user_data[key] elif isinstance(default, self.mutable_types): # Handle mutable default arguments (see #2581) new_default = copy.copy(default) self.__setattr__(name, new_default) return new_default return default def __setattr__(self, name: str, value: Any): if name not in self._extensions: raise AttributeError(Errors.E047.format(name=name)) default, method, getter, setter = self._extensions[name] if setter is not None: return setter(self._obj, value) else: self._doc.user_data[self._get_key(name)] = value def set(self, name: str, value: Any): return self.__setattr__(name, value) def get(self, name: str) -> Any: return self.__getattr__(name) def has(self, name: str) -> bool: return name in self._extensions def _get_key( self, name: str ) -> Union[ Tuple[str, str, Optional[int], Optional[int]], Tuple[str, str, Optional[int], Optional[int], int, int, int], ]: if hasattr(self, "_label"): return ( "._.", name, self._start, self._end, self._label, self._kb_id, self._span_id, ) else: return "._.", name, self._start, self._end @staticmethod def _replace_keys(old_underscore: "Underscore", new_underscore: "Underscore"): """ This function is called by Span when its kb_id or label are re-assigned. It checks if any user_data is stored for this span and replaces the keys """ for name in old_underscore._extensions: old_key = old_underscore._get_key(name) old_doc = old_underscore._doc new_key = new_underscore._get_key(name) if old_key != new_key and old_key in old_doc.user_data: old_underscore._doc.user_data[ new_key ] = old_underscore._doc.user_data.pop(old_key) @classmethod def get_state(cls) -> Tuple[Dict[Any, Any], Dict[Any, Any], Dict[Any, Any]]: return cls.token_extensions, cls.span_extensions, cls.doc_extensions @classmethod def load_state( cls, state: Tuple[Dict[Any, Any], Dict[Any, Any], Dict[Any, Any]] ) -> None: cls.token_extensions, cls.span_extensions, cls.doc_extensions = state def get_ext_args(**kwargs: Any): """Validate and convert arguments. Reused in Doc, Token and Span.""" default = kwargs.get("default") getter = kwargs.get("getter") setter = kwargs.get("setter") method = kwargs.get("method") if getter is None and setter is not None: raise ValueError(Errors.E089) valid_opts = ("default" in kwargs, method is not None, getter is not None) nr_defined = sum(t is True for t in valid_opts) if nr_defined != 1: raise ValueError(Errors.E083.format(nr_defined=nr_defined)) if setter is not None and not hasattr(setter, "__call__"): raise ValueError(Errors.E091.format(name="setter", value=repr(setter))) if getter is not None and not hasattr(getter, "__call__"): raise ValueError(Errors.E091.format(name="getter", value=repr(getter))) if method is not None and not hasattr(method, "__call__"): raise ValueError(Errors.E091.format(name="method", value=repr(method))) return (default, method, getter, setter) def is_writable_attr(ext): """Check if an extension attribute is writable. ext (tuple): The (default, getter, setter, method) tuple available via {Doc,Span,Token}.get_extension. RETURNS (bool): Whether the attribute is writable. """ default, method, getter, setter = ext # Extension is writable if it has a setter (getter + setter), if it has a # default value (or, if its default value is none, none of the other values # should be set). if setter is not None or default is not None or all(e is None for e in ext): return True return False