mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Pass extensions into Underscore class
This commit is contained in:
parent
1289129fd9
commit
668a0ea640
|
@ -30,7 +30,7 @@ from ..util import normalize_slice
|
|||
from ..compat import is_config
|
||||
from .. import about
|
||||
from .. import util
|
||||
|
||||
from .underscore import Underscore
|
||||
|
||||
DEF PADDING = 5
|
||||
|
||||
|
@ -64,6 +64,7 @@ cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil:
|
|||
else:
|
||||
return Lexeme.get_struct_attr(token.lex, feat_name)
|
||||
|
||||
|
||||
def _get_chunker(lang):
|
||||
try:
|
||||
cls = util.get_lang_class(lang)
|
||||
|
@ -73,6 +74,7 @@ def _get_chunker(lang):
|
|||
return None
|
||||
return cls.Defaults.syntax_iterators.get(u'noun_chunks')
|
||||
|
||||
|
||||
cdef class Doc:
|
||||
"""A sequence of Token objects. Access sentences and named entities, export
|
||||
annotations to numpy arrays, losslessly serialize to compressed binary strings.
|
||||
|
@ -87,6 +89,21 @@ cdef class Doc:
|
|||
>>> from spacy.tokens import Doc
|
||||
>>> doc = Doc(nlp.vocab, words=[u'hello', u'world', u'!'], spaces=[True, False, False])
|
||||
"""
|
||||
@classmethod
def set_extension(cls, name, default=None, method=None,
                  getter=None, setter=None):
    """Register a custom extension attribute for `Doc` objects.

    Exactly one of `default`, `method`, `getter` and `setter` must be
    non-None; any other combination fails the assertion below.

    name: Key under which the extension is stored.
    default: Fallback value for the attribute.
    method: Object stored as the extension's method.
    getter: Callable stored as the extension's getter.
    setter: Callable stored as the extension's setter.
    """
    candidates = (default, getter, setter, method)
    nr_defined = len([value for value in candidates if value is not None])
    assert nr_defined == 1
    # The registry entry is always ordered (default, method, getter, setter).
    Underscore.doc_extensions[name] = (default, method, getter, setter)
|
||||
|
||||
@classmethod
def get_extension(cls, name):
    """Look up a registered `Doc` extension by name.

    name: Key the extension was registered under.
    RETURNS: The stored (default, method, getter, setter) tuple, or None
        if no extension with that name is registered.
    """
    registry = Underscore.doc_extensions
    return registry.get(name)
|
||||
|
||||
@classmethod
def has_extension(cls, name):
    """Check whether a `Doc` extension is registered under `name`.

    name: Key to look for in the `Doc` extension registry.
    RETURNS: True if the name is registered, False otherwise.
    """
    registry = Underscore.doc_extensions
    return name in registry
|
||||
|
||||
def __init__(self, Vocab vocab, words=None, spaces=None, orths_and_spaces=None):
|
||||
"""Create a Doc object.
|
||||
|
||||
|
@ -159,6 +176,10 @@ cdef class Doc:
|
|||
self.is_tagged = True
|
||||
self.is_parsed = True
|
||||
|
||||
@property
def _(self):
    """Return a new `Underscore` proxy for this `Doc`.

    The proxy is built from the `Doc` extension registry on every access;
    no start/end offsets are passed, so `Underscore` uses its defaults.
    """
    doc_registry = Underscore.doc_extensions
    return Underscore(doc_registry, self)
|
||||
|
||||
def __getitem__(self, object i):
|
||||
"""Get a `Token` or `Span` object.
|
||||
|
||||
|
|
|
@ -17,10 +17,24 @@ from ..attrs cimport IS_PUNCT, IS_SPACE
|
|||
from ..lexeme cimport Lexeme
|
||||
from ..compat import is_config
|
||||
from .. import about
|
||||
from .underscore import Underscore
|
||||
|
||||
|
||||
cdef class Span:
|
||||
"""A slice from a Doc object."""
|
||||
@classmethod
def set_extension(cls, name, default=None, method=None,
                  getter=None, setter=None):
    """Register a custom extension attribute for `Span` objects.

    The arguments are stored as given; no check is made here on how many
    of them are non-None.

    name: Key under which the extension is stored.
    default: Fallback value for the attribute.
    method: Object stored as the extension's method.
    getter: Callable stored as the extension's getter.
    setter: Callable stored as the extension's setter.
    """
    # The registry entry is always ordered (default, method, getter, setter).
    entry = (default, method, getter, setter)
    Underscore.span_extensions[name] = entry
|
||||
|
||||
@classmethod
def get_extension(cls, name):
    """Look up a registered `Span` extension by name.

    name: Key the extension was registered under.
    RETURNS: The stored (default, method, getter, setter) tuple, or None
        if no extension with that name is registered.
    """
    registry = Underscore.span_extensions
    return registry.get(name)
|
||||
|
||||
@classmethod
def has_extension(cls, name):
    """Check whether a `Span` extension is registered under `name`.

    name: Key to look for in the `Span` extension registry.
    RETURNS: True if the name is registered, False otherwise.
    """
    registry = Underscore.span_extensions
    return name in registry
|
||||
|
||||
def __cinit__(self, Doc doc, int start, int end, attr_t label=0, vector=None,
|
||||
vector_norm=None):
|
||||
"""Create a `Span` object from the slice `doc[start : end]`.
|
||||
|
@ -111,6 +125,11 @@ cdef class Span:
|
|||
for i in range(self.start, self.end):
|
||||
yield self.doc[i]
|
||||
|
||||
@property
def _(self):
    """Return a new `Underscore` proxy for this `Span`.

    The proxy is built from the `Span` extension registry on every access
    and receives the span's `start_char` and `end_char` as its start and
    end offsets.
    """
    span_registry = Underscore.span_extensions
    first_char = self.start_char
    last_char = self.end_char
    return Underscore(span_registry, self, start=first_char, end=last_char)
|
||||
|
||||
def merge(self, *args, **attributes):
|
||||
"""Retokenize the document, such that the span is merged into a single
|
||||
token.
|
||||
|
|
|
@ -20,10 +20,24 @@ from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUST
|
|||
from ..attrs cimport LEMMA, POS, TAG, DEP
|
||||
from ..compat import is_config
|
||||
from .. import about
|
||||
from .underscore import Underscore
|
||||
|
||||
|
||||
cdef class Token:
|
||||
"""An individual token – i.e. a word, punctuation symbol, whitespace, etc."""
|
||||
@classmethod
def set_extension(cls, name, default=None, method=None,
                  getter=None, setter=None):
    """Register a custom extension attribute for `Token` objects.

    name: Key under which the extension is stored.
    default: Fallback value for the attribute.
    method: Object stored as the extension's method.
    getter: Callable stored as the extension's getter.
    setter: Callable stored as the extension's setter.
    """
    # Store in the token registry: `Token._` builds its proxy from
    # `Underscore.token_extensions`, so writing to `span_extensions`
    # would hide the attribute from tokens and expose it on spans.
    Underscore.token_extensions[name] = (default, method, getter, setter)
|
||||
|
||||
@classmethod
def get_extension(cls, name):
    """Look up a registered `Token` extension by name.

    name: Key the extension was registered under.
    RETURNS: The stored (default, method, getter, setter) tuple, or None
        if no extension with that name is registered.
    """
    # Read the token registry (the one `Token._` uses), not the span one.
    return Underscore.token_extensions.get(name)
|
||||
|
||||
@classmethod
def has_extension(cls, name):
    """Check whether a `Token` extension is registered under `name`.

    name: Key to look for in the `Token` extension registry.
    RETURNS: True if the name is registered, False otherwise.
    """
    # Check the token registry (the one `Token._` uses), not the span one.
    return name in Underscore.token_extensions
|
||||
|
||||
def __cinit__(self, Vocab vocab, Doc doc, int offset):
|
||||
"""Construct a `Token` object.
|
||||
|
||||
|
@ -87,6 +101,11 @@ cdef class Token:
|
|||
else:
|
||||
raise ValueError(op)
|
||||
|
||||
@property
def _(self):
    """Return a new `Underscore` proxy for this `Token`.

    The proxy is built from the `Token` extension registry on every access.
    The token's `idx` is passed as the start offset and the end offset is
    explicitly None.
    """
    token_registry = Underscore.token_extensions
    offset = self.idx
    return Underscore(token_registry, self, start=offset, end=None)
|
||||
|
||||
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
|
||||
"""Check the value of a boolean flag.
|
||||
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
class Undercore(object):
|
||||
class Underscore(object):
|
||||
doc_extensions = {}
|
||||
span_extensions = {}
|
||||
token_extensions = {}
|
||||
|
||||
def __init__(self, obj, start=None, end=None):
|
||||
def __init__(self, extensions, obj, start=None, end=None):
|
||||
object.__setattr__(self, '_extensions', extensions)
|
||||
object.__setattr__(self, '_obj', obj)
|
||||
# Assumption is that for doc values, _start and _end will both be None
|
||||
# Span will set non-None values for _start and _end
|
||||
|
@ -12,23 +13,23 @@ class Undercore(object):
|
|||
# (see _get_key), and lets us use a single Underscore class.
|
||||
object.__setattr__(self, '_doc', obj.doc)
|
||||
object.__setattr__(self, '_start', start)
|
||||
object.__setattr__(self, '_end', start)
|
||||
object.__setattr__(self, '_end', end)
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name not in self.__class__.extensions:
|
||||
if name not in self._extensions:
|
||||
raise AttributeError(name)
|
||||
default, method, getter, setter = self.__class__.extensions[name]
|
||||
default, method, getter, setter = self._extensions[name]
|
||||
if getter is not None:
|
||||
return getter(self._obj)
|
||||
elif method is not None:
|
||||
return method)
|
||||
return method
|
||||
else:
|
||||
return self._doc.user_data.get(self._get_key(name), default)
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
if name not in self.__class__.extensions:
|
||||
if name not in self._extensions:
|
||||
raise AttributeError(name)
|
||||
default, method, getter, setter = self.__class__.extensions[name]
|
||||
default, method, getter, setter = self._extensions[name]
|
||||
if setter is not None:
|
||||
return setter(self._obj, value)
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue
Block a user