mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Pass extensions into Underscore class
This commit is contained in:
parent
1289129fd9
commit
668a0ea640
|
@ -30,7 +30,7 @@ from ..util import normalize_slice
|
||||||
from ..compat import is_config
|
from ..compat import is_config
|
||||||
from .. import about
|
from .. import about
|
||||||
from .. import util
|
from .. import util
|
||||||
|
from .underscore import Underscore
|
||||||
|
|
||||||
DEF PADDING = 5
|
DEF PADDING = 5
|
||||||
|
|
||||||
|
@ -64,6 +64,7 @@ cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil:
|
||||||
else:
|
else:
|
||||||
return Lexeme.get_struct_attr(token.lex, feat_name)
|
return Lexeme.get_struct_attr(token.lex, feat_name)
|
||||||
|
|
||||||
|
|
||||||
def _get_chunker(lang):
|
def _get_chunker(lang):
|
||||||
try:
|
try:
|
||||||
cls = util.get_lang_class(lang)
|
cls = util.get_lang_class(lang)
|
||||||
|
@ -73,6 +74,7 @@ def _get_chunker(lang):
|
||||||
return None
|
return None
|
||||||
return cls.Defaults.syntax_iterators.get(u'noun_chunks')
|
return cls.Defaults.syntax_iterators.get(u'noun_chunks')
|
||||||
|
|
||||||
|
|
||||||
cdef class Doc:
|
cdef class Doc:
|
||||||
"""A sequence of Token objects. Access sentences and named entities, export
|
"""A sequence of Token objects. Access sentences and named entities, export
|
||||||
annotations to numpy arrays, losslessly serialize to compressed binary strings.
|
annotations to numpy arrays, losslessly serialize to compressed binary strings.
|
||||||
|
@ -87,6 +89,21 @@ cdef class Doc:
|
||||||
>>> from spacy.tokens import Doc
|
>>> from spacy.tokens import Doc
|
||||||
>>> doc = Doc(nlp.vocab, words=[u'hello', u'world', u'!'], spaces=[True, False, False])
|
>>> doc = Doc(nlp.vocab, words=[u'hello', u'world', u'!'], spaces=[True, False, False])
|
||||||
"""
|
"""
|
||||||
|
@classmethod
def set_extension(cls, name, default=None, method=None,
                  getter=None, setter=None):
    """Register a custom extension attribute for the `doc._` namespace.

    Exactly one of `default`, `method`, `getter` and `setter` must be
    something other than None; any other combination fails the assertion.

    name: Key under which the extension is registered.
    default: Fallback value used when nothing is stored for the attribute.
    method: Object handed back as-is when the attribute is read.
    getter: Callable invoked with the wrapped object when the attribute
        is read.
    setter: Callable invoked with the wrapped object and the new value
        when the attribute is assigned.
    """
    candidates = (default, getter, setter, method)
    provided = [value for value in candidates if value is not None]
    assert len(provided) == 1
    # The tuple order is what Underscore unpacks on attribute access.
    Underscore.doc_extensions[name] = (default, method, getter, setter)
|
||||||
|
|
||||||
|
@classmethod
def get_extension(cls, name):
    """Look up the Doc extension registered under `name`.

    Returns the stored `(default, method, getter, setter)` tuple, or None
    when no extension with that name has been registered.
    """
    registry = Underscore.doc_extensions
    return registry.get(name)
|
||||||
|
|
||||||
|
@classmethod
def has_extension(cls, name):
    """Return True if a Doc extension called `name` has been registered."""
    registry = Underscore.doc_extensions
    return name in registry
|
||||||
|
|
||||||
def __init__(self, Vocab vocab, words=None, spaces=None, orths_and_spaces=None):
|
def __init__(self, Vocab vocab, words=None, spaces=None, orths_and_spaces=None):
|
||||||
"""Create a Doc object.
|
"""Create a Doc object.
|
||||||
|
|
||||||
|
@ -159,6 +176,10 @@ cdef class Doc:
|
||||||
self.is_tagged = True
|
self.is_tagged = True
|
||||||
self.is_parsed = True
|
self.is_parsed = True
|
||||||
|
|
||||||
|
@property
def _(self):
    """Accessor for this Doc's custom extension attributes.

    A fresh Underscore wrapper bound to the Doc extension registry is
    created on every access.
    """
    registry = Underscore.doc_extensions
    return Underscore(registry, self)
|
||||||
|
|
||||||
def __getitem__(self, object i):
|
def __getitem__(self, object i):
|
||||||
"""Get a `Token` or `Span` object.
|
"""Get a `Token` or `Span` object.
|
||||||
|
|
||||||
|
|
|
@ -17,10 +17,24 @@ from ..attrs cimport IS_PUNCT, IS_SPACE
|
||||||
from ..lexeme cimport Lexeme
|
from ..lexeme cimport Lexeme
|
||||||
from ..compat import is_config
|
from ..compat import is_config
|
||||||
from .. import about
|
from .. import about
|
||||||
|
from .underscore import Underscore
|
||||||
|
|
||||||
|
|
||||||
cdef class Span:
|
cdef class Span:
|
||||||
"""A slice from a Doc object."""
|
"""A slice from a Doc object."""
|
||||||
|
@classmethod
def set_extension(cls, name, default=None, method=None,
                  getter=None, setter=None):
    """Register a custom extension attribute for the `span._` namespace.

    The arguments are stored as given; no check is made on how many of
    them are set.

    name: Key under which the extension is registered.
    default: Fallback value used when nothing is stored for the attribute.
    method: Object handed back as-is when the attribute is read.
    getter: Callable invoked with the wrapped object when the attribute
        is read.
    setter: Callable invoked with the wrapped object and the new value
        when the attribute is assigned.
    """
    # The tuple order is what Underscore unpacks on attribute access.
    spec = (default, method, getter, setter)
    Underscore.span_extensions[name] = spec
|
||||||
|
|
||||||
|
@classmethod
def get_extension(cls, name):
    """Look up the Span extension registered under `name`.

    Returns the stored `(default, method, getter, setter)` tuple, or None
    when no extension with that name has been registered.
    """
    registry = Underscore.span_extensions
    return registry.get(name)
|
||||||
|
|
||||||
|
@classmethod
def has_extension(cls, name):
    """Return True if a Span extension called `name` has been registered."""
    registry = Underscore.span_extensions
    return name in registry
|
||||||
|
|
||||||
def __cinit__(self, Doc doc, int start, int end, attr_t label=0, vector=None,
|
def __cinit__(self, Doc doc, int start, int end, attr_t label=0, vector=None,
|
||||||
vector_norm=None):
|
vector_norm=None):
|
||||||
"""Create a `Span` object from the slice `doc[start : end]`.
|
"""Create a `Span` object from the slice `doc[start : end]`.
|
||||||
|
@ -111,6 +125,11 @@ cdef class Span:
|
||||||
for i in range(self.start, self.end):
|
for i in range(self.start, self.end):
|
||||||
yield self.doc[i]
|
yield self.doc[i]
|
||||||
|
|
||||||
|
@property
def _(self):
    """Accessor for this Span's custom extension attributes.

    A fresh Underscore wrapper bound to the Span extension registry is
    created on every access; the span's character offsets are passed
    along as `start` and `end`.
    """
    registry = Underscore.span_extensions
    first_char = self.start_char
    last_char = self.end_char
    return Underscore(registry, self, start=first_char, end=last_char)
|
||||||
|
|
||||||
def merge(self, *args, **attributes):
|
def merge(self, *args, **attributes):
|
||||||
"""Retokenize the document, such that the span is merged into a single
|
"""Retokenize the document, such that the span is merged into a single
|
||||||
token.
|
token.
|
||||||
|
|
|
@ -20,10 +20,24 @@ from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUST
|
||||||
from ..attrs cimport LEMMA, POS, TAG, DEP
|
from ..attrs cimport LEMMA, POS, TAG, DEP
|
||||||
from ..compat import is_config
|
from ..compat import is_config
|
||||||
from .. import about
|
from .. import about
|
||||||
|
from .underscore import Underscore
|
||||||
|
|
||||||
|
|
||||||
cdef class Token:
|
cdef class Token:
|
||||||
"""An individual token – i.e. a word, punctuation symbol, whitespace, etc."""
|
"""An individual token – i.e. a word, punctuation symbol, whitespace, etc."""
|
||||||
|
@classmethod
def set_extension(cls, name, default=None, method=None,
                  getter=None, setter=None):
    """Register a custom extension attribute for the `token._` namespace.

    name: Key under which the extension is registered.
    default: Fallback value used when nothing is stored for the attribute.
    method: Object handed back as-is when the attribute is read.
    getter: Callable invoked with the wrapped object when the attribute
        is read.
    setter: Callable invoked with the wrapped object and the new value
        when the attribute is assigned.
    """
    # Token extensions live in their own registry. `Token._` reads from
    # `token_extensions`, so writing to `span_extensions` here would hide
    # the attribute from tokens and expose it on spans instead.
    Underscore.token_extensions[name] = (default, method, getter, setter)

@classmethod
def get_extension(cls, name):
    """Look up the Token extension registered under `name`.

    Returns the stored `(default, method, getter, setter)` tuple, or None
    when no extension with that name has been registered.
    """
    return Underscore.token_extensions.get(name)

@classmethod
def has_extension(cls, name):
    """Return True if a Token extension called `name` has been registered."""
    return name in Underscore.token_extensions
|
||||||
|
|
||||||
def __cinit__(self, Vocab vocab, Doc doc, int offset):
|
def __cinit__(self, Vocab vocab, Doc doc, int offset):
|
||||||
"""Construct a `Token` object.
|
"""Construct a `Token` object.
|
||||||
|
|
||||||
|
@ -87,6 +101,11 @@ cdef class Token:
|
||||||
else:
|
else:
|
||||||
raise ValueError(op)
|
raise ValueError(op)
|
||||||
|
|
||||||
|
@property
def _(self):
    """Accessor for this Token's custom extension attributes.

    A fresh Underscore wrapper bound to the Token extension registry is
    created on every access; `start` is the token's `idx` and `end` is
    left as None.
    """
    registry = Underscore.token_extensions
    return Underscore(registry, self, start=self.idx, end=None)
|
||||||
|
|
||||||
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
|
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
|
||||||
"""Check the value of a boolean flag.
|
"""Check the value of a boolean flag.
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
class Undercore(object):
|
class Underscore(object):
|
||||||
doc_extensions = {}
|
doc_extensions = {}
|
||||||
span_extensions = {}
|
span_extensions = {}
|
||||||
token_extensions = {}
|
token_extensions = {}
|
||||||
|
|
||||||
def __init__(self, obj, start=None, end=None):
|
def __init__(self, extensions, obj, start=None, end=None):
|
||||||
|
object.__setattr__(self, '_extensions', extensions)
|
||||||
object.__setattr__(self, '_obj', obj)
|
object.__setattr__(self, '_obj', obj)
|
||||||
# Assumption is that for doc values, _start and _end will both be None
|
# Assumption is that for doc values, _start and _end will both be None
|
||||||
# Span will set non-None values for _start and _end
|
# Span will set non-None values for _start and _end
|
||||||
|
@ -12,23 +13,23 @@ class Undercore(object):
|
||||||
# (see _get_key), and lets us use a single Underscore class.
|
# (see _get_key), and lets us use a single Underscore class.
|
||||||
object.__setattr__(self, '_doc', obj.doc)
|
object.__setattr__(self, '_doc', obj.doc)
|
||||||
object.__setattr__(self, '_start', start)
|
object.__setattr__(self, '_start', start)
|
||||||
object.__setattr__(self, '_end', start)
|
object.__setattr__(self, '_end', end)
|
||||||
|
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
if name not in self.__class__.extensions:
|
if name not in self._extensions:
|
||||||
raise AttributeError(name)
|
raise AttributeError(name)
|
||||||
default, method, getter, setter = self.__class__.extensions[name]
|
default, method, getter, setter = self._extensions[name]
|
||||||
if getter is not None:
|
if getter is not None:
|
||||||
return getter(self._obj)
|
return getter(self._obj)
|
||||||
elif method is not None:
|
elif method is not None:
|
||||||
return method)
|
return method
|
||||||
else:
|
else:
|
||||||
return self._doc.user_data.get(self._get_key(name), default)
|
return self._doc.user_data.get(self._get_key(name), default)
|
||||||
|
|
||||||
def __setattr__(self, name, value):
|
def __setattr__(self, name, value):
|
||||||
if name not in self.__class__.extensions:
|
if name not in self._extensions:
|
||||||
raise AttributeError(name)
|
raise AttributeError(name)
|
||||||
default, method, getter, setter = self.__class__.extensions[name]
|
default, method, getter, setter = self._extensions[name]
|
||||||
if setter is not None:
|
if setter is not None:
|
||||||
return setter(self._obj, value)
|
return setter(self._obj, value)
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user