mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Add SENT_START attribute, for custom sentence boundary detection
This commit is contained in:
parent
fb0ff0272f
commit
d68dd1f251
|
@ -83,6 +83,7 @@ cpdef enum attr_id_t:
|
|||
ENT_IOB
|
||||
ENT_TYPE
|
||||
HEAD
|
||||
SENT_START
|
||||
SPACY
|
||||
PROB
|
||||
|
||||
|
|
|
@ -85,6 +85,7 @@ IDS = {
|
|||
"ENT_IOB": ENT_IOB,
|
||||
"ENT_TYPE": ENT_TYPE,
|
||||
"HEAD": HEAD,
|
||||
"SENT_START": SENT_START,
|
||||
"SPACY": SPACY,
|
||||
"PROB": PROB,
|
||||
"LANG": LANG,
|
||||
|
|
|
@ -82,6 +82,7 @@ cpdef enum symbol_t:
|
|||
ENT_IOB
|
||||
ENT_TYPE
|
||||
HEAD
|
||||
SENT_START
|
||||
SPACY
|
||||
PROB
|
||||
|
||||
|
|
|
@ -84,6 +84,7 @@ IDS = {
|
|||
"ENT_IOB": ENT_IOB,
|
||||
"ENT_TYPE": ENT_TYPE,
|
||||
"HEAD": HEAD,
|
||||
"SENT_START": SENT_START,
|
||||
"SPACY": SPACY,
|
||||
"PROB": PROB,
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ from ..typedefs cimport attr_t, flags_t
|
|||
from ..attrs cimport attr_id_t
|
||||
from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
||||
from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE
|
||||
from ..attrs cimport SENT_START
|
||||
from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t
|
||||
from ..syntax.iterators import CHUNKERS
|
||||
from ..util import normalize_slice
|
||||
|
@ -52,6 +53,8 @@ cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil:
|
|||
return token.dep
|
||||
elif feat_name == HEAD:
|
||||
return token.head
|
||||
elif feat_name == SENT_START:
|
||||
return token.sent_start
|
||||
elif feat_name == SPACY:
|
||||
return token.spacy
|
||||
elif feat_name == ENT_IOB:
|
||||
|
@ -559,6 +562,7 @@ cdef class Doc:
|
|||
for i in range(self.length):
|
||||
self.c[i] = parsed[i]
|
||||
|
||||
<<<<<<< HEAD
|
||||
def from_array(self, attrs, int[:, :] array):
|
||||
"""Load attributes from a numpy array. Write to a `Doc` object, from an
|
||||
`(M, N)` array of attributes.
|
||||
|
@ -567,6 +571,18 @@ cdef class Doc:
|
|||
array (numpy.ndarray[ndim=2, dtype='int32']) The attribute values to load.
|
||||
RETURNS (Doc): Itself.
|
||||
"""
|
||||
=======
|
||||
def from_array(self, attrs, array):
|
||||
if SENT_START in attrs and HEAD in attrs:
|
||||
raise ValueError(
|
||||
"Conflicting attributes specified in doc.from_array():\n"
|
||||
"(HEAD, SENT_START)\n"
|
||||
"The HEAD attribute currently sets sentence boundaries implicitly,\n"
|
||||
"based on the tree structure. This means the HEAD attribute would "
|
||||
"potentially override the sentence boundaries set by SENT_START.\n"
|
||||
"See https://github.com/spacy-io/spaCy/issues/235 for details and "
|
||||
"workarounds, and to propose solutions.")
|
||||
>>>>>>> 45ad8684... * Add SENT_START attribute
|
||||
cdef int i, col
|
||||
cdef attr_id_t attr_id
|
||||
cdef TokenC* tokens = self.c
|
||||
|
|
Loading…
Reference in New Issue
Block a user