mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Add support for sent_start to GoldParse
This commit is contained in:
parent
44589fb38c
commit
4bb6bc3f9e
|
@ -9,6 +9,7 @@ cdef struct GoldParseC:
|
||||||
int* tags
|
int* tags
|
||||||
int* heads
|
int* heads
|
||||||
int* has_dep
|
int* has_dep
|
||||||
|
int* sent_start
|
||||||
attr_t* labels
|
attr_t* labels
|
||||||
int** brackets
|
int** brackets
|
||||||
Transition* ner
|
Transition* ner
|
||||||
|
|
|
@ -426,6 +426,7 @@ cdef class GoldParse:
|
||||||
self.c.heads = <int*>self.mem.alloc(len(doc), sizeof(int))
|
self.c.heads = <int*>self.mem.alloc(len(doc), sizeof(int))
|
||||||
self.c.labels = <attr_t*>self.mem.alloc(len(doc), sizeof(attr_t))
|
self.c.labels = <attr_t*>self.mem.alloc(len(doc), sizeof(attr_t))
|
||||||
self.c.has_dep = <int*>self.mem.alloc(len(doc), sizeof(int))
|
self.c.has_dep = <int*>self.mem.alloc(len(doc), sizeof(int))
|
||||||
|
self.c.sent_start = <int*>self.mem.alloc(len(doc), sizeof(int))
|
||||||
self.c.ner = <Transition*>self.mem.alloc(len(doc), sizeof(Transition))
|
self.c.ner = <Transition*>self.mem.alloc(len(doc), sizeof(Transition))
|
||||||
|
|
||||||
self.cats = list(cats)
|
self.cats = list(cats)
|
||||||
|
@ -482,6 +483,10 @@ cdef class GoldParse:
|
||||||
"""
|
"""
|
||||||
return not nonproj.is_nonproj_tree(self.heads)
|
return not nonproj.is_nonproj_tree(self.heads)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def sent_starts(self):
|
||||||
|
return [self.c.sent_start[i] for i in range(self.length)]
|
||||||
|
|
||||||
|
|
||||||
def biluo_tags_from_offsets(doc, entities, missing='O'):
|
def biluo_tags_from_offsets(doc, entities, missing='O'):
|
||||||
"""Encode labelled spans into per-token tags, using the Begin/In/Last/Unit/Out
|
"""Encode labelled spans into per-token tags, using the Begin/In/Last/Unit/Out
|
||||||
|
|
Loading…
Reference in New Issue
Block a user