Fix gold.pyx for 1.0

This commit is contained in:
Matthew Honnibal 2016-11-25 08:57:59 -06:00
parent 314bc8d34f
commit cc7e607a8a
2 changed files with 7 additions and 1 deletion

View File

@@ -19,6 +19,7 @@ cdef class GoldParse:
cdef int length cdef int length
cdef readonly int loss cdef readonly int loss
cdef readonly list words
cdef readonly list tags cdef readonly list tags
cdef readonly list heads cdef readonly list heads
cdef readonly list labels cdef readonly list labels

View File

@@ -19,6 +19,8 @@ def tags_to_entities(tags):
entities = [] entities = []
start = None start = None
for i, tag in enumerate(tags): for i, tag in enumerate(tags):
if tag is None:
continue
if tag.startswith('O'): if tag.startswith('O'):
# TODO: We shouldn't be getting these malformed inputs. Fix this. # TODO: We shouldn't be getting these malformed inputs. Fix this.
if start is not None: if start is not None:
@@ -229,7 +231,7 @@ cdef class GoldParse:
if deps is None: if deps is None:
deps = [None for _ in doc] deps = [None for _ in doc]
if entities is None: if entities is None:
entities = [None for _ in doc] entities = ['-' for _ in doc]
elif len(entities) == 0: elif len(entities) == 0:
entities = ['O' for _ in doc] entities = ['O' for _ in doc]
elif not isinstance(entities[0], basestring): elif not isinstance(entities[0], basestring):
@@ -246,6 +248,7 @@ cdef class GoldParse:
self.c.labels = <int*>self.mem.alloc(len(doc), sizeof(int)) self.c.labels = <int*>self.mem.alloc(len(doc), sizeof(int))
self.c.ner = <Transition*>self.mem.alloc(len(doc), sizeof(Transition)) self.c.ner = <Transition*>self.mem.alloc(len(doc), sizeof(Transition))
self.words = [None] * len(doc)
self.tags = [None] * len(doc) self.tags = [None] * len(doc)
self.heads = [None] * len(doc) self.heads = [None] * len(doc)
self.labels = [''] * len(doc) self.labels = [''] * len(doc)
@@ -259,6 +262,7 @@ cdef class GoldParse:
for i, gold_i in enumerate(self.cand_to_gold): for i, gold_i in enumerate(self.cand_to_gold):
if doc[i].text.isspace(): if doc[i].text.isspace():
self.words[i] = doc[i].text
self.tags[i] = 'SP' self.tags[i] = 'SP'
self.heads[i] = None self.heads[i] = None
self.labels[i] = None self.labels[i] = None
@@ -266,6 +270,7 @@ cdef class GoldParse:
if gold_i is None: if gold_i is None:
pass pass
else: else:
self.words[i] = words[gold_i]
self.tags[i] = tags[gold_i] self.tags[i] = tags[gold_i]
self.heads[i] = self.gold_to_cand[heads[gold_i]] self.heads[i] = self.gold_to_cand[heads[gold_i]]
self.labels[i] = deps[gold_i] self.labels[i] = deps[gold_i]