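* Fix tag handling in doc.merge (a tag that is not in the morphology tag map is now set directly on the token instead of being passed to assign_tag), and assign sent_start when setting heads.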

This commit is contained in:
Matthew Honnibal 2015-11-03 18:14:53 +11:00
parent 068222c09a
commit 09664177d7

View File

@ -471,7 +471,10 @@ cdef class Doc:
# Update fields # Update fields
token.lex = lex token.lex = lex
token.spacy = self.data[end-1].spacy token.spacy = self.data[end-1].spacy
if tag in self.vocab.morphology.tag_map:
self.vocab.morphology.assign_tag(token, self.vocab.strings[tag]) self.vocab.morphology.assign_tag(token, self.vocab.strings[tag])
else:
token.tag = self.vocab.strings[tag]
token.tag = self.vocab.strings[tag] token.tag = self.vocab.strings[tag]
token.lemma = self.vocab.strings[lemma] token.lemma = self.vocab.strings[lemma]
if ent_type == 'O': if ent_type == 'O':
@ -545,3 +548,9 @@ cdef int set_children_from_heads(TokenC* tokens, int length) except -1:
if child.r_edge > head.r_edge: if child.r_edge > head.r_edge:
head.r_edge = child.r_edge head.r_edge = child.r_edge
head.r_kids += 1 head.r_kids += 1
# Set sentence starts
for i in range(length):
if tokens[i].head == 0 and tokens[i].dep != 0:
tokens[tokens[i].l_edge].sent_start = True