From 8e08c378fe537ede6da89855ae92e80aef5a9e9e Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 25 Mar 2018 22:16:01 +0200 Subject: [PATCH] Fix entity IOB and tag in span merging --- spacy/tokens/doc.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index b06c7433c..cfc2686a2 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -460,10 +460,7 @@ cdef class Doc: cdef int i for i in range(self.length): self.c[i].ent_type = 0 - # At this point we don't know whether the NER has run over the - # Doc. If the ent_iob is missing, leave it missing. - if self.c[i].ent_iob != 0: - self.c[i].ent_iob = 2 # Means O. Non-O are set from ents. + self.c[i].ent_iob = 2 # Means O. Non-O are set from ents. cdef attr_t ent_type cdef int start, end for ent_info in ents: @@ -978,6 +975,8 @@ cdef class Doc: self.c[i].head = start elif head_idx >= end: self.c[i].head -= offset + token.ent_iob = span[0].ent_iob + token.ent_type = span[0].ent_type # Now compress the token array for i in range(end, self.length): self.c[i - offset] = self.c[i] @@ -988,6 +987,7 @@ cdef class Doc: for i in range(self.length): # ...And, set heads back to a relative position self.c[i].head -= i + # TODO: Fix entity IOB # Set the left/right children, left/right edges set_children_from_heads(self.c, self.length) # Clear the cached Python objects