From 4ca31b4d879de9a3499ad2bb52f83a4656aead64 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 26 Oct 2016 13:13:56 +0200
Subject: [PATCH] Fix clobbering of 'missing' named ent values after assigning ents.

---
 spacy/tokens/doc.pyx | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 52abf47c0..66759d271 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -323,7 +323,10 @@ cdef class Doc:
             cdef int i
             for i in range(self.length):
                 self.c[i].ent_type = 0
-                self.c[i].ent_iob = 2 # Means O, not missing!
+                # At this point we don't know whether the NER has run over the
+                # Doc. If the ent_iob is missing, leave it missing.
+                if self.c[i].ent_iob != 0:
+                    self.c[i].ent_iob = 2 # Means O. Non-O are set from ents.
             cdef attr_t ent_type
             cdef int start, end
             for ent_info in ents: