From 652f31d3ee1021f528b9b543de1d82b5c59b1262 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 20 Jun 2020 20:12:54 +0200
Subject: [PATCH] Update DocBin

---
 spacy/tokens/_serialize.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index 3072787ae..febfbd670 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -14,7 +14,6 @@ ALL_ATTRS = (
     "TAG",
     "HEAD",
     "DEP",
-    "SENT_START",
     "ENT_IOB",
     "ENT_TYPE",
     "LEMMA",
@@ -112,8 +111,7 @@ class DocBin(object):
         for i in range(len(self.tokens)):
             tokens = self.tokens[i]
             spaces = self.spaces[i]
-            words = [vocab.strings[orth] for orth in tokens[:, orth_col]]
-            doc = Doc(vocab, words=words, spaces=spaces)
+            doc = Doc(vocab, words=tokens[:, orth_col], spaces=spaces)
             doc = doc.from_array(self.attrs, tokens)
             doc.cats = self.cats[i]
             if self.store_user_data: