From 311a985fe03d9b2efa97cf349444ac1d9e966beb Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 16 Oct 2016 18:16:42 +0200 Subject: [PATCH] Add input error handling in Doc --- spacy/tokens/doc.pyx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index b4518b09b..9a164864a 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -120,6 +120,12 @@ cdef class Doc: if orths_and_spaces is None and words is not None: if spaces is None: spaces = [True] * len(words) + elif len(spaces) != len(words): + raise ValueError( + "Arguments 'words' and 'spaces' should be sequences of the " + "same length, or 'spaces' should be left default at None. " + "spaces should be a sequence of booleans, with True meaning " + "that the word owns a ' ' character following it.") orths_and_spaces = zip(words, spaces) if orths_and_spaces is not None: for orth_space in orths_and_spaces: