From f32927efbf130a6504065af990e56e0af3b1ea5a Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 21 Sep 2015 18:35:40 +1000 Subject: [PATCH] * Raise exceptions on attempts to access the parse when the data is not installed. This partly but not fully addresses Issue #97. Still need exceptions on the various Token attributes that access the parse tree, e.g. token.head, token.lefts, token.rights, etc. Exceptions should be centralized, too. --- spacy/lexeme.pyx | 8 ++++++++ spacy/tokens/doc.pyx | 13 +++++++++++++ spacy/tokens/token.pyx | 7 +++++++ 3 files changed, 28 insertions(+) diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 625fea76b..eadcd7722 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -55,6 +55,14 @@ cdef class Lexeme: property vector: def __get__(self): cdef int length = self.vocab.vectors_length + if length == 0: + raise ValueError( + "Word vectors set to length 0. This may be because the " + "data is not installed. If you haven't already, run" + "\npython -m spacy.en.download all\n" + "to install the data." + ) + repvec_view = self.c.repvec return numpy.asarray(repvec_view) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 5bdd5b22f..19cff3a90 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -232,6 +232,13 @@ cdef class Doc: @property def noun_chunks(self): """Yield spans for base noun phrases.""" + if not self.is_parsed: + raise ValueError( + "noun_chunks requires the dependency parse, which " + "requires data to be installed. If you haven't done so, run: " + "\npython -m spacy.en.download all\n" + "to install the data") + cdef const TokenC* word labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr', 'conj'] np_deps = [self.vocab.strings[label] for label in labels] @@ -246,6 +253,12 @@ cdef class Doc: """ Yield a list of sentence Span objects, calculated from the dependency parse. 
""" + if not self.is_parsed: + raise ValueError( + "sentence boundary detection requires the dependency parse, which " + "requires data to be installed. If you haven't done so, run: " + "\npython -m spacy.en.download all\n" + "to install the data") cdef int i start = 0 for i in range(1, self.length): diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 364047835..86e9f1d2b 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -131,6 +131,13 @@ cdef class Token: property vector: def __get__(self): cdef int length = self.vocab.vectors_length + if length == 0: + raise ValueError( + "Word vectors set to length 0. This may be because the " + "data is not installed. If you haven't already, run" + "\npython -m spacy.en.download all\n" + "to install the data." + ) repvec_view = self.c.lex.repvec return numpy.asarray(repvec_view)