diff --git a/examples/training/train_textcat.py b/examples/training/train_textcat.py
index 6fa79e75b..d1cf3ab8a 100644
--- a/examples/training/train_textcat.py
+++ b/examples/training/train_textcat.py
@@ -26,7 +26,7 @@ from spacy.pipeline import TextCategorizer
 @plac.annotations(
     model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
     output_dir=("Optional output directory", "option", "o", Path),
-    n_examples=("Number of texts to train from", "option", "N", int),
+    n_texts=("Number of texts to train from", "option", "t", int),
     n_iter=("Number of training iterations", "option", "n", int))
 def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
     if model is not None:
@@ -39,20 +39,19 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
     # add the text classifier to the pipeline if it doesn't exist
     # nlp.create_pipe works for built-ins that are registered with spaCy
     if 'textcat' not in nlp.pipe_names:
-        # textcat = nlp.create_pipe('textcat')
-        textcat = TextCategorizer(nlp.vocab, labels=['POSITIVE'])
+        textcat = nlp.create_pipe('textcat')
         nlp.add_pipe(textcat, last=True)
     # otherwise, get it, so we can add labels to it
     else:
         textcat = nlp.get_pipe('textcat')
 
     # add label to text classifier
-    # textcat.add_label('POSITIVE')
+    textcat.add_label('POSITIVE')
 
     # load the IMBD dataset
     print("Loading IMDB data...")
-    print("Using %d training examples" % n_texts)
     (train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts)
+    print("Using %d training examples" % n_texts)
     train_docs = [nlp.tokenizer(text) for text in train_texts]
     train_gold = [GoldParse(doc, cats=cats) for doc, cats in
                   zip(train_docs, train_cats)]
diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index a2321d1ad..e5f426453 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -441,11 +441,12 @@ class Tagger(Pipe):
     def add_label(self, label):
         if label in self.labels:
             return 0
-        smaller = self.model[-1]._layers[-1]
-        larger = Softmax(len(self.labels)+1, smaller.nI)
-        copy_array(larger.W[:smaller.nO], smaller.W)
-        copy_array(larger.b[:smaller.nO], smaller.b)
-        self.model[-1]._layers[-1] = larger
+        if self.model not in (True, False, None):
+            smaller = self.model._layers[-1]
+            larger = Softmax(len(self.labels)+1, smaller.nI)
+            copy_array(larger.W[:smaller.nO], smaller.W)
+            copy_array(larger.b[:smaller.nO], smaller.b)
+            self.model._layers[-1] = larger
         self.labels.append(label)
         return 1
 
@@ -759,11 +760,12 @@ class TextCategorizer(Pipe):
     def add_label(self, label):
         if label in self.labels:
             return 0
-        smaller = self.model[-1]._layers[-1]
-        larger = Affine(len(self.labels)+1, smaller.nI)
-        copy_array(larger.W[:smaller.nO], smaller.W)
-        copy_array(larger.b[:smaller.nO], smaller.b)
-        self.model[-1]._layers[-1] = larger
+        if self.model not in (None, True, False):
+            smaller = self.model._layers[-1]
+            larger = Affine(len(self.labels)+1, smaller.nI)
+            copy_array(larger.W[:smaller.nO], smaller.W)
+            copy_array(larger.b[:smaller.nO], smaller.b)
+            self.model._layers[-1] = larger
         self.labels.append(label)
         return 1
 
diff --git a/spacy/tests/spans/test_span.py b/spacy/tests/doc/test_span.py
similarity index 100%
rename from spacy/tests/spans/test_span.py
rename to spacy/tests/doc/test_span.py
diff --git a/spacy/tests/spans/test_merge.py b/spacy/tests/doc/test_span_merge.py
similarity index 100%
rename from spacy/tests/spans/test_merge.py
rename to spacy/tests/doc/test_span_merge.py
diff --git a/spacy/tests/spans/__init__.py b/spacy/tests/spans/__init__.py
deleted file mode 100644
index e69de29bb..000000000