From 8c2260e18c65217bc3f5d8e8e21ce71ec969ef06 Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 1 Nov 2017 16:56:35 +0100 Subject: [PATCH 1/3] Move span tests to /doc --- spacy/tests/{spans => doc}/test_span.py | 0 spacy/tests/{spans/test_merge.py => doc/test_span_merge.py} | 0 spacy/tests/spans/__init__.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename spacy/tests/{spans => doc}/test_span.py (100%) rename spacy/tests/{spans/test_merge.py => doc/test_span_merge.py} (100%) delete mode 100644 spacy/tests/spans/__init__.py diff --git a/spacy/tests/spans/test_span.py b/spacy/tests/doc/test_span.py similarity index 100% rename from spacy/tests/spans/test_span.py rename to spacy/tests/doc/test_span.py diff --git a/spacy/tests/spans/test_merge.py b/spacy/tests/doc/test_span_merge.py similarity index 100% rename from spacy/tests/spans/test_merge.py rename to spacy/tests/doc/test_span_merge.py diff --git a/spacy/tests/spans/__init__.py b/spacy/tests/spans/__init__.py deleted file mode 100644 index e69de29bb..000000000 From 7ae1aacdb88b9e8e50eb6dc852265835e407b364 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 1 Nov 2017 17:06:43 +0100 Subject: [PATCH 2/3] Fix add_label methods --- spacy/pipeline.pyx | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index a2321d1ad..e5f426453 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -441,11 +441,12 @@ class Tagger(Pipe): def add_label(self, label): if label in self.labels: return 0 - smaller = self.model[-1]._layers[-1] - larger = Softmax(len(self.labels)+1, smaller.nI) - copy_array(larger.W[:smaller.nO], smaller.W) - copy_array(larger.b[:smaller.nO], smaller.b) - self.model[-1]._layers[-1] = larger + if self.model not in (True, False, None): + smaller = self.model._layers[-1] + larger = Softmax(len(self.labels)+1, smaller.nI) + copy_array(larger.W[:smaller.nO], smaller.W) + copy_array(larger.b[:smaller.nO], smaller.b) + self.model._layers[-1] = larger self.labels.append(label) return 1 @@ -759,11 +760,12 @@ class TextCategorizer(Pipe): def add_label(self, label): if label in self.labels: return 0 - smaller = self.model[-1]._layers[-1] - larger = Affine(len(self.labels)+1, smaller.nI) - copy_array(larger.W[:smaller.nO], smaller.W) - copy_array(larger.b[:smaller.nO], smaller.b) - self.model[-1]._layers[-1] = larger + if self.model not in (None, True, False): + smaller = self.model._layers[-1] + larger = Affine(len(self.labels)+1, smaller.nI) + copy_array(larger.W[:smaller.nO], smaller.W) + copy_array(larger.b[:smaller.nO], smaller.b) + self.model._layers[-1] = larger self.labels.append(label) return 1 From 8f1d3fc3ee2b366a280c9ff61afb31102982bbae Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 1 Nov 2017 17:09:22 +0100 Subject: [PATCH 3/3] Update textcat example --- examples/training/train_textcat.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/training/train_textcat.py b/examples/training/train_textcat.py index 6fa79e75b..d1cf3ab8a 100644 --- a/examples/training/train_textcat.py +++ b/examples/training/train_textcat.py @@ -26,7 +26,7 @@ from spacy.pipeline import TextCategorizer @plac.annotations( model=("Model name. Defaults to blank 'en' model.", "option", "m", str), output_dir=("Optional output directory", "option", "o", Path), - n_examples=("Number of texts to train from", "option", "N", int), + n_texts=("Number of texts to train from", "option", "t", int), n_iter=("Number of training iterations", "option", "n", int)) def main(model=None, output_dir=None, n_iter=20, n_texts=2000): if model is not None: @@ -39,20 +39,19 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000): # add the text classifier to the pipeline if it doesn't exist # nlp.create_pipe works for built-ins that are registered with spaCy if 'textcat' not in nlp.pipe_names: - # textcat = nlp.create_pipe('textcat') - textcat = TextCategorizer(nlp.vocab, labels=['POSITIVE']) + textcat = nlp.create_pipe('textcat') nlp.add_pipe(textcat, last=True) # otherwise, get it, so we can add labels to it else: textcat = nlp.get_pipe('textcat') # add label to text classifier - # textcat.add_label('POSITIVE') + textcat.add_label('POSITIVE') # load the IMBD dataset print("Loading IMDB data...") - print("Using %d training examples" % n_texts) (train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts) + print("Using %d training examples" % n_texts) train_docs = [nlp.tokenizer(text) for text in train_texts] train_gold = [GoldParse(doc, cats=cats) for doc, cats in zip(train_docs, train_cats)]