Merge branch 'develop' of https://github.com/explosion/spaCy into develop

2025-11-07 11:27:37 +03:00 · 2017-11-01 19:00:19 +01:00 · 2017-11-01 19:00:19 +01:00 · 759cc79185
commit 759cc79185
parent 2ef7b59eb0 8f1d3fc3ee
5 changed files with 16 additions and 15 deletions
--- a/examples/training/train_textcat.py
+++ b/examples/training/train_textcat.py
@@ -26,7 +26,7 @@ from spacy.pipeline import TextCategorizer
@plac.annotations(
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    output_dir=("Optional output directory", "option", "o", Path),
-    n_examples=("Number of texts to train from", "option", "N", int),
+    n_texts=("Number of texts to train from", "option", "t", int),
    n_iter=("Number of training iterations", "option", "n", int))
 def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
    if model is not None:
@@ -39,20 +39,19 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
    # add the text classifier to the pipeline if it doesn't exist
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if 'textcat' not in nlp.pipe_names:
-        # textcat = nlp.create_pipe('textcat')
-        textcat = TextCategorizer(nlp.vocab, labels=['POSITIVE'])
+        textcat = nlp.create_pipe('textcat')
        nlp.add_pipe(textcat, last=True)
    # otherwise, get it, so we can add labels to it
    else:
        textcat = nlp.get_pipe('textcat')

    # add label to text classifier
-    # textcat.add_label('POSITIVE')
+    textcat.add_label('POSITIVE')

    # load the IMDB dataset
    print("Loading IMDB data...")
-    print("Using %d training examples" % n_texts)
    (train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts)
+    print("Using %d training examples" % n_texts)
    train_docs = [nlp.tokenizer(text) for text in train_texts]
    train_gold = [GoldParse(doc, cats=cats) for doc, cats in
                  zip(train_docs, train_cats)]
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -441,11 +441,12 @@ class Tagger(Pipe):
    def add_label(self, label):
        if label in self.labels:
            return 0
-        smaller = self.model[-1]._layers[-1]
+        if self.model not in (True, False, None):
+            smaller = self.model._layers[-1]
            larger = Softmax(len(self.labels)+1, smaller.nI)
            copy_array(larger.W[:smaller.nO], smaller.W)
            copy_array(larger.b[:smaller.nO], smaller.b)
-        self.model[-1]._layers[-1] = larger
+            self.model._layers[-1] = larger
        self.labels.append(label)
        return 1

@@ -759,11 +760,12 @@ class TextCategorizer(Pipe):
    def add_label(self, label):
        if label in self.labels:
            return 0
-        smaller = self.model[-1]._layers[-1]
+        if self.model not in (None, True, False):
+            smaller = self.model._layers[-1]
            larger = Affine(len(self.labels)+1, smaller.nI)
            copy_array(larger.W[:smaller.nO], smaller.W)
            copy_array(larger.b[:smaller.nO], smaller.b)
-        self.model[-1]._layers[-1] = larger
+            self.model._layers[-1] = larger
        self.labels.append(label)
        return 1

--- a/spacy/tests/spans/test_span.py
+++ b/spacy/tests/spans/test_span.py
--- a/spacy/tests/doc/test_span_merge.py
+++ b/spacy/tests/doc/test_span_merge.py
--- a/spacy/tests/spans/__init__.py
+++ b/spacy/tests/spans/__init__.py