Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2017-11-01 19:00:19 +01:00
commit 759cc79185
5 changed files with 16 additions and 15 deletions

View File

@ -26,7 +26,7 @@ from spacy.pipeline import TextCategorizer
@plac.annotations( @plac.annotations(
model=("Model name. Defaults to blank 'en' model.", "option", "m", str), model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
output_dir=("Optional output directory", "option", "o", Path), output_dir=("Optional output directory", "option", "o", Path),
n_examples=("Number of texts to train from", "option", "N", int), n_texts=("Number of texts to train from", "option", "t", int),
n_iter=("Number of training iterations", "option", "n", int)) n_iter=("Number of training iterations", "option", "n", int))
def main(model=None, output_dir=None, n_iter=20, n_texts=2000): def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
if model is not None: if model is not None:
@ -39,20 +39,19 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
# add the text classifier to the pipeline if it doesn't exist # add the text classifier to the pipeline if it doesn't exist
# nlp.create_pipe works for built-ins that are registered with spaCy # nlp.create_pipe works for built-ins that are registered with spaCy
if 'textcat' not in nlp.pipe_names: if 'textcat' not in nlp.pipe_names:
# textcat = nlp.create_pipe('textcat') textcat = nlp.create_pipe('textcat')
textcat = TextCategorizer(nlp.vocab, labels=['POSITIVE'])
nlp.add_pipe(textcat, last=True) nlp.add_pipe(textcat, last=True)
# otherwise, get it, so we can add labels to it # otherwise, get it, so we can add labels to it
else: else:
textcat = nlp.get_pipe('textcat') textcat = nlp.get_pipe('textcat')
# add label to text classifier # add label to text classifier
# textcat.add_label('POSITIVE') textcat.add_label('POSITIVE')
# load the IMDB dataset # load the IMDB dataset
print("Loading IMDB data...") print("Loading IMDB data...")
print("Using %d training examples" % n_texts)
(train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts) (train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts)
print("Using %d training examples" % n_texts)
train_docs = [nlp.tokenizer(text) for text in train_texts] train_docs = [nlp.tokenizer(text) for text in train_texts]
train_gold = [GoldParse(doc, cats=cats) for doc, cats in train_gold = [GoldParse(doc, cats=cats) for doc, cats in
zip(train_docs, train_cats)] zip(train_docs, train_cats)]

View File

@ -441,11 +441,12 @@ class Tagger(Pipe):
def add_label(self, label):
    """Add a new tag label to the tagger.

    label: the string label to add.
    Returns 0 if the label is already known, 1 after adding it.
    """
    if label in self.labels:
        return 0
    # self.model may still be an uninitialised placeholder (True/False/None)
    # before the pipeline is set up; only resize the output layer once a
    # real model object exists.
    if self.model not in (True, False, None):
        smaller = self.model._layers[-1]
        # Build a Softmax one output wider, then copy the already-learned
        # weights and biases into the corresponding rows of the new layer.
        larger = Softmax(len(self.labels) + 1, smaller.nI)
        copy_array(larger.W[:smaller.nO], smaller.W)
        copy_array(larger.b[:smaller.nO], smaller.b)
        self.model._layers[-1] = larger
    self.labels.append(label)
    return 1
@ -759,11 +760,12 @@ class TextCategorizer(Pipe):
def add_label(self, label):
    """Add a new category label to the text classifier.

    label: the string label to add.
    Returns 0 if the label is already known, 1 after adding it.
    """
    if label in self.labels:
        return 0
    # self.model may still be an uninitialised placeholder (True/False/None)
    # before the pipeline is set up; only resize the output layer once a
    # real model object exists.
    if self.model not in (None, True, False):
        smaller = self.model._layers[-1]
        # Build an Affine layer one output wider, then copy the
        # already-learned weights and biases into the new layer.
        larger = Affine(len(self.labels) + 1, smaller.nI)
        copy_array(larger.W[:smaller.nO], smaller.W)
        copy_array(larger.b[:smaller.nO], smaller.b)
        self.model._layers[-1] = larger
    self.labels.append(label)
    return 1