mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-14 19:46:26 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
759cc79185
|
@ -26,7 +26,7 @@ from spacy.pipeline import TextCategorizer
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
||||||
output_dir=("Optional output directory", "option", "o", Path),
|
output_dir=("Optional output directory", "option", "o", Path),
|
||||||
n_examples=("Number of texts to train from", "option", "N", int),
|
n_texts=("Number of texts to train from", "option", "t", int),
|
||||||
n_iter=("Number of training iterations", "option", "n", int))
|
n_iter=("Number of training iterations", "option", "n", int))
|
||||||
def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
|
def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
|
||||||
if model is not None:
|
if model is not None:
|
||||||
|
@ -39,20 +39,19 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
|
||||||
# add the text classifier to the pipeline if it doesn't exist
|
# add the text classifier to the pipeline if it doesn't exist
|
||||||
# nlp.create_pipe works for built-ins that are registered with spaCy
|
# nlp.create_pipe works for built-ins that are registered with spaCy
|
||||||
if 'textcat' not in nlp.pipe_names:
|
if 'textcat' not in nlp.pipe_names:
|
||||||
# textcat = nlp.create_pipe('textcat')
|
textcat = nlp.create_pipe('textcat')
|
||||||
textcat = TextCategorizer(nlp.vocab, labels=['POSITIVE'])
|
|
||||||
nlp.add_pipe(textcat, last=True)
|
nlp.add_pipe(textcat, last=True)
|
||||||
# otherwise, get it, so we can add labels to it
|
# otherwise, get it, so we can add labels to it
|
||||||
else:
|
else:
|
||||||
textcat = nlp.get_pipe('textcat')
|
textcat = nlp.get_pipe('textcat')
|
||||||
|
|
||||||
# add label to text classifier
|
# add label to text classifier
|
||||||
# textcat.add_label('POSITIVE')
|
textcat.add_label('POSITIVE')
|
||||||
|
|
||||||
# load the IMDB dataset
|
# load the IMDB dataset
|
||||||
print("Loading IMDB data...")
|
print("Loading IMDB data...")
|
||||||
print("Using %d training examples" % n_texts)
|
|
||||||
(train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts)
|
(train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts)
|
||||||
|
print("Using %d training examples" % n_texts)
|
||||||
train_docs = [nlp.tokenizer(text) for text in train_texts]
|
train_docs = [nlp.tokenizer(text) for text in train_texts]
|
||||||
train_gold = [GoldParse(doc, cats=cats) for doc, cats in
|
train_gold = [GoldParse(doc, cats=cats) for doc, cats in
|
||||||
zip(train_docs, train_cats)]
|
zip(train_docs, train_cats)]
|
||||||
|
|
|
@ -441,11 +441,12 @@ class Tagger(Pipe):
|
||||||
def add_label(self, label):
|
def add_label(self, label):
|
||||||
if label in self.labels:
|
if label in self.labels:
|
||||||
return 0
|
return 0
|
||||||
smaller = self.model[-1]._layers[-1]
|
if self.model not in (True, False, None):
|
||||||
|
smaller = self.model._layers[-1]
|
||||||
larger = Softmax(len(self.labels)+1, smaller.nI)
|
larger = Softmax(len(self.labels)+1, smaller.nI)
|
||||||
copy_array(larger.W[:smaller.nO], smaller.W)
|
copy_array(larger.W[:smaller.nO], smaller.W)
|
||||||
copy_array(larger.b[:smaller.nO], smaller.b)
|
copy_array(larger.b[:smaller.nO], smaller.b)
|
||||||
self.model[-1]._layers[-1] = larger
|
self.model._layers[-1] = larger
|
||||||
self.labels.append(label)
|
self.labels.append(label)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
@ -759,11 +760,12 @@ class TextCategorizer(Pipe):
|
||||||
def add_label(self, label):
|
def add_label(self, label):
|
||||||
if label in self.labels:
|
if label in self.labels:
|
||||||
return 0
|
return 0
|
||||||
smaller = self.model[-1]._layers[-1]
|
if self.model not in (None, True, False):
|
||||||
|
smaller = self.model._layers[-1]
|
||||||
larger = Affine(len(self.labels)+1, smaller.nI)
|
larger = Affine(len(self.labels)+1, smaller.nI)
|
||||||
copy_array(larger.W[:smaller.nO], smaller.W)
|
copy_array(larger.W[:smaller.nO], smaller.W)
|
||||||
copy_array(larger.b[:smaller.nO], smaller.b)
|
copy_array(larger.b[:smaller.nO], smaller.b)
|
||||||
self.model[-1]._layers[-1] = larger
|
self.model._layers[-1] = larger
|
||||||
self.labels.append(label)
|
self.labels.append(label)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user