Update examples

ines 2017-11-07 01:22:30 +01:00
parent 1b1c9105b4
commit 173b1551af
16 changed files with 42 additions and 39 deletions

View File

@@ -1,18 +1,24 @@
-import plac
-import collections
-import random
+"""
+This example shows how to use an LSTM sentiment classification model trained
+using Keras in spaCy. spaCy splits the document into sentences, and each
+sentence is classified using the LSTM. The scores for the sentences are then
+aggregated to give the document score. This kind of hierarchical model is
+quite difficult in "pure" Keras or Tensorflow, but it's very effective. The
+Keras example on this dataset performs quite poorly, because it cuts off the
+documents so that they're a fixed size. This hurts review accuracy a lot,
+because people often summarise their rating in the final sentence
+
+Prerequisites:
+spacy download en_vectors_web_lg
+pip install keras==2.0.9
+
+Compatible with: spaCy v2.0.0+
+"""
+import plac
+import random
 import pathlib
 import cytoolz
 import numpy
 from keras.models import Sequential, model_from_json
-from keras.layers import LSTM, Dense, Embedding, Dropout, Bidirectional
+from keras.layers import LSTM, Dense, Embedding, Bidirectional
 from keras.layers import TimeDistributed
 from keras.optimizers import Adam
-from spacy.compat import pickle
 import thinc.extra.datasets
+from spacy.compat import pickle
 import spacy
@@ -84,8 +90,8 @@ def get_features(docs, max_length):

 def train(train_texts, train_labels, dev_texts, dev_labels,
-          lstm_shape, lstm_settings, lstm_optimizer, batch_size=100, nb_epoch=5,
-          by_sentence=True):
+          lstm_shape, lstm_settings, lstm_optimizer, batch_size=100,
+          nb_epoch=5, by_sentence=True):
     print("Loading spaCy")
     nlp = spacy.load('en_vectors_web_lg')
     nlp.add_pipe(nlp.create_pipe('sentencizer'))
@@ -97,7 +103,7 @@ def train(train_texts, train_labels, dev_texts, dev_labels,
     if by_sentence:
         train_docs, train_labels = get_labelled_sentences(train_docs, train_labels)
         dev_docs, dev_labels = get_labelled_sentences(dev_docs, dev_labels)

     train_X = get_features(train_docs, lstm_shape['max_length'])
     dev_X = get_features(dev_docs, lstm_shape['max_length'])
     model.fit(train_X, train_labels, validation_data=(dev_X, dev_labels),
@@ -138,12 +144,12 @@ def evaluate(model_dir, texts, labels, max_length=100):
         '''
         return [nlp.tagger, nlp.parser, SentimentAnalyser.load(model_dir, nlp,
                                                                max_length=max_length)]

     nlp = spacy.load('en')
     nlp.pipeline = create_pipeline(nlp)

     correct = 0
     i = 0
     for doc in nlp.pipe(texts, batch_size=1000, n_threads=4):
         correct += bool(doc.sentiment >= 0.5) == bool(labels[i])
         i += 1
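The docstring above describes a hierarchical scheme: an LSTM scores each sentence, and the sentence scores are aggregated into a document score. A minimal sketch of that aggregation step, outside the diff: `predict_sentence` is a hypothetical stand-in for the example's Keras model, and averaging is an assumption about the aggregation, not necessarily the example's exact rule.

import numpy

def score_document(doc, predict_sentence):
    # doc.sents needs a sentence boundary component, e.g. the 'sentencizer'
    # pipe added in train() above.
    # predict_sentence: any callable mapping a spaCy Span to a float in [0, 1].
    sent_scores = [predict_sentence(sent) for sent in doc.sents]
    # Aggregate the per-sentence scores into one document score (here: mean).
    return float(numpy.mean(sent_scores)) if sent_scores else 0.5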

View File

@@ -6,7 +6,7 @@ money and currency values (entities labelled as MONEY) and then check the
 dependency tree to find the noun phrase they are referring to for example:
 $9.4 million --> Net income.

-Compatible with: spaCy 2.0.0a18+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function
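The docstring this hunk touches describes walking the dependency tree from a MONEY entity to the noun phrase it refers to. A rough sketch of the idea, assuming the 'en' model is installed; the dependency labels checked here are illustrative, not the example's exact logic.

import spacy

nlp = spacy.load('en')
doc = nlp(u"Net income was $9.4 million compared to the prior year.")
for ent in doc.ents:
    if ent.label_ == 'MONEY':
        # The entity's root attaches to a verb ("was"); the subject of that
        # verb is the noun phrase the value refers to.
        if ent.root.dep_ == 'attr':
            subjects = [t for t in ent.root.head.lefts if t.dep_ == 'nsubj']
            for subj in subjects:
                print(ent.text, '-->', ' '.join(t.text for t in subj.subtree))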

View File

@@ -16,7 +16,7 @@ show you how computers understand [language]
 I'm assuming that we can use the token.head to build these groups."

-Compatible with: spaCy 2.0.0a18+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function
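The question quoted in this file's docstring asks about using token.head to build groups of words. A small illustration of the general mechanism, with a made-up sentence: token.head is the syntactic parent, and token.subtree collects a token plus everything attached below it.

import spacy

nlp = spacy.load('en')
doc = nlp(u"Credit and mortgage account holders must submit their requests.")
for token in doc:
    # Group everything hanging off the subject token via its subtree.
    if token.dep_ == 'nsubj':
        group = ''.join(t.text_with_ws for t in token.subtree).strip()
        print(token.text, '-> group:', group)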

View File

@@ -34,7 +34,7 @@ formatted in jsonl as a sequence of entries like this:
 {"text":"Appalachia"}
 {"text":"Argentina"}

-Compatible with: spaCy 2.0.0a17+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import print_function, unicode_literals, division
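The JSONL entries shown in the context lines are phrase patterns. A hedged sketch of feeding entries like these to spaCy's PhraseMatcher (v2 API); the inline tuple stands in for the example's JSONL file.

import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.blank('en')
matcher = PhraseMatcher(nlp.vocab)
# In the real example these come from a JSONL file of {"text": ...} entries.
patterns = [nlp(text) for text in ('Appalachia', 'Argentina')]
matcher.add('PLACES', None, *patterns)

doc = nlp(u"Argentina gets more rain than Appalachia.")
for match_id, start, end in matcher(doc):
    print(doc[start:end].text)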

View File

@@ -7,7 +7,7 @@ they're called on is passed in as the first argument.
 * Custom pipeline components: https://alpha.spacy.io//usage/processing-pipelines#custom-components

-Compatible with: spaCy 2.0.0a17+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function
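The docstring context describes the v2 pattern these examples use: a pipeline component is a callable that receives the Doc, and extension getters receive the object they're called on as the first argument. A minimal sketch of both, not this example's actual component:

from spacy.lang.en import English
from spacy.tokens import Doc

def my_component(doc):
    # A component is just a callable that receives and returns the Doc.
    print("Processed {} tokens".format(len(doc)))
    return doc

# The getter's first argument is the Doc it is called on.
Doc.set_extension('has_number', getter=lambda doc: any(t.like_num for t in doc))

nlp = English()
nlp.add_pipe(my_component, first=True)
doc = nlp(u"This is a sentence with the number 42.")
print(doc._.has_number)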

View File

@@ -8,7 +8,7 @@ coordinates. Can be extended with more details from the API.
 * REST Countries API: https://restcountries.eu (Mozilla Public License MPL 2.0)
 * Custom pipeline components: https://alpha.spacy.io//usage/processing-pipelines#custom-components

-Compatible with: spaCy 2.0.0a17+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function

View File

@@ -8,7 +8,7 @@ respectively.
 * Custom pipeline components: https://alpha.spacy.io//usage/processing-pipelines#custom-components

-Compatible with: spaCy 2.0.0a17+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function

View File

@@ -6,7 +6,7 @@ each "sentence" on a newline, and spaces between tokens. Data is loaded from
 the IMDB movie reviews dataset and will be loaded automatically via Thinc's
 built-in dataset loader.

-Compatible with: spaCy 2.0.0a18+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import print_function, unicode_literals
 from toolz import partition_all
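The docstring here describes the pre-processing output format: each sentence on its own line, with single spaces between tokens, which word2vec-style tools expect. A minimal sketch of producing that format (the texts are placeholders):

import spacy

nlp = spacy.load('en')  # the parser provides doc.sents
texts = [u"This movie was great. I loved the ending.",
         u"Terrible pacing, but a strong cast."]
for doc in nlp.pipe(texts):
    for sent in doc.sents:
        print(' '.join(token.text for token in sent))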

View File

@@ -15,7 +15,7 @@ following types of relations: ROOT, PLACE, QUALITY, ATTRIBUTE, TIME, LOCATION.
 ('hotel', 'PLACE', 'show') --> show PLACE hotel
 ('berlin', 'LOCATION', 'hotel') --> hotel with LOCATION berlin

-Compatible with: spaCy 2.0.0a20+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function

View File

@@ -7,7 +7,7 @@ For more details, see the documentation:
 * Training: https://alpha.spacy.io/usage/training
 * NER: https://alpha.spacy.io/usage/linguistic-features#named-entities

-Compatible with: spaCy 2.0.0a20+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function

View File

@@ -23,7 +23,7 @@ For more details, see the documentation:
 * Training: https://alpha.spacy.io/usage/training
 * NER: https://alpha.spacy.io/usage/linguistic-features#named-entities

-Compatible with: spaCy 2.0.0a20+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function

View File

@@ -5,7 +5,7 @@ model or a blank model. For more details, see the documentation:
 * Training: https://alpha.spacy.io/usage/training
 * Dependency Parse: https://alpha.spacy.io/usage/linguistic-features#dependency-parse

-Compatible with: spaCy 2.0.0a20+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function

View File

@@ -8,7 +8,7 @@ the documentation:
 * Training: https://alpha.spacy.io/usage/training
 * POS Tagging: https://alpha.spacy.io/usage/linguistic-features#pos-tagging

-Compatible with: spaCy 2.0.0a20+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function

View File

@@ -8,7 +8,7 @@ see the documentation:
 * Training: https://alpha.spacy.io/usage/training
 * Text classification: https://alpha.spacy.io/usage/text-classification

-Compatible with: spaCy 2.0.0a20+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals, print_function
 import plac

View File

@@ -2,7 +2,7 @@
 # coding: utf8
 """Load vectors for a language trained using fastText
 https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md

-Compatible with: spaCy v2.0.0a17+
+Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals
 import plac
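A sketch of the general approach to loading a fastText .vec file into a spaCy v2 vocab. The file name is a placeholder, and the parsing is based on the .vec text format (first line: row count and vector width), not necessarily this example's exact code.

from __future__ import unicode_literals
import numpy
import spacy

nlp = spacy.blank('en')
with open('wiki.en.vec', 'rb') as file_:
    header = file_.readline()
    nr_row, nr_dim = header.split()         # e.g. b'2519370 300'
    nlp.vocab.reset_vectors(width=int(nr_dim))
    for line in file_:
        line = line.rstrip().decode('utf8')
        pieces = line.rsplit(' ', int(nr_dim))
        word = pieces[0]
        vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
        nlp.vocab.set_vector(word, vector)  # add the vector to the vocab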

View File

@@ -165,18 +165,15 @@ include ../_includes/_mixins
 +h(3, "keras") Text classification with Keras

 p
-    | In this example, we're using spaCy to pre-process text for use with
-    | a #[+a("https://keras.io") Keras] text classification model.
+    | This example shows how to use a #[+a("https://keras.io") Keras]
+    | LSTM sentiment classification model in spaCy. spaCy splits
+    | the document into sentences, and each sentence is classified using
+    | the LSTM. The scores for the sentences are then aggregated to give
+    | the document score. This kind of hierarchical model is quite
+    | difficult in "pure" Keras or Tensorflow, but it's very effective.
+    | The Keras example on this dataset performs quite poorly, because it
+    | cuts off the documents so that they're a fixed size. This hurts
+    | review accuracy a lot, because people often summarise their rating
+    | in the final sentence.

 +github("spacy", "examples/deep_learning_keras.py")
-
-+h(3, "keras-parikh-entailment") A decomposable attention model for Natural Language Inference
-
-p
-    | This example contains an implementation of the entailment prediction
-    | model described by #[+a("https://arxiv.org/pdf/1606.01933.pdf") Parikh et al. (2016)].
-    | The model is notable for its competitive performance with very few
-    | parameters, and was implemented using #[+a("https://keras.io") Keras]
-    | and spaCy.
-
-+github("spacy", "examples/keras_parikh_entailment/__main__.py", false, "examples/keras_parikh_entailment")