mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Update examples
This commit is contained in:
parent
1b1c9105b4
commit
173b1551af
|
@ -1,18 +1,24 @@
|
||||||
import plac
|
"""
|
||||||
import collections
|
This example shows how to use an LSTM sentiment classification model trained using Keras in spaCy. spaCy splits the document into sentences, and each sentence is classified using the LSTM. The scores for the sentences are then aggregated to give the document score. This kind of hierarchical model is quite difficult in "pure" Keras or Tensorflow, but it's very effective. The Keras example on this dataset performs quite poorly, because it cuts off the documents so that they're a fixed size. This hurts review accuracy a lot, because people often summarise their rating in the final sentence.
|
||||||
import random
|
|
||||||
|
|
||||||
|
Prerequisites:
|
||||||
|
spacy download en_vectors_web_lg
|
||||||
|
pip install keras==2.0.9
|
||||||
|
|
||||||
|
Compatible with: spaCy v2.0.0+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import plac
|
||||||
|
import random
|
||||||
import pathlib
|
import pathlib
|
||||||
import cytoolz
|
import cytoolz
|
||||||
import numpy
|
import numpy
|
||||||
from keras.models import Sequential, model_from_json
|
from keras.models import Sequential, model_from_json
|
||||||
from keras.layers import LSTM, Dense, Embedding, Dropout, Bidirectional
|
from keras.layers import LSTM, Dense, Embedding, Bidirectional
|
||||||
from keras.layers import TimeDistributed
|
from keras.layers import TimeDistributed
|
||||||
from keras.optimizers import Adam
|
from keras.optimizers import Adam
|
||||||
from spacy.compat import pickle
|
|
||||||
|
|
||||||
import thinc.extra.datasets
|
import thinc.extra.datasets
|
||||||
|
from spacy.compat import pickle
|
||||||
import spacy
|
import spacy
|
||||||
|
|
||||||
|
|
||||||
|
@ -84,8 +90,8 @@ def get_features(docs, max_length):
|
||||||
|
|
||||||
|
|
||||||
def train(train_texts, train_labels, dev_texts, dev_labels,
|
def train(train_texts, train_labels, dev_texts, dev_labels,
|
||||||
lstm_shape, lstm_settings, lstm_optimizer, batch_size=100, nb_epoch=5,
|
lstm_shape, lstm_settings, lstm_optimizer, batch_size=100,
|
||||||
by_sentence=True):
|
nb_epoch=5, by_sentence=True):
|
||||||
print("Loading spaCy")
|
print("Loading spaCy")
|
||||||
nlp = spacy.load('en_vectors_web_lg')
|
nlp = spacy.load('en_vectors_web_lg')
|
||||||
nlp.add_pipe(nlp.create_pipe('sentencizer'))
|
nlp.add_pipe(nlp.create_pipe('sentencizer'))
|
||||||
|
|
|
@ -6,7 +6,7 @@ money and currency values (entities labelled as MONEY) and then check the
|
||||||
dependency tree to find the noun phrase they are referring to – for example:
|
dependency tree to find the noun phrase they are referring to – for example:
|
||||||
$9.4 million --> Net income.
|
$9.4 million --> Net income.
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a18+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,7 @@ show you how computers understand [language]
|
||||||
|
|
||||||
I'm assuming that we can use the token.head to build these groups."
|
I'm assuming that we can use the token.head to build these groups."
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a18+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,7 @@ formatted in jsonl as a sequence of entries like this:
|
||||||
{"text":"Appalachia"}
|
{"text":"Appalachia"}
|
||||||
{"text":"Argentina"}
|
{"text":"Argentina"}
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a17+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import print_function, unicode_literals, division
|
from __future__ import print_function, unicode_literals, division
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ they're called on is passed in as the first argument.
|
||||||
|
|
||||||
* Custom pipeline components: https://alpha.spacy.io/usage/processing-pipelines#custom-components
|
* Custom pipeline components: https://alpha.spacy.io/usage/processing-pipelines#custom-components
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a17+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ coordinates. Can be extended with more details from the API.
|
||||||
* REST Countries API: https://restcountries.eu (Mozilla Public License MPL 2.0)
|
* REST Countries API: https://restcountries.eu (Mozilla Public License MPL 2.0)
|
||||||
* Custom pipeline components: https://alpha.spacy.io/usage/processing-pipelines#custom-components
|
* Custom pipeline components: https://alpha.spacy.io/usage/processing-pipelines#custom-components
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a17+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ respectively.
|
||||||
|
|
||||||
* Custom pipeline components: https://alpha.spacy.io/usage/processing-pipelines#custom-components
|
* Custom pipeline components: https://alpha.spacy.io/usage/processing-pipelines#custom-components
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a17+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ each "sentence" on a newline, and spaces between tokens. Data is loaded from
|
||||||
the IMDB movie reviews dataset and will be loaded automatically via Thinc's
|
the IMDB movie reviews dataset and will be loaded automatically via Thinc's
|
||||||
built-in dataset loader.
|
built-in dataset loader.
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a18+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import print_function, unicode_literals
|
from __future__ import print_function, unicode_literals
|
||||||
from toolz import partition_all
|
from toolz import partition_all
|
||||||
|
|
|
@ -15,7 +15,7 @@ following types of relations: ROOT, PLACE, QUALITY, ATTRIBUTE, TIME, LOCATION.
|
||||||
('hotel', 'PLACE', 'show') --> show PLACE hotel
|
('hotel', 'PLACE', 'show') --> show PLACE hotel
|
||||||
('berlin', 'LOCATION', 'hotel') --> hotel with LOCATION berlin
|
('berlin', 'LOCATION', 'hotel') --> hotel with LOCATION berlin
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a20+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ For more details, see the documentation:
|
||||||
* Training: https://alpha.spacy.io/usage/training
|
* Training: https://alpha.spacy.io/usage/training
|
||||||
* NER: https://alpha.spacy.io/usage/linguistic-features#named-entities
|
* NER: https://alpha.spacy.io/usage/linguistic-features#named-entities
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a20+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ For more details, see the documentation:
|
||||||
* Training: https://alpha.spacy.io/usage/training
|
* Training: https://alpha.spacy.io/usage/training
|
||||||
* NER: https://alpha.spacy.io/usage/linguistic-features#named-entities
|
* NER: https://alpha.spacy.io/usage/linguistic-features#named-entities
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a20+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@ model or a blank model. For more details, see the documentation:
|
||||||
* Training: https://alpha.spacy.io/usage/training
|
* Training: https://alpha.spacy.io/usage/training
|
||||||
* Dependency Parse: https://alpha.spacy.io/usage/linguistic-features#dependency-parse
|
* Dependency Parse: https://alpha.spacy.io/usage/linguistic-features#dependency-parse
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a20+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ the documentation:
|
||||||
* Training: https://alpha.spacy.io/usage/training
|
* Training: https://alpha.spacy.io/usage/training
|
||||||
* POS Tagging: https://alpha.spacy.io/usage/linguistic-features#pos-tagging
|
* POS Tagging: https://alpha.spacy.io/usage/linguistic-features#pos-tagging
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a20+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ see the documentation:
|
||||||
* Training: https://alpha.spacy.io/usage/training
|
* Training: https://alpha.spacy.io/usage/training
|
||||||
* Text classification: https://alpha.spacy.io/usage/text-classification
|
* Text classification: https://alpha.spacy.io/usage/text-classification
|
||||||
|
|
||||||
Compatible with: spaCy 2.0.0a20+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
import plac
|
import plac
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
"""Load vectors for a language trained using fastText
|
"""Load vectors for a language trained using fastText
|
||||||
https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md
|
https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md
|
||||||
Compatible with: spaCy v2.0.0a17+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import plac
|
import plac
|
||||||
|
|
|
@ -165,18 +165,15 @@ include ../_includes/_mixins
|
||||||
+h(3, "keras") Text classification with Keras
|
+h(3, "keras") Text classification with Keras
|
||||||
|
|
||||||
p
|
p
|
||||||
| In this example, we're using spaCy to pre-process text for use with
|
| This example shows how to use a #[+a("https://keras.io") Keras]
|
||||||
| a #[+a("https://keras.io") Keras] text classification model.
|
| LSTM sentiment classification model in spaCy. spaCy splits
|
||||||
|
| the document into sentences, and each sentence is classified using
|
||||||
|
| the LSTM. The scores for the sentences are then aggregated to give
|
||||||
|
| the document score. This kind of hierarchical model is quite
|
||||||
|
| difficult in "pure" Keras or Tensorflow, but it's very effective.
|
||||||
|
| The Keras example on this dataset performs quite poorly, because it
|
||||||
|
| cuts off the documents so that they're a fixed size. This hurts
|
||||||
|
| review accuracy a lot, because people often summarise their rating
|
||||||
|
| in the final sentence.
|
||||||
|
|
||||||
+github("spacy", "examples/deep_learning_keras.py")
|
+github("spacy", "examples/deep_learning_keras.py")
|
||||||
|
|
||||||
+h(3, "keras-parikh-entailment") A decomposable attention model for Natural Language Inference
|
|
||||||
|
|
||||||
p
|
|
||||||
| This example contains an implementation of the entailment prediction
|
|
||||||
| model described by #[+a("https://arxiv.org/pdf/1606.01933.pdf") Parikh et al. (2016)].
|
|
||||||
| The model is notable for its competitive performance with very few
|
|
||||||
| parameters, and was implemented using #[+a("https://keras.io") Keras]
|
|
||||||
| and spaCy.
|
|
||||||
|
|
||||||
+github("spacy", "examples/keras_parikh_entailment/__main__.py", false, "examples/keras_parikh_entailment")
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user