Test and update examples [ci skip]

Ines Montani 2019-03-16 14:15:49 +01:00
parent f55a52a2dd
commit 399987c216
13 changed files with 48 additions and 20 deletions

View File

@@ -7,6 +7,7 @@ dependency tree to find the noun phrase they are referring to for example:
 $9.4 million --> Net income.
 Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 """
 from __future__ import unicode_literals, print_function

View File

@@ -17,6 +17,7 @@ show you how computers understand [language]
 I'm assuming that we can use the token.head to build these groups."
 Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 """
 from __future__ import unicode_literals, print_function

View File

@@ -8,6 +8,7 @@ they're called on is passed in as the first argument.
 * Custom pipeline components: https://spacy.io/usage/processing-pipelines#custom-components
 Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 """
 from __future__ import unicode_literals, print_function

View File

@@ -9,6 +9,7 @@ coordinates. Can be extended with more details from the API.
 * Custom pipeline components: https://spacy.io/usage/processing-pipelines#custom-components
 Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 Prerequisites: pip install requests
 """
 from __future__ import unicode_literals, print_function

View File

@@ -9,6 +9,7 @@ respectively.
 * Custom pipeline components: https://spacy.io/usage/processing-pipelines#custom-components
 Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 """
 from __future__ import unicode_literals, print_function

View File

@@ -10,6 +10,9 @@ should also improve the parse quality.
 The specific example here is drawn from https://github.com/explosion/spaCy/issues/2627
 Other versions of the model may not make the original mistake, so the specific
 example might not be apt for future versions.
+
+Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 """
 import plac
 import spacy
@@ -25,8 +28,10 @@ def prevent_sentence_boundaries(doc):
 def can_be_sentence_start(token):
     if token.i == 0:
         return True
-    elif token.is_title:
-        return True
+    # We're not checking for is_title here to ignore arbitrary titlecased
+    # tokens within sentences
+    # elif token.is_title:
+    #     return True
     elif token.nbor(-1).is_punct:
         return True
     elif token.nbor(-1).is_space:
@@ -35,16 +40,21 @@ def can_be_sentence_start(token):
     return False


-def main():
-    nlp = spacy.load("en_core_web_lg")
-    raw_text = "Been here and I'm loving it."
-    doc = nlp(raw_text)
-    sentences = [sent.string.strip() for sent in doc.sents]
-    print(sentences)
+@plac.annotations(
+    text=("The raw text to process", "positional", None, str),
+    spacy_model=("spaCy model to use (with a parser)", "option", "m", str),
+)
+def main(text="Been here And I'm loving it.", spacy_model="en_core_web_lg"):
+    print("Using spaCy model '{}'".format(spacy_model))
+    print("Processing text '{}'".format(text))
+    nlp = spacy.load(spacy_model)
+    doc = nlp(text)
+    sentences = [sent.text.strip() for sent in doc.sents]
+    print("Before:", sentences)
     nlp.add_pipe(prevent_sentence_boundaries, before="parser")
-    doc = nlp(raw_text)
-    sentences = [sent.string.strip() for sent in doc.sents]
-    print(sentences)
+    doc = nlp(text)
+    sentences = [sent.text.strip() for sent in doc.sents]
+    print("After:", sentences)


 if __name__ == "__main__":
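
Note: the diff names prevent_sentence_boundaries but never shows its body. A minimal sketch of that component, assuming only spaCy v2.x's is_sent_start API, not the file's exact code:

def prevent_sentence_boundaries(doc):
    for token in doc:
        if not can_be_sentence_start(token):
            # Setting is_sent_start to False before the parser runs prevents
            # the parser from starting a new sentence at this token
            token.is_sent_start = False
    return doc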

View File

@@ -1,7 +1,14 @@
+#!/usr/bin/env python
+# coding: utf8
 """Demonstrate adding a rule-based component that forces some tokens to not
 be entities, before the NER tagger is applied. This is used to hotfix the issue
-in https://github.com/explosion/spaCy/issues/2870 , present as of spaCy v2.0.16.
+in https://github.com/explosion/spaCy/issues/2870, present as of spaCy v2.0.16.
+
+Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 """
+from __future__ import unicode_literals

 import spacy
 from spacy.attrs import ENT_IOB
@@ -18,7 +25,7 @@ def fix_space_tags(doc):
 def main():
     nlp = spacy.load("en_core_web_sm")
-    text = u"""This is some crazy test where I dont need an Apple Watch to make things bug"""
+    text = "This is some crazy test where I dont need an Apple Watch to make things bug"
    doc = nlp(text)
     print("Before", doc.ents)
     nlp.add_pipe(fix_space_tags, name="fix-ner", before="ner")
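
The fix_space_tags component that the hunk registers is not shown in full. A minimal sketch, assuming spaCy v2's ENT_IOB encoding (0 = unset, 1 = "I", 2 = "O", 3 = "B"):

def fix_space_tags(doc):
    ent_iobs = doc.to_array([ENT_IOB])
    for i, token in enumerate(doc):
        if token.is_space:
            ent_iobs[i] = 2  # force whitespace tokens to "O" (outside any entity)
    doc.from_array([ENT_IOB], ent_iobs.reshape((len(doc), 1)))
    return doc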

View File

@@ -7,6 +7,8 @@ the IMDB movie reviews dataset and will be loaded automatically via Thinc's
 built-in dataset loader.
 Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
+Prerequisites: pip install joblib
 """
 from __future__ import print_function, unicode_literals
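
The new joblib prerequisite implies the example fans text batches out to worker processes. A minimal sketch of that pattern, assuming nothing about the file beyond this hunk (the model name and two-way split are illustrative):

from joblib import Parallel, delayed
import spacy

def process_batch(model_name, texts):
    nlp = spacy.load(model_name)  # each worker loads its own pipeline
    return [[token.text for token in doc] for doc in nlp.pipe(texts)]

texts = ["This is a text.", "So is this one.", "And a third."]
batches = [texts[i::2] for i in range(2)]  # naive two-way split
results = Parallel(n_jobs=2)(
    delayed(process_batch)("en_core_web_sm", batch) for batch in batches
)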

View File

@@ -8,6 +8,7 @@ For more details, see the documentation:
 * NER: https://spacy.io/usage/linguistic-features#named-entities
 Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 """
 from __future__ import unicode_literals, print_function

View File

@@ -24,6 +24,7 @@ For more details, see the documentation:
 * NER: https://spacy.io/usage/linguistic-features#named-entities
 Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 """
 from __future__ import unicode_literals, print_function
@@ -87,7 +88,7 @@ def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
     ner.add_label(LABEL)  # add new entity label to entity recognizer
     # Adding extraneous labels shouldn't mess anything up
-    ner.add_label('VEGETABLE')
+    ner.add_label("VEGETABLE")
     if model is None:
         optimizer = nlp.begin_training()
     else:
@@ -127,7 +128,7 @@ def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
         print("Loading from", output_dir)
         nlp2 = spacy.load(output_dir)
         # Check the classes have loaded back consistently
-        assert nlp2.get_pipe('ner').move_names == move_names
+        assert nlp2.get_pipe("ner").move_names == move_names
         doc2 = nlp2(test_text)
         for ent in doc2.ents:
             print(ent.label_, ent.text)
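
The move_names assertion above verifies that the NER's transition scheme survives the save/load round trip. A hedged illustration of what it inspects (the label set varies by model, so the values in the comment are examples only):

import spacy

nlp = spacy.load("en_core_web_sm")  # any model with an "ner" pipe
ner = nlp.get_pipe("ner")
# One B/I/L/U move per label plus "O", e.g.
# ['O', 'B-PERSON', 'I-PERSON', 'L-PERSON', 'U-PERSON', ...]
print(ner.move_names)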

View File

@@ -6,6 +6,7 @@ model or a blank model. For more details, see the documentation:
 * Dependency Parse: https://spacy.io/usage/linguistic-features#dependency-parse
 Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 """
 from __future__ import unicode_literals, print_function
@@ -40,7 +41,7 @@ TRAIN_DATA = [
     output_dir=("Optional output directory", "option", "o", Path),
     n_iter=("Number of training iterations", "option", "n", int),
 )
-def main(model=None, output_dir=None, n_iter=10):
+def main(model=None, output_dir=None, n_iter=15):
     """Load the model, set up the pipeline and train the parser."""
     if model is not None:
         nlp = spacy.load(model)  # load existing spaCy model

View File

@@ -9,6 +9,7 @@ the documentation:
 * POS Tagging: https://spacy.io/usage/linguistic-features#pos-tagging
 Compatible with: spaCy v2.0.0+
+Last tested with: v2.1.0
 """
 from __future__ import unicode_literals, print_function

View File

@@ -41,9 +41,9 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
     # add the text classifier to the pipeline if it doesn't exist
     # nlp.create_pipe works for built-ins that are registered with spaCy
     if "textcat" not in nlp.pipe_names:
-        textcat = nlp.create_pipe("textcat", config={
-            "architecture": "simple_cnn",
-            "exclusive_classes": True})
+        textcat = nlp.create_pipe(
+            "textcat", config={"architecture": "simple_cnn", "exclusive_classes": True}
+        )
         nlp.add_pipe(textcat, last=True)
     # otherwise, get it, so we can add labels to it
     else:
@@ -140,7 +140,7 @@ def evaluate(tokenizer, textcat, texts, cats):
             fn += 1
     precision = tp / (tp + fp)
     recall = tp / (tp + fn)
-    if (precision+recall) == 0:
+    if (precision + recall) == 0:
         f_score = 0.0
     else:
         f_score = 2 * (precision * recall) / (precision + recall)
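
A quick sanity check of the precision/recall/F-score arithmetic in the evaluate() hunk, with invented counts:

tp, fp, fn = 6.0, 2.0, 2.0  # made-up counts for illustration
precision = tp / (tp + fp)  # 0.75
recall = tp / (tp + fn)  # 0.75
if (precision + recall) == 0:
    f_score = 0.0
else:
    f_score = 2 * (precision * recall) / (precision + recall)  # 0.75
print(precision, recall, f_score)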