Auto-format [ci skip]

This commit is contained in:
Ines Montani 2019-07-17 12:34:13 +02:00
parent 62ff128888
commit 073013f129
4 changed files with 25 additions and 26 deletions

View File

@ -1,7 +1,6 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import pytest
import spacy import spacy
from spacy.util import minibatch, compounding from spacy.util import minibatch, compounding
@ -9,27 +8,25 @@ from spacy.util import minibatch, compounding
def test_issue3611(): def test_issue3611():
""" Test whether adding n-grams in the textcat works even when n > token length of some docs """ """ Test whether adding n-grams in the textcat works even when n > token length of some docs """
unique_classes = ["offensive", "inoffensive"] unique_classes = ["offensive", "inoffensive"]
x_train = ["This is an offensive text", x_train = [
"This is the second offensive text", "This is an offensive text",
"inoff"] "This is the second offensive text",
"inoff",
]
y_train = ["offensive", "offensive", "inoffensive"] y_train = ["offensive", "offensive", "inoffensive"]
# preparing the data # preparing the data
pos_cats = list() pos_cats = list()
for train_instance in y_train: for train_instance in y_train:
pos_cats.append({label: label == train_instance for label in unique_classes}) pos_cats.append({label: label == train_instance for label in unique_classes})
train_data = list(zip(x_train, [{'cats': cats} for cats in pos_cats])) train_data = list(zip(x_train, [{"cats": cats} for cats in pos_cats]))
# set up the spacy model with a text categorizer component # set up the spacy model with a text categorizer component
nlp = spacy.blank('en') nlp = spacy.blank("en")
textcat = nlp.create_pipe( textcat = nlp.create_pipe(
"textcat", "textcat",
config={ config={"exclusive_classes": True, "architecture": "bow", "ngram_size": 2},
"exclusive_classes": True,
"architecture": "bow",
"ngram_size": 2
}
) )
for label in unique_classes: for label in unique_classes:
@ -37,7 +34,7 @@ def test_issue3611():
nlp.add_pipe(textcat, last=True) nlp.add_pipe(textcat, last=True)
# training the network # training the network
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'textcat'] other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
with nlp.disable_pipes(*other_pipes): with nlp.disable_pipes(*other_pipes):
optimizer = nlp.begin_training() optimizer = nlp.begin_training()
for i in range(3): for i in range(3):
@ -46,6 +43,10 @@ def test_issue3611():
for batch in batches: for batch in batches:
texts, annotations = zip(*batch) texts, annotations = zip(*batch)
nlp.update(docs=texts, golds=annotations, sgd=optimizer, drop=0.1, losses=losses) nlp.update(
docs=texts,
golds=annotations,
sgd=optimizer,
drop=0.1,
losses=losses,
)

View File

@ -3,8 +3,10 @@ from __future__ import unicode_literals
from spacy.lang.hi import Hindi from spacy.lang.hi import Hindi
def test_issue3625(): def test_issue3625():
"""Test that default punctuation rules applies to hindi unicode characters""" """Test that default punctuation rules applies to hindi unicode characters"""
nlp = Hindi() nlp = Hindi()
doc = nlp(u"hi. how हुए. होटल, होटल") doc = nlp("hi. how हुए. होटल, होटल")
assert [token.text for token in doc] == ['hi', '.', 'how', 'हुए', '.', 'होटल', ',', 'होटल'] expected = ["hi", ".", "how", "हुए", ".", "होटल", ",", "होटल"]
assert [token.text for token in doc] == expected

View File

@ -1,7 +1,6 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import pytest
from spacy.matcher import Matcher from spacy.matcher import Matcher
from spacy.tokens import Doc from spacy.tokens import Doc

View File

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import pytest import pytest
from spacy.attrs import IS_ALPHA from spacy.attrs import IS_ALPHA
from spacy.lang.en import English from spacy.lang.en import English
@ -10,11 +9,11 @@ from spacy.lang.en import English
@pytest.mark.parametrize( @pytest.mark.parametrize(
"sentence", "sentence",
[ [
'The story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.', "The story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.",
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s #1.', "The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale's #1.",
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s number one', "The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale's number one",
'Indeed, making the one who remains do all the work has installed him into a position of such insolent tyranny, it will take a month at least to reduce him to his proper proportions.', "Indeed, making the one who remains do all the work has installed him into a position of such insolent tyranny, it will take a month at least to reduce him to his proper proportions.",
"It was a missed assignment, but it shouldn't have resulted in a turnover ..." "It was a missed assignment, but it shouldn't have resulted in a turnover ...",
], ],
) )
def test_issue3869(sentence): def test_issue3869(sentence):
@ -27,5 +26,3 @@ def test_issue3869(sentence):
count += token.is_alpha count += token.is_alpha
assert count == doc.count_by(IS_ALPHA).get(1, 0) assert count == doc.count_by(IS_ALPHA).get(1, 0)