Auto-format [ci skip]

This commit is contained in:
Ines Montani 2019-07-17 12:34:13 +02:00
parent 62ff128888
commit 073013f129
4 changed files with 25 additions and 26 deletions

View File

@ -1,7 +1,6 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
import spacy
from spacy.util import minibatch, compounding
@ -9,27 +8,25 @@ from spacy.util import minibatch, compounding
def test_issue3611():
""" Test whether adding n-grams in the textcat works even when n > token length of some docs """
unique_classes = ["offensive", "inoffensive"]
x_train = ["This is an offensive text",
"This is the second offensive text",
"inoff"]
x_train = [
"This is an offensive text",
"This is the second offensive text",
"inoff",
]
y_train = ["offensive", "offensive", "inoffensive"]
# preparing the data
pos_cats = list()
for train_instance in y_train:
pos_cats.append({label: label == train_instance for label in unique_classes})
train_data = list(zip(x_train, [{'cats': cats} for cats in pos_cats]))
train_data = list(zip(x_train, [{"cats": cats} for cats in pos_cats]))
# set up the spacy model with a text categorizer component
nlp = spacy.blank('en')
nlp = spacy.blank("en")
textcat = nlp.create_pipe(
"textcat",
config={
"exclusive_classes": True,
"architecture": "bow",
"ngram_size": 2
}
config={"exclusive_classes": True, "architecture": "bow", "ngram_size": 2},
)
for label in unique_classes:
@ -37,7 +34,7 @@ def test_issue3611():
nlp.add_pipe(textcat, last=True)
# training the network
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'textcat']
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
with nlp.disable_pipes(*other_pipes):
optimizer = nlp.begin_training()
for i in range(3):
@ -46,6 +43,10 @@ def test_issue3611():
for batch in batches:
texts, annotations = zip(*batch)
nlp.update(docs=texts, golds=annotations, sgd=optimizer, drop=0.1, losses=losses)
nlp.update(
docs=texts,
golds=annotations,
sgd=optimizer,
drop=0.1,
losses=losses,
)

View File

@ -3,8 +3,10 @@ from __future__ import unicode_literals
from spacy.lang.hi import Hindi
def test_issue3625():
"""Test that default punctuation rules apply to Hindi unicode characters"""
nlp = Hindi()
doc = nlp(u"hi. how हुए. होटल, होटल")
assert [token.text for token in doc] == ['hi', '.', 'how', 'हुए', '.', 'होटल', ',', 'होटल']
doc = nlp("hi. how हुए. होटल, होटल")
expected = ["hi", ".", "how", "हुए", ".", "होटल", ",", "होटल"]
assert [token.text for token in doc] == expected

View File

@ -1,7 +1,6 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
from spacy.matcher import Matcher
from spacy.tokens import Doc

View File

@ -2,7 +2,6 @@
from __future__ import unicode_literals
import pytest
from spacy.attrs import IS_ALPHA
from spacy.lang.en import English
@ -10,11 +9,11 @@ from spacy.lang.en import English
@pytest.mark.parametrize(
"sentence",
[
'The story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.',
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s #1.',
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s number one',
'Indeed, making the one who remains do all the work has installed him into a position of such insolent tyranny, it will take a month at least to reduce him to his proper proportions.',
"It was a missed assignment, but it shouldn't have resulted in a turnover ..."
"The story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.",
"The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale's #1.",
"The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale's number one",
"Indeed, making the one who remains do all the work has installed him into a position of such insolent tyranny, it will take a month at least to reduce him to his proper proportions.",
"It was a missed assignment, but it shouldn't have resulted in a turnover ...",
],
)
def test_issue3869(sentence):
@ -27,5 +26,3 @@ def test_issue3869(sentence):
count += token.is_alpha
assert count == doc.count_by(IS_ALPHA).get(1, 0)