mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Auto-format [ci skip]
This commit is contained in:
parent
62ff128888
commit
073013f129
|
@ -1,7 +1,6 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
import spacy
|
||||
from spacy.util import minibatch, compounding
|
||||
|
||||
|
@ -9,27 +8,25 @@ from spacy.util import minibatch, compounding
|
|||
def test_issue3611():
|
||||
""" Test whether adding n-grams in the textcat works even when n > token length of some docs """
|
||||
unique_classes = ["offensive", "inoffensive"]
|
||||
x_train = ["This is an offensive text",
|
||||
"This is the second offensive text",
|
||||
"inoff"]
|
||||
x_train = [
|
||||
"This is an offensive text",
|
||||
"This is the second offensive text",
|
||||
"inoff",
|
||||
]
|
||||
y_train = ["offensive", "offensive", "inoffensive"]
|
||||
|
||||
# preparing the data
|
||||
pos_cats = list()
|
||||
for train_instance in y_train:
|
||||
pos_cats.append({label: label == train_instance for label in unique_classes})
|
||||
train_data = list(zip(x_train, [{'cats': cats} for cats in pos_cats]))
|
||||
train_data = list(zip(x_train, [{"cats": cats} for cats in pos_cats]))
|
||||
|
||||
# set up the spacy model with a text categorizer component
|
||||
nlp = spacy.blank('en')
|
||||
nlp = spacy.blank("en")
|
||||
|
||||
textcat = nlp.create_pipe(
|
||||
"textcat",
|
||||
config={
|
||||
"exclusive_classes": True,
|
||||
"architecture": "bow",
|
||||
"ngram_size": 2
|
||||
}
|
||||
config={"exclusive_classes": True, "architecture": "bow", "ngram_size": 2},
|
||||
)
|
||||
|
||||
for label in unique_classes:
|
||||
|
@ -37,7 +34,7 @@ def test_issue3611():
|
|||
nlp.add_pipe(textcat, last=True)
|
||||
|
||||
# training the network
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'textcat']
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
|
||||
with nlp.disable_pipes(*other_pipes):
|
||||
optimizer = nlp.begin_training()
|
||||
for i in range(3):
|
||||
|
@ -46,6 +43,10 @@ def test_issue3611():
|
|||
|
||||
for batch in batches:
|
||||
texts, annotations = zip(*batch)
|
||||
nlp.update(docs=texts, golds=annotations, sgd=optimizer, drop=0.1, losses=losses)
|
||||
|
||||
|
||||
nlp.update(
|
||||
docs=texts,
|
||||
golds=annotations,
|
||||
sgd=optimizer,
|
||||
drop=0.1,
|
||||
losses=losses,
|
||||
)
|
||||
|
|
|
@ -3,8 +3,10 @@ from __future__ import unicode_literals
|
|||
|
||||
from spacy.lang.hi import Hindi
|
||||
|
||||
|
||||
def test_issue3625():
|
||||
"""Test that default punctuation rules applies to hindi unicode characters"""
|
||||
nlp = Hindi()
|
||||
doc = nlp(u"hi. how हुए. होटल, होटल")
|
||||
assert [token.text for token in doc] == ['hi', '.', 'how', 'हुए', '.', 'होटल', ',', 'होटल']
|
||||
doc = nlp("hi. how हुए. होटल, होटल")
|
||||
expected = ["hi", ".", "how", "हुए", ".", "होटल", ",", "होटल"]
|
||||
assert [token.text for token in doc] == expected
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
from spacy.matcher import Matcher
|
||||
from spacy.tokens import Doc
|
||||
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
|
||||
from spacy.attrs import IS_ALPHA
|
||||
from spacy.lang.en import English
|
||||
|
||||
|
@ -10,11 +9,11 @@ from spacy.lang.en import English
|
|||
@pytest.mark.parametrize(
|
||||
"sentence",
|
||||
[
|
||||
'The story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.',
|
||||
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s #1.',
|
||||
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s number one',
|
||||
'Indeed, making the one who remains do all the work has installed him into a position of such insolent tyranny, it will take a month at least to reduce him to his proper proportions.',
|
||||
"It was a missed assignment, but it shouldn't have resulted in a turnover ..."
|
||||
"The story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.",
|
||||
"The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale's #1.",
|
||||
"The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale's number one",
|
||||
"Indeed, making the one who remains do all the work has installed him into a position of such insolent tyranny, it will take a month at least to reduce him to his proper proportions.",
|
||||
"It was a missed assignment, but it shouldn't have resulted in a turnover ...",
|
||||
],
|
||||
)
|
||||
def test_issue3869(sentence):
|
||||
|
@ -27,5 +26,3 @@ def test_issue3869(sentence):
|
|||
count += token.is_alpha
|
||||
|
||||
assert count == doc.count_by(IS_ALPHA).get(1, 0)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user