mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Auto-format [ci skip]
This commit is contained in:
parent
62ff128888
commit
073013f129
|
@ -1,7 +1,6 @@
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
|
||||||
import spacy
|
import spacy
|
||||||
from spacy.util import minibatch, compounding
|
from spacy.util import minibatch, compounding
|
||||||
|
|
||||||
|
@ -9,27 +8,25 @@ from spacy.util import minibatch, compounding
|
||||||
def test_issue3611():
|
def test_issue3611():
|
||||||
""" Test whether adding n-grams in the textcat works even when n > token length of some docs """
|
""" Test whether adding n-grams in the textcat works even when n > token length of some docs """
|
||||||
unique_classes = ["offensive", "inoffensive"]
|
unique_classes = ["offensive", "inoffensive"]
|
||||||
x_train = ["This is an offensive text",
|
x_train = [
|
||||||
|
"This is an offensive text",
|
||||||
"This is the second offensive text",
|
"This is the second offensive text",
|
||||||
"inoff"]
|
"inoff",
|
||||||
|
]
|
||||||
y_train = ["offensive", "offensive", "inoffensive"]
|
y_train = ["offensive", "offensive", "inoffensive"]
|
||||||
|
|
||||||
# preparing the data
|
# preparing the data
|
||||||
pos_cats = list()
|
pos_cats = list()
|
||||||
for train_instance in y_train:
|
for train_instance in y_train:
|
||||||
pos_cats.append({label: label == train_instance for label in unique_classes})
|
pos_cats.append({label: label == train_instance for label in unique_classes})
|
||||||
train_data = list(zip(x_train, [{'cats': cats} for cats in pos_cats]))
|
train_data = list(zip(x_train, [{"cats": cats} for cats in pos_cats]))
|
||||||
|
|
||||||
# set up the spacy model with a text categorizer component
|
# set up the spacy model with a text categorizer component
|
||||||
nlp = spacy.blank('en')
|
nlp = spacy.blank("en")
|
||||||
|
|
||||||
textcat = nlp.create_pipe(
|
textcat = nlp.create_pipe(
|
||||||
"textcat",
|
"textcat",
|
||||||
config={
|
config={"exclusive_classes": True, "architecture": "bow", "ngram_size": 2},
|
||||||
"exclusive_classes": True,
|
|
||||||
"architecture": "bow",
|
|
||||||
"ngram_size": 2
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
for label in unique_classes:
|
for label in unique_classes:
|
||||||
|
@ -37,7 +34,7 @@ def test_issue3611():
|
||||||
nlp.add_pipe(textcat, last=True)
|
nlp.add_pipe(textcat, last=True)
|
||||||
|
|
||||||
# training the network
|
# training the network
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'textcat']
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
|
||||||
with nlp.disable_pipes(*other_pipes):
|
with nlp.disable_pipes(*other_pipes):
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
|
@ -46,6 +43,10 @@ def test_issue3611():
|
||||||
|
|
||||||
for batch in batches:
|
for batch in batches:
|
||||||
texts, annotations = zip(*batch)
|
texts, annotations = zip(*batch)
|
||||||
nlp.update(docs=texts, golds=annotations, sgd=optimizer, drop=0.1, losses=losses)
|
nlp.update(
|
||||||
|
docs=texts,
|
||||||
|
golds=annotations,
|
||||||
|
sgd=optimizer,
|
||||||
|
drop=0.1,
|
||||||
|
losses=losses,
|
||||||
|
)
|
||||||
|
|
|
@ -3,8 +3,10 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from spacy.lang.hi import Hindi
|
from spacy.lang.hi import Hindi
|
||||||
|
|
||||||
|
|
||||||
def test_issue3625():
|
def test_issue3625():
|
||||||
"""Test that default punctuation rules applies to hindi unicode characters"""
|
"""Test that default punctuation rules applies to hindi unicode characters"""
|
||||||
nlp = Hindi()
|
nlp = Hindi()
|
||||||
doc = nlp(u"hi. how हुए. होटल, होटल")
|
doc = nlp("hi. how हुए. होटल, होटल")
|
||||||
assert [token.text for token in doc] == ['hi', '.', 'how', 'हुए', '.', 'होटल', ',', 'होटल']
|
expected = ["hi", ".", "how", "हुए", ".", "होटल", ",", "होटल"]
|
||||||
|
assert [token.text for token in doc] == expected
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from spacy.attrs import IS_ALPHA
|
from spacy.attrs import IS_ALPHA
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
|
|
||||||
|
@ -10,11 +9,11 @@ from spacy.lang.en import English
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"sentence",
|
"sentence",
|
||||||
[
|
[
|
||||||
'The story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.',
|
"The story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.",
|
||||||
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s #1.',
|
"The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale's #1.",
|
||||||
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s number one',
|
"The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale's number one",
|
||||||
'Indeed, making the one who remains do all the work has installed him into a position of such insolent tyranny, it will take a month at least to reduce him to his proper proportions.',
|
"Indeed, making the one who remains do all the work has installed him into a position of such insolent tyranny, it will take a month at least to reduce him to his proper proportions.",
|
||||||
"It was a missed assignment, but it shouldn't have resulted in a turnover ..."
|
"It was a missed assignment, but it shouldn't have resulted in a turnover ...",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_issue3869(sentence):
|
def test_issue3869(sentence):
|
||||||
|
@ -27,5 +26,3 @@ def test_issue3869(sentence):
|
||||||
count += token.is_alpha
|
count += token.is_alpha
|
||||||
|
|
||||||
assert count == doc.count_by(IS_ALPHA).get(1, 0)
|
assert count == doc.count_by(IS_ALPHA).get(1, 0)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user