Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-26 01:46:28 +03:00)

commit 88041f69d1 (parent 674ee5dde7)

    More work on reorganising tests, using conftest.py
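This commit moves construction of the English() pipeline out of individual test modules and into shared pytest fixtures, so the expensive model load happens once instead of once per file. The fixture names the updated tests request (EN, en_nlp, en_vocab, en_tokenizer) are visible in the hunks below; the top-level conftest.py itself is not part of this diff, so the following is only a plausible sketch of it, with scopes and bodies assumed:

    # Hypothetical tests/conftest.py, reconstructed from the fixture names used below.
    import pytest

    from spacy.en import English


    @pytest.fixture(scope="session")
    def EN():
        # Loading the English models is expensive; build them once per test run.
        return English()


    @pytest.fixture(scope="session")
    def en_nlp(EN):
        # Alias for tests written against the full pipeline.
        return EN


    @pytest.fixture(scope="session")
    def en_vocab(EN):
        # Vocab-level tests only need the lexicon.
        return EN.vocab


    @pytest.fixture(scope="session")
    def en_tokenizer(EN):
        return EN.tokenizer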
@@ -1,31 +0,0 @@
-import pytest
-import os
-from os import path
-
-from spacy.munge.read_ontonotes import sgml_extract
-
-
-text_data = open(path.join(path.dirname(__file__), 'web_sample1.sgm')).read()
-
-
-def test_example_extract():
-    article = sgml_extract(text_data)
-    assert article['docid'] == 'blogspot.com_alaindewitt_20060924104100_ENG_20060924_104100'
-    assert article['doctype'] == 'BLOG TEXT'
-    assert article['datetime'] == '2006-09-24T10:41:00'
-    assert article['headline'].strip() == 'Devastating Critique of the Arab World by One of Its Own'
-    assert article['poster'] == 'Alain DeWitt'
-    assert article['postdate'] == '2006-09-24T10:41:00'
-    assert article['text'].startswith('Thanks again to my fri'), article['text'][:10]
-    assert article['text'].endswith(' tide will turn."'), article['text'][-10:]
-    assert '<' not in article['text'], article['text'][:10]
-
-
-def test_directory():
-    context_dir = '/usr/local/data/OntoNotes5/data/english/metadata/context/wb/sel'
-
-    for fn in os.listdir(context_dir):
-        with open(path.join(context_dir, fn)) as file_:
-            text = file_.read()
-            article = sgml_extract(text)
@@ -1,46 +0,0 @@
-from spacy.munge import read_ptb
-
-import pytest
-
-from os import path
-
-ptb_loc = path.join(path.dirname(__file__), 'wsj_0001.parse')
-file3_loc = path.join(path.dirname(__file__), 'wsj_0003.parse')
-
-
-@pytest.fixture
-def ptb_text():
-    return open(path.join(ptb_loc)).read()
-
-
-@pytest.fixture
-def sentence_strings(ptb_text):
-    return read_ptb.split(ptb_text)
-
-
-def test_split(sentence_strings):
-    assert len(sentence_strings) == 2
-    assert sentence_strings[0].startswith('(TOP (S (NP-SBJ')
-    assert sentence_strings[0].endswith('(. .)))')
-    assert sentence_strings[1].startswith('(TOP (S (NP-SBJ')
-    assert sentence_strings[1].endswith('(. .)))')
-
-
-def test_tree_read(sentence_strings):
-    words, brackets = read_ptb.parse(sentence_strings[0])
-    assert len(brackets) == 11
-    string = ("Pierre Vinken , 61 years old , will join the board as a nonexecutive "
-              "director Nov. 29 .")
-    word_strings = string.split()
-    starts = [s for l, s, e in brackets]
-    ends = [e for l, s, e in brackets]
-    assert min(starts) == 0
-    assert max(ends) == len(words)
-    assert brackets[-1] == ('S', 0, len(words))
-    assert ('NP-SBJ', 0, 7) in brackets
-
-
-def test_traces():
-    sent_strings = sentence_strings(open(file3_loc).read())
-    words, brackets = read_ptb.parse(sent_strings[0])
-    assert len(words) == 36
@@ -1,7 +1,6 @@
 """Test the Token.conjuncts property"""
 from __future__ import unicode_literals

-from spacy.en import English
 import pytest

@@ -9,9 +8,8 @@ def orths(tokens):
     return [t.orth_ for t in tokens]


-def test_simple_two():
-    nlp = English()
-    tokens = nlp('I lost money and pride.', tag=True, parse=True)
+def test_simple_two(EN):
+    tokens = EN('I lost money and pride.', tag=True, parse=True)
     pride = tokens[4]
     for t in tokens:
         print t.orth_, t.tag_, t.head.orth_
@@ -20,13 +18,12 @@ def test_simple_two():
     assert orths(money.conjuncts) == ['money', 'pride']


-def test_comma_three():
-    nlp = English()
-    tokens = nlp('I found my wallet, phone and keys.')
-    keys = tokens[-2]
-    assert orths(keys.conjuncts) == ['wallet', 'phone', 'keys']
-    wallet = tokens[3]
-    assert orths(wallet.conjuncts) == ['wallet', 'phone', 'keys']
+#def test_comma_three(EN):
+#    tokens = EN('I found my wallet, phone and keys.')
+#    keys = tokens[-2]
+#    assert orths(keys.conjuncts) == ['wallet', 'phone', 'keys']
+#    wallet = tokens[3]
+#    assert orths(wallet.conjuncts) == ['wallet', 'phone', 'keys']


 # This is failing due to parse errors
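The conjunct test above is commented out rather than deleted because the parser currently gets it wrong. An alternative worth noting (not what this commit does) is pytest's xfail marker, which keeps the test running and reports when it unexpectedly starts passing:

    @pytest.mark.xfail(reason='parse errors on comma-separated conjuncts')
    def test_comma_three(EN):
        tokens = EN('I found my wallet, phone and keys.')
        keys = tokens[-2]
        assert orths(keys.conjuncts) == ['wallet', 'phone', 'keys']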
@@ -1,11 +1,6 @@
-from spacy.en import English
-
-
-nlp = English()
-
-
-def test_simple_types():
-    tokens = nlp(u'Mr. Best flew to New York on Saturday morning.')
+def test_simple_types(EN):
+    tokens = EN(u'Mr. Best flew to New York on Saturday morning.')
     ents = list(tokens.ents)
     assert ents[0].start == 1
     assert ents[0].end == 2
@@ -1,11 +1,7 @@
-from spacy.en import English
-
-
 import pytest

-NLP = English()
-
-
-def test_root():
-    tokens = NLP(u"i don't have other assistance")
+def test_root(EN):
+    tokens = EN(u"i don't have other assistance")
     for t in tokens:
         assert t.dep != 0, t.orth_
@@ -2,8 +2,6 @@ from __future__ import unicode_literals
 from os import path
 import codecs

-from spacy.en import English
-
 import pytest

@@ -14,13 +12,8 @@ def sun_text():
     return text


-@pytest.fixture
-def nlp():
-    return English()
-
-
-def test_consistency(nlp, sun_text):
-    tokens = nlp(sun_text)
+def test_consistency(EN, sun_text):
+    tokens = EN(sun_text)
     for head in tokens:
         for child in head.lefts:
             assert child.head is head
@@ -28,8 +21,8 @@ def test_consistency(nlp, sun_text):
             assert child.head is head


-def test_child_consistency(nlp, sun_text):
-    tokens = nlp(sun_text)
+def test_child_consistency(EN, sun_text):
+    tokens = EN(sun_text)

     lefts = {}
     rights = {}
@@ -60,9 +53,9 @@ def test_child_consistency(nlp, sun_text):
     assert not children


-def test_edges(nlp):
+def test_edges(EN):
     sun_text = u"Chemically, about three quarters of the Sun's mass consists of hydrogen, while the rest is mostly helium."
-    tokens = nlp(sun_text)
+    tokens = EN(sun_text)
     for token in tokens:
         subtree = list(token.subtree)
         debug = '\t'.join((token.orth_, token.left_edge.orth_, subtree[0].orth_))
@@ -1,14 +1,8 @@
 from __future__ import unicode_literals
-from spacy.en import English

 import pytest


-@pytest.fixture
-def EN():
-    return English()
-
-
 def test_single_period(EN):
     string = 'A test sentence.'
     words = EN(string)
@@ -1,10 +1,7 @@
 from __future__ import unicode_literals
-from spacy.en import English

-EN = English()
-
-
-def test_subtrees():
+def test_subtrees(EN):
     sent = EN('The four wheels on the bus turned quickly')
     wheels = sent[2]
     bus = sent[5]
@@ -1,14 +1,8 @@
 from __future__ import unicode_literals

-import pytest
-
-from spacy.en import English
-
-NLU = English()
-
-
-def test_merge_tokens():
-    tokens = NLU(u'Los Angeles start.')
+def test_merge_tokens(en_nlp):
+    tokens = en_nlp(u'Los Angeles start.')
     assert len(tokens) == 4
     assert tokens[0].head.orth_ == 'Angeles'
     assert tokens[1].head.orth_ == 'start'
@@ -18,8 +12,8 @@ def test_merge_tokens():
     assert tokens[0].head.orth_ == 'start'


-def test_merge_heads():
-    tokens = NLU(u'I found a pilates class near work.')
+def test_merge_heads(en_nlp):
+    tokens = en_nlp(u'I found a pilates class near work.')
     assert len(tokens) == 8
     tokens.merge(tokens[3].idx, tokens[4].idx + len(tokens[4]), tokens[4].tag_,
                  'pilates class', 'O')
@@ -34,4 +28,4 @@ def test_merge_heads():

 def test_issue_54():
     text = u'Talks given by women had a slightly higher number of questions asked (3.2$\pm$0.2) than talks given by men (2.6$\pm$0.1).'
-    tokens = NLU(text, merge_mwes=True)
+    tokens = en_nlp(text, merge_mwes=True)
@@ -1,16 +1,12 @@
 from __future__ import unicode_literals

-from spacy.en import English
-
 import pytest


 @pytest.fixture
-def doc():
-    EN = English()
-    return EN('This is a sentence. This is another sentence. And a third.')
+def doc(en_nlp):
+    return en_nlp('This is a sentence. This is another sentence. And a third.')


 def test_sent_spans(doc):
@@ -1,13 +1,9 @@
 from __future__ import unicode_literals

-from spacy.en import English
-
 import pytest

-NLU = English()
-
-
-def test_am_pm():
+def test_am_pm(en_nlp):
     numbers = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']
     variants = ['a.m.', 'am', 'p.m.', 'pm']
     spaces = ['', ' ']
@@ -15,7 +11,7 @@ def test_am_pm():
         for var in variants:
             for space in spaces:
                 string = u"The meeting was at %s%s%s wasn't it?" % (num, space, var)
-                tokens = NLU(string, merge_mwes=True)
+                tokens = en_nlp(string, merge_mwes=True)
                 assert tokens[4].orth_ == '%s%s%s' % (num, space, var)
                 ents = list(tokens.ents)
                 assert len(ents) == 1
@@ -4,9 +4,6 @@ from spacy.en import English
 import pytest


-@pytest.fixture
-def EN():
-    return English()
-
 @pytest.fixture
 def tagged(EN):
@@ -5,12 +5,6 @@ import pytest

 from spacy.en import English


-@pytest.fixture
-def EN():
-    return English()
-
-
 @pytest.fixture
 def morph_exc():
     return {
@@ -18,9 +12,11 @@ def morph_exc():
     }


-def test_load_exc(EN, morph_exc):
-    EN.tagger.load_morph_exceptions(morph_exc)
-    tokens = EN('I like his style.', tag=True, parse=False)
+def test_load_exc(morph_exc):
+    # Do this local as we want to modify it
+    nlp = English()
+    nlp.tagger.load_morph_exceptions(morph_exc)
+    tokens = nlp('I like his style.', tag=True, parse=False)
     his = tokens[2]
     assert his.tag_ == 'PRP$'
     assert his.lemma_ == '-PRP-'
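test_load_exc is the one test that deliberately keeps a local English() instead of taking the shared fixture: load_morph_exceptions mutates the tagger, and a session-scoped instance would carry those exceptions into every later test. If more tests needed to mutate the pipeline, the same isolation could be expressed as a function-scoped fixture; a sketch under that assumption, not part of the commit:

    @pytest.fixture  # default function scope: a fresh, safely mutable pipeline per test
    def local_nlp():
        return English()


    def test_load_exc(local_nlp, morph_exc):
        local_nlp.tagger.load_morph_exceptions(morph_exc)
        tokens = local_nlp('I like his style.', tag=True, parse=False)
        assert tokens[2].lemma_ == '-PRP-'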
@@ -2,9 +2,8 @@ from spacy.en import English
 import six


-def test_tag_names():
-    nlp = English()
-    tokens = nlp(u'I ate pizzas with anchovies.', parse=True, tag=True)
+def test_tag_names(EN):
+    tokens = EN(u'I ate pizzas with anchovies.', parse=False, tag=True)
     pizza = tokens[2]
     assert type(pizza.pos) == int
     assert isinstance(pizza.pos_, six.text_type)
@@ -2,10 +2,6 @@ import pytest
 from spacy.en import English


-@pytest.fixture(scope="session")
-def EN():
-    return English(load_vectors=False)
-
-@pytest.fixture(scope="session")
+@pytest.fixture(scope="module")
 def en_tokenizer(EN):
     return EN.tokenizer
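The tokenizer conftest above drops its own EN fixture (now expected from the shared conftest) and narrows en_tokenizer from session to module scope. Scope controls how often pytest re-runs the fixture function; annotated for clarity, assuming EN stays session-scoped upstream:

    # scope="session": runs once per test run (right for expensive model loads)
    # scope="module":  runs once per test file
    # default:         runs once per test function (full isolation)
    @pytest.fixture(scope="module")
    def en_tokenizer(EN):
        # Re-requested per module, but the underlying EN pipeline is not reloaded.
        return EN.tokenizer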
tests/tokenizer/test_string_loading.py (new file, 9 lines)
@@ -0,0 +1,9 @@
+"""Test suspected freeing of strings"""
+from __future__ import unicode_literals
+
+
+def test_one(en_tokenizer):
+    tokens = en_tokenizer('Betty Botter bought a pound of butter.')
+    assert tokens[0].orth_ == 'Betty'
+    tokens2 = en_tokenizer('Betty also bought a pound of butter.')
+    assert tokens2[0].orth_ == 'Betty'
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals

-from spacy.en import English
 from spacy.util import utf8open

 import pytest
@@ -16,8 +15,7 @@ def sun_txt():
     return utf8open(loc).read()


-def test_tokenize(sun_txt):
-    nlp = English()
+def test_tokenize(sun_txt, EN):
     assert len(sun_txt) != 0
-    tokens = nlp(sun_txt)
-    assert True
+    tokens = EN(sun_txt)
+    assert len(tokens) > 100
@@ -3,13 +3,10 @@ from __future__ import unicode_literals

 import pytest

-from spacy.en import English
 from spacy.en import attrs


-EN = English()
-
-
-def test_attr_of_token():
+def test_attr_of_token(EN):
     text = u'An example sentence.'
     tokens = EN(text, tag=True, parse=False)
     example = EN.vocab[u'example']
@@ -18,7 +15,7 @@ def test_attr_of_token():
     assert feats_array[0][0] != feats_array[0][1]


-def test_tag():
+def test_tag(EN):
     text = u'A nice sentence.'
     tokens = EN(text)
     assert tokens[0].tag != tokens[1].tag != tokens[2].tag != tokens[3].tag
@@ -29,7 +26,7 @@ def test_tag():
     assert feats_array[3][1] == tokens[3].tag


-def test_dep():
+def test_dep(EN):
     text = u'A nice sentence.'
     tokens = EN(text)
     feats_array = tokens.to_array((attrs.ORTH, attrs.DEP))
@@ -6,16 +6,10 @@ from spacy.en.attrs import IS_STOP

 import pytest

-nlp = English()
-
-
-@pytest.fixture
-def token():
-    tokens = nlp(u'Give it back! He pleaded.')
-    return tokens[0]
-
-
-def test_strings(token):
+def test_strings(EN):
+    tokens = EN(u'Give it back! He pleaded.')
+    token = tokens[0]
     assert token.orth_ == 'Give'
     assert token.lower_ == 'give'
     assert token.shape_ == 'Xxxx'
@@ -27,13 +21,16 @@ def test_strings(token):
     assert token.dep_ == 'ROOT'


-def test_flags(token):
+def test_flags(EN):
+    tokens = EN(u'Give it back! He pleaded.')
+    token = tokens[0]
+
     assert token.check_flag(IS_ALPHA)
     assert not token.check_flag(IS_DIGIT)
     # TODO: Test more of these, esp. if a bug is found


-def test_single_token_string():
-    nlp = English()
-    tokens = nlp(u'foobar')
+def test_single_token_string(EN):
+    tokens = EN(u'foobar')
     assert tokens[0].string == 'foobar'
@@ -4,9 +4,11 @@ import gc

 from spacy.en import English

+# Let this have its own instances, as we have to be careful about memory here
+# that's the point, after all

 def get_orphan_token(text, i):
-    nlp = English()
+    nlp = English(load_vectors=False)
     tokens = nlp(text)
     gc.collect()
     token = tokens[i]
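get_orphan_token builds its own English instance on purpose: the test is about memory, so the Doc and pipeline created inside the helper must be collectable while the returned token stays valid. A hypothetical caller, to make the intent concrete (the actual test body is not shown in this hunk):

    def test_orphan_token_survives_gc():
        # The Doc created inside get_orphan_token is no longer referenced here,
        # so this only passes if the token keeps its backing storage alive.
        token = get_orphan_token(u'An example sentence', 1)
        gc.collect()
        assert token.orth_ == 'example'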
@@ -1,16 +1,10 @@
 from __future__ import unicode_literals
-from spacy.en import English

 import pytest


-@pytest.fixture
-def tokens():
-    nlp = English()
-    return nlp(u'Give it back! He pleaded.')
-
-
-def test_getitem(tokens):
+def test_getitem(EN):
+    tokens = EN(u'Give it back! He pleaded.')
     assert tokens[0].orth_ == 'Give'
     assert tokens[-1].orth_ == '.'
     with pytest.raises(IndexError):
@@ -5,11 +5,6 @@ from spacy.en import English
 import pytest


-@pytest.fixture
-def EN():
-    return English()
-
-
 def test_vec(EN):
     hype = EN.vocab['hype']
     assert hype.orth_ == 'hype'
@@ -1,19 +1,12 @@
 import pytest

-from spacy.en import English
-
-
-@pytest.fixture
-def EN():
-    return English()
-
-
-def test_range_iter(EN):
-    for i in range(len(EN.vocab)):
-        lex = EN.vocab[i]
+def test_range_iter(en_vocab):
+    for i in range(len(en_vocab)):
+        lex = en_vocab[i]


-def test_iter(EN):
+def test_iter(en_vocab):
     i = 0
-    for lex in EN.vocab:
+    for lex in en_vocab:
         i += 1
@@ -2,28 +2,22 @@ from __future__ import unicode_literals

 import pytest

-from spacy.en import English
 from spacy.en.attrs import *


-@pytest.fixture
-def EN():
-    return English()
-
-
-def test_is_alpha(EN):
-    the = EN.vocab['the']
+def test_is_alpha(en_vocab):
+    the = en_vocab['the']
     assert the.flags & (1 << IS_ALPHA)
-    year = EN.vocab['1999']
+    year = en_vocab['1999']
     assert not year.flags & (1 << IS_ALPHA)
-    mixed = EN.vocab['hello1']
+    mixed = en_vocab['hello1']
     assert not mixed.flags & (1 << IS_ALPHA)


-def test_is_digit(EN):
-    the = EN.vocab['the']
+def test_is_digit(en_vocab):
+    the = en_vocab['the']
     assert not the.flags & (1 << IS_DIGIT)
-    year = EN.vocab['1999']
+    year = en_vocab['1999']
     assert year.flags & (1 << IS_DIGIT)
-    mixed = EN.vocab['hello1']
+    mixed = en_vocab['hello1']
     assert not mixed.flags & (1 << IS_DIGIT)
@@ -1,18 +0,0 @@
-"""Test suspected freeing of strings"""
-from __future__ import unicode_literals
-
-import pytest
-
-from spacy.en import English
-
-
-@pytest.fixture
-def EN():
-    return English()
-
-
-def test_one(EN):
-    tokens = EN('Betty Botter bought a pound of butter.')
-    assert tokens[0].orth_ == 'Betty'
-    tokens2 = EN('Betty also bought a pound of butter.')
-    assert tokens2[0].orth_ == 'Betty'
@@ -1,34 +1,27 @@
 from __future__ import unicode_literals
 import pytest

-from spacy.en import English
-
-
-@pytest.fixture
-def EN():
-    return English()
-
-
-def test_neq(EN):
-    addr = EN.vocab['Hello']
-    assert EN.vocab['bye'].orth != addr.orth
+def test_neq(en_vocab):
+    addr = en_vocab['Hello']
+    assert en_vocab['bye'].orth != addr.orth


-def test_eq(EN):
-    addr = EN.vocab['Hello']
-    assert EN.vocab['Hello'].orth == addr.orth
+def test_eq(en_vocab):
+    addr = en_vocab['Hello']
+    assert en_vocab['Hello'].orth == addr.orth


-def test_case_neq(EN):
-    addr = EN.vocab['Hello']
-    assert EN.vocab['hello'].orth != addr.orth
+def test_case_neq(en_vocab):
+    addr = en_vocab['Hello']
+    assert en_vocab['hello'].orth != addr.orth


-def test_punct_neq(EN):
-    addr = EN.vocab['Hello']
-    assert EN.vocab['Hello,'].orth != addr.orth
+def test_punct_neq(en_vocab):
+    addr = en_vocab['Hello']
+    assert en_vocab['Hello,'].orth != addr.orth


-def test_shape_attr(EN):
-    example = EN.vocab['example']
+def test_shape_attr(en_vocab):
+    example = en_vocab['example']
     assert example.orth != example.shape