From 3e6e1f0251465a1c05ae9df0e82313711a2691e1 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 10 Jan 2017 19:24:10 +0100 Subject: [PATCH] Tidy up regression tests --- spacy/tests/regression/test_issue351.py | 2 ++ spacy/tests/regression/test_issue360.py | 4 +++- spacy/tests/regression/test_issue429.py | 6 ++++-- spacy/tests/regression/test_issue587.py | 6 +++++- spacy/tests/regression/test_issue588.py | 6 +++++- spacy/tests/regression/test_issue589.py | 7 +++++-- spacy/tests/regression/test_issue590.py | 5 ++++- spacy/tests/regression/test_issue595.py | 10 ++++++---- spacy/tests/regression/test_issue599.py | 4 ++++ spacy/tests/regression/test_issue600.py | 4 +++- spacy/tests/regression/test_issue605.py | 11 +++++++---- spacy/tests/regression/test_issue615.py | 8 +++++--- spacy/tests/regression/test_issue617.py | 3 +++ 13 files changed, 56 insertions(+), 20 deletions(-) diff --git a/spacy/tests/regression/test_issue351.py b/spacy/tests/regression/test_issue351.py index 84d4398c5..06f24715c 100644 --- a/spacy/tests/regression/test_issue351.py +++ b/spacy/tests/regression/test_issue351.py @@ -1,4 +1,6 @@ +# coding: utf-8 from __future__ import unicode_literals + from ...en import English import pytest diff --git a/spacy/tests/regression/test_issue360.py b/spacy/tests/regression/test_issue360.py index 018289030..d0b55032a 100644 --- a/spacy/tests/regression/test_issue360.py +++ b/spacy/tests/regression/test_issue360.py @@ -1,4 +1,6 @@ +# coding: utf-8 from __future__ import unicode_literals + from ...en import English import pytest @@ -10,5 +12,5 @@ def en_tokenizer(): def test_big_ellipsis(en_tokenizer): - tokens = en_tokenizer(u'$45...............Asking') + tokens = en_tokenizer('$45...............Asking') assert len(tokens) > 2 diff --git a/spacy/tests/regression/test_issue429.py b/spacy/tests/regression/test_issue429.py index b3e6b2831..1412a54f2 100644 --- a/spacy/tests/regression/test_issue429.py +++ b/spacy/tests/regression/test_issue429.py @@ -1,9 +1,11 @@ +# coding: utf-8 from __future__ import unicode_literals -import pytest import spacy from spacy.attrs import ORTH +import pytest + @pytest.mark.models def test_issue429(): @@ -23,7 +25,7 @@ def test_issue429(): doc = nlp.tokenizer('a b c') nlp.tagger(doc) nlp.matcher(doc) - + for word in doc: print(word.text, word.ent_iob_, word.ent_type_) nlp.entity(doc) diff --git a/spacy/tests/regression/test_issue587.py b/spacy/tests/regression/test_issue587.py index 5b86801d6..8815b346a 100644 --- a/spacy/tests/regression/test_issue587.py +++ b/spacy/tests/regression/test_issue587.py @@ -1,14 +1,18 @@ +# coding: utf-8 +from __future__ import unicode_literals + import spacy import spacy.matcher from spacy.attrs import IS_PUNCT, ORTH import pytest + @pytest.mark.models def test_matcher_segfault(): nlp = spacy.load('en', parser=False, entity=False) matcher = spacy.matcher.Matcher(nlp.vocab) - content = u'''a b; c''' + content = '''a b; c''' matcher.add(entity_key='1', label='TEST', attrs={}, specs=[[{ORTH: 'a'}, {ORTH: 'b'}]]) matcher(nlp(content)) matcher.add(entity_key='2', label='TEST', attrs={}, specs=[[{ORTH: 'a'}, {ORTH: 'b'}, {IS_PUNCT: True}, {ORTH: 'c'}]]) diff --git a/spacy/tests/regression/test_issue588.py b/spacy/tests/regression/test_issue588.py index 0b7defe75..0b05ac74e 100644 --- a/spacy/tests/regression/test_issue588.py +++ b/spacy/tests/regression/test_issue588.py @@ -1,8 +1,12 @@ -import pytest +# coding: utf-8 +from __future__ import unicode_literals + from ...vocab import Vocab from ...tokens import Doc from ...matcher import Matcher +import pytest + def test_issue588(): matcher = Matcher(Vocab()) diff --git a/spacy/tests/regression/test_issue589.py b/spacy/tests/regression/test_issue589.py index bf2fda72a..bcbfb0a6a 100644 --- a/spacy/tests/regression/test_issue589.py +++ b/spacy/tests/regression/test_issue589.py @@ -1,10 +1,13 @@ -import pytest +# coding: utf-8 +from __future__ import unicode_literals from ...vocab import Vocab from ...tokens import Doc +import pytest + def test_issue589(): vocab = Vocab() vocab.strings.set_frozen(True) - doc = Doc(vocab, words=[u'whata']) + doc = Doc(vocab, words=['whata']) diff --git a/spacy/tests/regression/test_issue590.py b/spacy/tests/regression/test_issue590.py index a35d5d1a4..fedc9eaf4 100644 --- a/spacy/tests/regression/test_issue590.py +++ b/spacy/tests/regression/test_issue590.py @@ -1,9 +1,12 @@ +# coding: utf-8 from __future__ import unicode_literals + from ...attrs import * from ...matcher import Matcher from ...tokens import Doc from ...en import English + def test_overlapping_matches(): vocab = English.Defaults.create_vocab() doc = Doc(vocab, words=['n', '=', '1', ';', 'a', ':', '5', '%']) @@ -29,6 +32,6 @@ def test_overlapping_matches(): {ORTH: '='}, {LIKE_NUM: True}, ], label='b') - + matches = matcher(doc) assert len(matches) == 2 diff --git a/spacy/tests/regression/test_issue595.py b/spacy/tests/regression/test_issue595.py index 1f0ed3a3c..e61ff5273 100644 --- a/spacy/tests/regression/test_issue595.py +++ b/spacy/tests/regression/test_issue595.py @@ -1,11 +1,13 @@ +# coding: utf-8 from __future__ import unicode_literals -import pytest from ...symbols import POS, VERB, VerbForm_inf from ...tokens import Doc from ...vocab import Vocab from ...lemmatizer import Lemmatizer +import pytest + @pytest.fixture def index(): @@ -37,6 +39,6 @@ def vocab(lemmatizer, tag_map): def test_not_lemmatize_base_forms(vocab): doc = Doc(vocab, words=["Do", "n't", "feed", "the", "dog"]) feed = doc[2] - feed.tag_ = u'VB' - assert feed.text == u'feed' - assert feed.lemma_ == u'feed' + feed.tag_ = 'VB' + assert feed.text == 'feed' + assert feed.lemma_ == 'feed' diff --git a/spacy/tests/regression/test_issue599.py b/spacy/tests/regression/test_issue599.py index ce35c6db2..9f8721676 100644 --- a/spacy/tests/regression/test_issue599.py +++ b/spacy/tests/regression/test_issue599.py @@ -1,6 +1,10 @@ +# coding: utf-8 +from __future__ import unicode_literals + from ...tokens import Doc from ...vocab import Vocab + def test_issue599(): doc = Doc(Vocab()) doc.is_tagged = True diff --git a/spacy/tests/regression/test_issue600.py b/spacy/tests/regression/test_issue600.py index 90e700aed..5fc1bc68c 100644 --- a/spacy/tests/regression/test_issue600.py +++ b/spacy/tests/regression/test_issue600.py @@ -1,4 +1,6 @@ +# coding: utf-8 from __future__ import unicode_literals + from ...tokens import Doc from ...vocab import Vocab from ...attrs import POS @@ -6,4 +8,4 @@ from ...attrs import POS def test_issue600(): doc = Doc(Vocab(tag_map={'NN': {'pos': 'NOUN'}}), words=['hello']) - doc[0].tag_ = u'NN' + doc[0].tag_ = 'NN' diff --git a/spacy/tests/regression/test_issue605.py b/spacy/tests/regression/test_issue605.py index 64373950e..16bcea472 100644 --- a/spacy/tests/regression/test_issue605.py +++ b/spacy/tests/regression/test_issue605.py @@ -1,3 +1,6 @@ +# coding: utf-8 +from __future__ import unicode_literals + from ...attrs import LOWER, ORTH from ...tokens import Doc from ...vocab import Vocab @@ -9,16 +12,16 @@ def return_false(doc, ent_id, label, start, end): def test_matcher_accept(): - doc = Doc(Vocab(), words=[u'The', u'golf', u'club', u'is', u'broken']) + doc = Doc(Vocab(), words=['The', 'golf', 'club', 'is', 'broken']) - golf_pattern = [ + golf_pattern = [ { ORTH: "golf"}, { ORTH: "club"} ] matcher = Matcher(doc.vocab) - matcher.add_entity(u'Sport_Equipment', acceptor=return_false) - matcher.add_pattern(u"Sport_Equipment", golf_pattern) + matcher.add_entity('Sport_Equipment', acceptor=return_false) + matcher.add_pattern("Sport_Equipment", golf_pattern) match = matcher(doc) assert match == [] diff --git a/spacy/tests/regression/test_issue615.py b/spacy/tests/regression/test_issue615.py index 83f44037b..71c6de366 100644 --- a/spacy/tests/regression/test_issue615.py +++ b/spacy/tests/regression/test_issue615.py @@ -1,5 +1,7 @@ +# coding: utf-8 from __future__ import unicode_literals -import spacy + +import spacy from spacy.attrs import ORTH @@ -17,10 +19,10 @@ def merge_phrases(matcher, doc, i, matches): def test_entity_ID_assignment(): nlp = spacy.en.English() - text = u"""The golf club is broken""" + text = """The golf club is broken""" doc = nlp(text) - golf_pattern = [ + golf_pattern = [ { ORTH: "golf"}, { ORTH: "club"} ] diff --git a/spacy/tests/regression/test_issue617.py b/spacy/tests/regression/test_issue617.py index 600445c2f..0f4d63b97 100644 --- a/spacy/tests/regression/test_issue617.py +++ b/spacy/tests/regression/test_issue617.py @@ -1,3 +1,6 @@ +# coding: utf-8 +from __future__ import unicode_literals + from ...vocab import Vocab