* Upd tests

2025-08-04 04:10:20 +03:00 · 2015-07-23 01:19:11 +02:00 · 2015-07-23 01:19:11 +02:00 · 3a5299faec
commit 3a5299faec
parent 20c2db08b7
8 changed files with 28 additions and 9 deletions
--- a/tests/parser/test_ner.py
+++ b/tests/parser/test_ner.py
@@ -1,4 +1,6 @@
+import pytest

+@pytest.mark.models
 def test_simple_types(EN):
    tokens = EN(u'Mr. Best flew to New York on Saturday morning.')
    ents = list(tokens.ents)
--- a/tests/serialize/test_huffman.py
+++ b/tests/serialize/test_huffman.py
@@ -96,6 +96,7 @@ def test_rosetta():
    assert my_exp_len == py_exp_len


+@pytest.mark.slow
 def test_vocab(EN):
    codec = HuffmanCodec([(w.orth, numpy.exp(w.prob)) for w in EN.vocab])
    expected_length = 0
@@ -105,6 +106,7 @@ def test_vocab(EN):
    assert 8 < expected_length < 15


+@pytest.mark.slow
 def test_freqs():
    freqs = []
    words = []
--- a/tests/serialize/test_packer.py
+++ b/tests/serialize/test_packer.py
@@ -120,5 +120,3 @@ def test_packer_annotated(tokenizer):
    assert [t.tag_ for t in result] == ['DT', 'NN', 'VBD']
    assert [t.dep_ for t in result] == ['det', 'nsubj', 'ROOT']
    assert [(t.head.i - t.i) for t in result] == [1, 1, 0]
-
-
--- a/tests/spans/test_merge.py
+++ b/tests/spans/test_merge.py
@@ -1,6 +1,8 @@
 from __future__ import unicode_literals
+import pytest


+@pytest.mark.models
 def test_merge_tokens(EN):
    tokens = EN(u'Los Angeles start.')
    assert len(tokens) == 4
@@ -12,6 +14,7 @@ def test_merge_tokens(EN):
    assert tokens[0].head.orth_ == 'start'


+@pytest.mark.models
 def test_merge_heads(EN):
    tokens = EN(u'I found a pilates class near work.')
    assert len(tokens) == 8
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -1,8 +1,9 @@
 # -*- coding: utf-8 -*-
 """Sphinx doctest is just too hard. Manually paste doctest examples here"""
 from spacy.en.attrs import IS_LOWER
+import pytest

-
+@pytest.mark.models
 def test_1():
    import spacy.en
    from spacy.parts_of_speech import ADV
@@ -21,6 +22,7 @@ def test_1():
    assert o == -11.07155704498291


+@pytest.mark.models
 def test2():
    import spacy.en
    from spacy.parts_of_speech import ADV
@@ -41,6 +43,7 @@ def test2():
    -11.07155704498291


+@pytest.mark.models
 def test3():
    import spacy.en
    from spacy.parts_of_speech import ADV
--- a/tests/tokens/test_token_api.py
+++ b/tests/tokens/test_token_api.py
@@ -7,6 +7,7 @@ from spacy.en.attrs import IS_STOP
 import pytest


+@pytest.mark.models
 def test_strings(EN):
    tokens = EN(u'Give it back! He pleaded.')
    token = tokens[0]
--- a/tests/tokens/test_tokens_api.py
+++ b/tests/tokens/test_tokens_api.py
@@ -5,7 +5,7 @@ from spacy.tokens import Doc
 import pytest


-def test_getitem(EN):
+def mest_getitem(EN):
    tokens = EN(u'Give it back! He pleaded.')
    assert tokens[0].orth_ == 'Give'
    assert tokens[-1].orth_ == '.'
@@ -13,10 +13,19 @@ def test_getitem(EN):
        tokens[len(tokens)]


-def test_serialize(EN):
-    tokens = EN(u' Give it back! He pleaded. ')
-    packed = tokens.serialize()
-    new_tokens = Doc.deserialize(EN.vocab, packed)
+def mest_serialize(EN):
+    tokens = EN(u'Give it back! He pleaded.')
+    packed = tokens.to_bytes()
+    new_tokens = Doc(EN.vocab).from_bytes(packed)
+    assert tokens.string == new_tokens.string
+    assert [t.orth_ for t in tokens] == [t.orth_ for t in new_tokens]
+    assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
+
+
+def test_serialize_whitespace(EN):
+    tokens = EN(u' Give it back! He pleaded. ')
+    packed = tokens.to_bytes()
+    new_tokens = Doc(EN.vocab).from_bytes(packed)
    assert tokens.string == new_tokens.string
    assert [t.orth_ for t in tokens] == [t.orth_ for t in new_tokens]
    assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
--- a/tests/tokens/test_vec.py
+++ b/tests/tokens/test_vec.py
@@ -4,13 +4,14 @@ from spacy.en import English

 import pytest

-
+@pytest.mark.vectors
 def test_vec(EN):
    hype = EN.vocab['hype']
    assert hype.orth_ == 'hype'
    assert 0.08 >= hype.repvec[0] > 0.07


+@pytest.mark.vectors
 def test_capitalized(EN):
    hype = EN.vocab['Hype']
    assert hype.orth_ == 'Hype'