* Fix serializer tests for new attr scheme

2025-07-10 16:22:29 +03:00 · 2015-08-26 19:22:26 +02:00 · 2015-08-26 19:22:26 +02:00 · ff9db9f3ae
commit ff9db9f3ae
parent 658c4a3930
2 changed files with 9 additions and 39 deletions
--- a/tests/serialize/test_codecs.py
+++ b/tests/serialize/test_codecs.py
@@ -41,25 +41,10 @@ def test_attribute():
 def test_vocab_codec():
    def get_lex_props(string, prob):
        return {
            'flags': 0,
            'length': len(string),
            'orth': string,
            'lower': string, 
            'norm': string,
            'shape': string,
            'prefix': string[0],
            'suffix': string[-3:],
            'cluster': 0,
            'prob': prob,
            'sentiment': 0
        }
    vocab = Vocab()
-    vocab['dog'] = get_lex_props('dog', 0.001)
+    lex = vocab['dog']
-    vocab['the'] = get_lex_props('the', 0.05)
+    lex = vocab['the']
-    vocab['jumped'] = get_lex_props('jumped', 0.005)
+    lex = vocab['jumped']
    codec = HuffmanCodec([(lex.orth, lex.prob) for lex in vocab])
--- a/tests/serialize/test_packer.py
+++ b/tests/serialize/test_packer.py
@@ -5,6 +5,7 @@ import re
 import pytest
 import numpy
 from spacy.language import Language
 from spacy.vocab import Vocab
 from spacy.tokens.doc import Doc
 from spacy.tokenizer import Tokenizer
@@ -17,30 +18,14 @@ from spacy.serialize.packer import Packer
 from spacy.serialize.bits import BitArray
 def get_lex_props(string, prob=-22, is_oov=False):
    return {
        'flags': 0,
        'length': len(string),
        'orth': string,
        'lower': string, 
        'norm': string,
        'shape': string,
        'prefix': string[0],
        'suffix': string[-3:],
        'cluster': 0,
        'prob': prob,
        'sentiment': 0
    }
 @pytest.fixture
 def vocab():
-    vocab = Vocab(get_lex_props=get_lex_props)
+    vocab = Vocab(Language.default_lex_attrs())
-    vocab['dog'] = get_lex_props('dog', 0.001)
+    lex = vocab['dog']
    assert vocab[vocab.strings['dog']].orth_ == 'dog'
-    vocab['the'] = get_lex_props('the', 0.01)
+    lex  = vocab['the']
-    vocab['quick'] = get_lex_props('quick', 0.005)
+    lex = vocab['quick']
-    vocab['jumped'] = get_lex_props('jumped', 0.007)
+    lex = vocab['jumped']
    return vocab