* Fix serializer tests for new attr scheme

This commit is contained in:
Matthew Honnibal 2015-08-26 19:22:26 +02:00
parent 658c4a3930
commit ff9db9f3ae
2 changed files with 9 additions and 39 deletions

View File

@@ -41,25 +41,10 @@ def test_attribute():
def test_vocab_codec(): def test_vocab_codec():
def get_lex_props(string, prob):
return {
'flags': 0,
'length': len(string),
'orth': string,
'lower': string,
'norm': string,
'shape': string,
'prefix': string[0],
'suffix': string[-3:],
'cluster': 0,
'prob': prob,
'sentiment': 0
}
vocab = Vocab() vocab = Vocab()
vocab['dog'] = get_lex_props('dog', 0.001) lex = vocab['dog']
vocab['the'] = get_lex_props('the', 0.05) lex = vocab['the']
vocab['jumped'] = get_lex_props('jumped', 0.005) lex = vocab['jumped']
codec = HuffmanCodec([(lex.orth, lex.prob) for lex in vocab]) codec = HuffmanCodec([(lex.orth, lex.prob) for lex in vocab])

View File

@@ -5,6 +5,7 @@ import re
import pytest import pytest
import numpy import numpy
from spacy.language import Language
from spacy.vocab import Vocab from spacy.vocab import Vocab
from spacy.tokens.doc import Doc from spacy.tokens.doc import Doc
from spacy.tokenizer import Tokenizer from spacy.tokenizer import Tokenizer
@@ -17,30 +18,14 @@ from spacy.serialize.packer import Packer
from spacy.serialize.bits import BitArray from spacy.serialize.bits import BitArray
def get_lex_props(string, prob=-22, is_oov=False):
return {
'flags': 0,
'length': len(string),
'orth': string,
'lower': string,
'norm': string,
'shape': string,
'prefix': string[0],
'suffix': string[-3:],
'cluster': 0,
'prob': prob,
'sentiment': 0
}
@pytest.fixture @pytest.fixture
def vocab(): def vocab():
vocab = Vocab(get_lex_props=get_lex_props) vocab = Vocab(Language.default_lex_attrs())
vocab['dog'] = get_lex_props('dog', 0.001) lex = vocab['dog']
assert vocab[vocab.strings['dog']].orth_ == 'dog' assert vocab[vocab.strings['dog']].orth_ == 'dog'
vocab['the'] = get_lex_props('the', 0.01) lex = vocab['the']
vocab['quick'] = get_lex_props('quick', 0.005) lex = vocab['quick']
vocab['jumped'] = get_lex_props('jumped', 0.007) lex = vocab['jumped']
return vocab return vocab