mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 00:04:15 +03:00
* Upd serialization tests
This commit is contained in:
parent
6a95409cd2
commit
6deb1e84b6
|
@ -17,7 +17,7 @@ from spacy.serialize.packer import Packer
|
||||||
from spacy.serialize.bits import BitArray
|
from spacy.serialize.bits import BitArray
|
||||||
|
|
||||||
|
|
||||||
def get_lex_props(string, prob=-22):
|
def get_lex_props(string, prob=-22, is_oov=False):
|
||||||
return {
|
return {
|
||||||
'flags': 0,
|
'flags': 0,
|
||||||
'length': len(string),
|
'length': len(string),
|
||||||
|
@ -120,3 +120,22 @@ def test_packer_annotated(tokenizer):
|
||||||
assert [t.tag_ for t in result] == ['DT', 'NN', 'VBD']
|
assert [t.tag_ for t in result] == ['DT', 'NN', 'VBD']
|
||||||
assert [t.dep_ for t in result] == ['det', 'nsubj', 'ROOT']
|
assert [t.dep_ for t in result] == ['det', 'nsubj', 'ROOT']
|
||||||
assert [(t.head.i - t.i) for t in result] == [1, 1, 0]
|
assert [(t.head.i - t.i) for t in result] == [1, 1, 0]
|
||||||
|
|
||||||
|
|
||||||
|
def test_packer_bad_chars(tokenizer):
|
||||||
|
string = u'naja gut, is eher bl\xf6d und nicht mit reddit.com/digg.com vergleichbar; vielleicht auf dem weg dahin'
|
||||||
|
packer = Packer(tokenizer.vocab, [])
|
||||||
|
|
||||||
|
doc = tokenizer(string)
|
||||||
|
bits = packer.pack(doc)
|
||||||
|
result = packer.unpack(bits)
|
||||||
|
assert result.string == doc.string
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.models
|
||||||
|
def test_packer_bad_chars(EN):
|
||||||
|
string = u'naja gut, is eher bl\xf6d und nicht mit reddit.com/digg.com vergleichbar; vielleicht auf dem weg dahin'
|
||||||
|
doc = EN(string)
|
||||||
|
byte_string = doc.to_bytes()
|
||||||
|
result = Doc(EN.vocab).from_bytes(byte_string)
|
||||||
|
assert [t.tag_ for t in result] == [t.tag_ for t in doc]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user