mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-15 20:16:23 +03:00
3e8f136ba7
* Improve load_language_data helper * WIP: Add Lookups implementation * Start moving lemma data over to JSON * WIP: move data over for more languages * Convert more languages * Fix lemmatizer fixtures in tests * Finish conversion * Auto-format JSON files * Fix test for now * Make sure tables are stored on instance * Update docstrings * Update docstrings and errors * Update test * Add Lookups.__len__ * Add serialization methods * Add Lookups.remove_table * Use msgpack for serialization to disk * Fix file exists check * Try using OrderedDict for everything * Update .flake8 [ci skip] * Try fixing serialization * Update test_lookups.py * Update test_serialize_vocab_strings.py * Fix serialization for lookups * Fix lookups * Fix lookups * Fix lookups * Try to fix serialization * Try to fix serialization * Try to fix serialization * Try to fix serialization * Give up on serialization test * Xfail more serialization tests for 3.5 * Fix lookups for 2.7
115 lines
3.6 KiB
Python
115 lines
3.6 KiB
Python
# coding: utf-8
|
|
from __future__ import unicode_literals
|
|
|
|
import pytest
|
|
from spacy.lookups import Lookups
|
|
from spacy.vocab import Vocab
|
|
|
|
from ..util import make_tempdir
|
|
|
|
|
|
def test_lookups_api():
|
|
table_name = "test"
|
|
data = {"foo": "bar", "hello": "world"}
|
|
lookups = Lookups()
|
|
lookups.add_table(table_name, data)
|
|
assert len(lookups) == 1
|
|
assert table_name in lookups
|
|
assert lookups.has_table(table_name)
|
|
table = lookups.get_table(table_name)
|
|
assert table.name == table_name
|
|
assert len(table) == 2
|
|
assert table.get("hello") == "world"
|
|
table.set("a", "b")
|
|
assert table.get("a") == "b"
|
|
table = lookups.get_table(table_name)
|
|
assert len(table) == 3
|
|
with pytest.raises(KeyError):
|
|
lookups.get_table("xyz")
|
|
with pytest.raises(ValueError):
|
|
lookups.add_table(table_name)
|
|
table = lookups.remove_table(table_name)
|
|
assert table.name == table_name
|
|
assert len(lookups) == 0
|
|
assert table_name not in lookups
|
|
with pytest.raises(KeyError):
|
|
lookups.get_table(table_name)
|
|
|
|
|
|
# This fails on Python 3.5
|
|
@pytest.mark.xfail
|
|
def test_lookups_to_from_bytes():
|
|
lookups = Lookups()
|
|
lookups.add_table("table1", {"foo": "bar", "hello": "world"})
|
|
lookups.add_table("table2", {"a": 1, "b": 2, "c": 3})
|
|
lookups_bytes = lookups.to_bytes()
|
|
new_lookups = Lookups()
|
|
new_lookups.from_bytes(lookups_bytes)
|
|
assert len(new_lookups) == 2
|
|
assert "table1" in new_lookups
|
|
assert "table2" in new_lookups
|
|
table1 = new_lookups.get_table("table1")
|
|
assert len(table1) == 2
|
|
assert table1.get("foo") == "bar"
|
|
table2 = new_lookups.get_table("table2")
|
|
assert len(table2) == 3
|
|
assert table2.get("b") == 2
|
|
assert new_lookups.to_bytes() == lookups_bytes
|
|
|
|
# This fails on Python 3.5
|
|
@pytest.mark.xfail
|
|
def test_lookups_to_from_disk():
|
|
lookups = Lookups()
|
|
lookups.add_table("table1", {"foo": "bar", "hello": "world"})
|
|
lookups.add_table("table2", {"a": 1, "b": 2, "c": 3})
|
|
with make_tempdir() as tmpdir:
|
|
lookups.to_disk(tmpdir)
|
|
new_lookups = Lookups()
|
|
new_lookups.from_disk(tmpdir)
|
|
assert len(new_lookups) == 2
|
|
assert "table1" in new_lookups
|
|
assert "table2" in new_lookups
|
|
table1 = new_lookups.get_table("table1")
|
|
assert len(table1) == 2
|
|
assert table1.get("foo") == "bar"
|
|
table2 = new_lookups.get_table("table2")
|
|
assert len(table2) == 3
|
|
assert table2.get("b") == 2
|
|
|
|
# This fails on Python 3.5
|
|
@pytest.mark.xfail
|
|
def test_lookups_to_from_bytes_via_vocab():
|
|
table_name = "test"
|
|
vocab = Vocab()
|
|
vocab.lookups.add_table(table_name, {"foo": "bar", "hello": "world"})
|
|
assert len(vocab.lookups) == 1
|
|
assert table_name in vocab.lookups
|
|
vocab_bytes = vocab.to_bytes()
|
|
new_vocab = Vocab()
|
|
new_vocab.from_bytes(vocab_bytes)
|
|
assert len(new_vocab.lookups) == 1
|
|
assert table_name in new_vocab.lookups
|
|
table = new_vocab.lookups.get_table(table_name)
|
|
assert len(table) == 2
|
|
assert table.get("hello") == "world"
|
|
assert new_vocab.to_bytes() == vocab_bytes
|
|
|
|
|
|
# This fails on Python 3.5
|
|
@pytest.mark.xfail
|
|
def test_lookups_to_from_disk_via_vocab():
|
|
table_name = "test"
|
|
vocab = Vocab()
|
|
vocab.lookups.add_table(table_name, {"foo": "bar", "hello": "world"})
|
|
assert len(vocab.lookups) == 1
|
|
assert table_name in vocab.lookups
|
|
with make_tempdir() as tmpdir:
|
|
vocab.to_disk(tmpdir)
|
|
new_vocab = Vocab()
|
|
new_vocab.from_disk(tmpdir)
|
|
assert len(new_vocab.lookups) == 1
|
|
assert table_name in new_vocab.lookups
|
|
table = new_vocab.lookups.get_table(table_name)
|
|
assert len(table) == 2
|
|
assert table.get("hello") == "world"
|