2017-04-23 22:06:46 +03:00
|
|
|
# coding: utf-8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
from ..util import ensure_path
|
2017-05-29 11:13:42 +03:00
|
|
|
from ..util import model_to_bytes, model_from_bytes
|
2017-05-29 11:51:19 +03:00
|
|
|
from .. import util
|
|
|
|
from ..displacy import parse_deps, parse_ents
|
|
|
|
from ..tokens import Span
|
|
|
|
from .util import get_doc
|
2017-04-23 22:06:46 +03:00
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
2017-05-29 02:37:57 +03:00
|
|
|
from thinc.neural import Maxout, Softmax
|
|
|
|
from thinc.api import chain
|
2017-04-23 22:06:46 +03:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize('text', ['hello/world', 'hello world'])
|
|
|
|
def test_util_ensure_path_succeeds(text):
|
2017-05-29 11:51:19 +03:00
|
|
|
path = util.ensure_path(text)
|
2017-04-23 22:06:46 +03:00
|
|
|
assert isinstance(path, Path)
|
2017-05-29 02:37:57 +03:00
|
|
|
|
|
|
|
|
|
|
|
def test_simple_model_roundtrip_bytes():
|
|
|
|
model = Maxout(5, 10, pieces=2)
|
|
|
|
model.b += 1
|
|
|
|
data = model_to_bytes(model)
|
|
|
|
model.b -= 1
|
|
|
|
model_from_bytes(model, data)
|
|
|
|
assert model.b[0, 0] == 1
|
|
|
|
|
|
|
|
|
|
|
|
def test_multi_model_roundtrip_bytes():
|
|
|
|
model = chain(Maxout(5, 10, pieces=2), Maxout(2, 3))
|
|
|
|
model._layers[0].b += 1
|
|
|
|
model._layers[1].b += 2
|
|
|
|
data = model_to_bytes(model)
|
|
|
|
model._layers[0].b -= 1
|
|
|
|
model._layers[1].b -= 2
|
|
|
|
model_from_bytes(model, data)
|
|
|
|
assert model._layers[0].b[0, 0] == 1
|
|
|
|
assert model._layers[1].b[0, 0] == 2
|
|
|
|
|
|
|
|
|
2017-05-29 10:27:04 +03:00
|
|
|
def test_multi_model_load_missing_dims():
|
|
|
|
model = chain(Maxout(5, 10, pieces=2), Maxout(2, 3))
|
|
|
|
model._layers[0].b += 1
|
|
|
|
model._layers[1].b += 2
|
|
|
|
data = model_to_bytes(model)
|
|
|
|
|
|
|
|
model2 = chain(Maxout(5), Maxout())
|
|
|
|
model_from_bytes(model2, data)
|
|
|
|
assert model2._layers[0].b[0, 0] == 1
|
|
|
|
assert model2._layers[1].b[0, 0] == 2
|
2017-05-29 13:26:02 +03:00
|
|
|
|
2017-05-29 11:51:19 +03:00
|
|
|
@pytest.mark.parametrize('package', ['thinc'])
|
|
|
|
def test_util_is_package(package):
|
|
|
|
"""Test that an installed package via pip is recognised by util.is_package."""
|
|
|
|
assert util.is_package(package)
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize('package', ['thinc'])
|
|
|
|
def test_util_get_package_path(package):
|
|
|
|
"""Test that a Path object is returned for a package name."""
|
|
|
|
path = util.get_package_path(package)
|
|
|
|
assert isinstance(path, Path)
|
|
|
|
|
|
|
|
|
|
|
|
def test_displacy_parse_ents(en_vocab):
|
|
|
|
"""Test that named entities on a Doc are converted into displaCy's format."""
|
|
|
|
doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
|
|
|
|
doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings[u'ORG'])]
|
|
|
|
ents = parse_ents(doc)
|
|
|
|
assert isinstance(ents, dict)
|
|
|
|
assert ents['text'] == 'But Google is starting from behind '
|
|
|
|
assert ents['ents'] == [{'start': 4, 'end': 10, 'label': 'ORG'}]
|
|
|
|
|
|
|
|
|
|
|
|
def test_displacy_parse_deps(en_vocab):
|
|
|
|
"""Test that deps and tags on a Doc are converted into displaCy's format."""
|
|
|
|
words = ["This", "is", "a", "sentence"]
|
|
|
|
heads = [1, 0, 1, -2]
|
|
|
|
tags = ['DT', 'VBZ', 'DT', 'NN']
|
|
|
|
deps = ['nsubj', 'ROOT', 'det', 'attr']
|
|
|
|
doc = get_doc(en_vocab, words=words, heads=heads, tags=tags, deps=deps)
|
|
|
|
deps = parse_deps(doc)
|
|
|
|
assert isinstance(deps, dict)
|
|
|
|
assert deps['words'] == [{'text': 'This', 'tag': 'DT'},
|
|
|
|
{'text': 'is', 'tag': 'VBZ'},
|
|
|
|
{'text': 'a', 'tag': 'DT'},
|
|
|
|
{'text': 'sentence', 'tag': 'NN'}]
|
|
|
|
assert deps['arcs'] == [{'start': 0, 'end': 1, 'label': 'nsubj', 'dir': 'left'},
|
|
|
|
{'start': 2, 'end': 3, 'label': 'det', 'dir': 'left'},
|
|
|
|
{'start': 1, 'end': 3, 'label': 'attr', 'dir': 'right'}]
|