Mirror of https://github.com/explosion/spaCy.git (synced 2025-01-26 01:04:34 +03:00)

Commit ddc73b11a9: fix unicode literals
Parent: f4af47ce4a
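For context (an illustrative note, not part of the commit itself): once a module has "from __future__ import unicode_literals" at the top, bare string literals are already unicode on Python 2, and on Python 3 every string literal is unicode anyway, so the explicit u'' prefix used in these tests is redundant and can be dropped without changing behaviour. A minimal sketch of that equivalence, assuming nothing beyond the standard library:

    # Illustrative only; not code from the diff below.
    from __future__ import unicode_literals

    # With the future import, a bare literal compares equal to its u'' form
    # on Python 2, and trivially so on Python 3.
    assert 'Q1' == u'Q1'
    # Bare literals are instances of the unicode string type in both cases.
    assert isinstance('Q1', type(u''))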
@@ -17,13 +17,13 @@ def test_kb_valid_entities(nlp):
     mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

     # adding entities
-    mykb.add_entity(entity=u'Q1', prob=0.9, entity_vector=[1])
-    mykb.add_entity(entity=u'Q2', prob=0.5, entity_vector=[2])
-    mykb.add_entity(entity=u'Q3', prob=0.5, entity_vector=[3])
+    mykb.add_entity(entity='Q1', prob=0.9, entity_vector=[1])
+    mykb.add_entity(entity='Q2', prob=0.5, entity_vector=[2])
+    mykb.add_entity(entity='Q3', prob=0.5, entity_vector=[3])

     # adding aliases
-    mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.8, 0.2])
-    mykb.add_alias(alias=u'adam', entities=[u'Q2'], probabilities=[0.9])
+    mykb.add_alias(alias='douglas', entities=['Q2', 'Q3'], probabilities=[0.8, 0.2])
+    mykb.add_alias(alias='adam', entities=['Q2'], probabilities=[0.9])

     # test the size of the corresponding KB
     assert(mykb.get_size_entities() == 3)
@@ -35,13 +35,13 @@ def test_kb_invalid_entities(nlp):
     mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

     # adding entities
-    mykb.add_entity(entity=u'Q1', prob=0.9, entity_vector=[1])
-    mykb.add_entity(entity=u'Q2', prob=0.2, entity_vector=[2])
-    mykb.add_entity(entity=u'Q3', prob=0.5, entity_vector=[3])
+    mykb.add_entity(entity='Q1', prob=0.9, entity_vector=[1])
+    mykb.add_entity(entity='Q2', prob=0.2, entity_vector=[2])
+    mykb.add_entity(entity='Q3', prob=0.5, entity_vector=[3])

     # adding aliases - should fail because one of the given IDs is not valid
     with pytest.raises(ValueError):
-        mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q342'], probabilities=[0.8, 0.2])
+        mykb.add_alias(alias='douglas', entities=['Q2', 'Q342'], probabilities=[0.8, 0.2])


 def test_kb_invalid_probabilities(nlp):
@@ -49,13 +49,13 @@ def test_kb_invalid_probabilities(nlp):
     mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

     # adding entities
-    mykb.add_entity(entity=u'Q1', prob=0.9, entity_vector=[1])
-    mykb.add_entity(entity=u'Q2', prob=0.2, entity_vector=[2])
-    mykb.add_entity(entity=u'Q3', prob=0.5, entity_vector=[3])
+    mykb.add_entity(entity='Q1', prob=0.9, entity_vector=[1])
+    mykb.add_entity(entity='Q2', prob=0.2, entity_vector=[2])
+    mykb.add_entity(entity='Q3', prob=0.5, entity_vector=[3])

     # adding aliases - should fail because the sum of the probabilities exceeds 1
     with pytest.raises(ValueError):
-        mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.8, 0.4])
+        mykb.add_alias(alias='douglas', entities=['Q2', 'Q3'], probabilities=[0.8, 0.4])


 def test_kb_invalid_combination(nlp):
@@ -63,13 +63,13 @@ def test_kb_invalid_combination(nlp):
     mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

     # adding entities
-    mykb.add_entity(entity=u'Q1', prob=0.9, entity_vector=[1])
-    mykb.add_entity(entity=u'Q2', prob=0.2, entity_vector=[2])
-    mykb.add_entity(entity=u'Q3', prob=0.5, entity_vector=[3])
+    mykb.add_entity(entity='Q1', prob=0.9, entity_vector=[1])
+    mykb.add_entity(entity='Q2', prob=0.2, entity_vector=[2])
+    mykb.add_entity(entity='Q3', prob=0.5, entity_vector=[3])

     # adding aliases - should fail because the entities and probabilities vectors are not of equal length
     with pytest.raises(ValueError):
-        mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.3, 0.4, 0.1])
+        mykb.add_alias(alias='douglas', entities=['Q2', 'Q3'], probabilities=[0.3, 0.4, 0.1])


 def test_kb_invalid_entity_vector(nlp):
@@ -77,11 +77,11 @@ def test_kb_invalid_entity_vector(nlp):
     mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)

     # adding entities
-    mykb.add_entity(entity=u'Q1', prob=0.9, entity_vector=[1, 2, 3])
+    mykb.add_entity(entity='Q1', prob=0.9, entity_vector=[1, 2, 3])

     # this should fail because the kb's expected entity vector length is 3
     with pytest.raises(ValueError):
-        mykb.add_entity(entity=u'Q2', prob=0.2, entity_vector=[2])
+        mykb.add_entity(entity='Q2', prob=0.2, entity_vector=[2])


 def test_candidate_generation(nlp):
@@ -89,15 +89,15 @@ def test_candidate_generation(nlp):
     mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

     # adding entities
-    mykb.add_entity(entity=u'Q1', prob=0.9, entity_vector=[1])
-    mykb.add_entity(entity=u'Q2', prob=0.2, entity_vector=[2])
-    mykb.add_entity(entity=u'Q3', prob=0.5, entity_vector=[3])
+    mykb.add_entity(entity='Q1', prob=0.9, entity_vector=[1])
+    mykb.add_entity(entity='Q2', prob=0.2, entity_vector=[2])
+    mykb.add_entity(entity='Q3', prob=0.5, entity_vector=[3])

     # adding aliases
-    mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.8, 0.2])
-    mykb.add_alias(alias=u'adam', entities=[u'Q2'], probabilities=[0.9])
+    mykb.add_alias(alias='douglas', entities=['Q2', 'Q3'], probabilities=[0.8, 0.2])
+    mykb.add_alias(alias='adam', entities=['Q2'], probabilities=[0.9])

     # test the size of the relevant candidates
-    assert(len(mykb.get_candidates(u'douglas')) == 2)
-    assert(len(mykb.get_candidates(u'adam')) == 1)
-    assert(len(mykb.get_candidates(u'shrubbery')) == 0)
+    assert(len(mykb.get_candidates('douglas')) == 2)
+    assert(len(mykb.get_candidates('adam')) == 1)
+    assert(len(mykb.get_candidates('shrubbery')) == 0)

@@ -1,4 +1,6 @@
+# coding: utf-8
+from __future__ import unicode_literals

 from ..util import make_tempdir
 from ...util import ensure_path
@@ -16,7 +18,6 @@ def test_serialize_kb_disk(en_vocab):
     if not dir_path.exists():
         dir_path.mkdir()
     file_path = dir_path / "kb"
-    print(file_path, type(file_path))
     kb1.dump(str(file_path))

     kb2 = KnowledgeBase(vocab=en_vocab, entity_vector_length=3)
@@ -29,14 +30,14 @@ def _get_dummy_kb(vocab):
     kb = KnowledgeBase(vocab=vocab, entity_vector_length=3)

-    kb.add_entity(entity=u'Q53', prob=0.33, entity_vector=[0, 5, 3])
-    kb.add_entity(entity=u'Q17', prob=0.2, entity_vector=[7, 1, 0])
-    kb.add_entity(entity=u'Q007', prob=0.7, entity_vector=[0, 0, 7])
-    kb.add_entity(entity=u'Q44', prob=0.4, entity_vector=[4, 4, 4])
+    kb.add_entity(entity='Q53', prob=0.33, entity_vector=[0, 5, 3])
+    kb.add_entity(entity='Q17', prob=0.2, entity_vector=[7, 1, 0])
+    kb.add_entity(entity='Q007', prob=0.7, entity_vector=[0, 0, 7])
+    kb.add_entity(entity='Q44', prob=0.4, entity_vector=[4, 4, 4])

-    kb.add_alias(alias=u'double07', entities=[u'Q17', u'Q007'], probabilities=[0.1, 0.9])
-    kb.add_alias(alias=u'guy', entities=[u'Q53', u'Q007', u'Q17', u'Q44'], probabilities=[0.3, 0.3, 0.2, 0.1])
-    kb.add_alias(alias=u'random', entities=[u'Q007'], probabilities=[1.0])
+    kb.add_alias(alias='double07', entities=['Q17', 'Q007'], probabilities=[0.1, 0.9])
+    kb.add_alias(alias='guy', entities=['Q53', 'Q007', 'Q17', 'Q44'], probabilities=[0.3, 0.3, 0.2, 0.1])
+    kb.add_alias(alias='random', entities=['Q007'], probabilities=[1.0])

     return kb

@@ -44,30 +45,30 @@ def _check_kb(kb):
 def _check_kb(kb):
     # check entities
     assert kb.get_size_entities() == 4
-    for entity_string in [u'Q53', u'Q17', u'Q007', u'Q44']:
+    for entity_string in ['Q53', 'Q17', 'Q007', 'Q44']:
         assert entity_string in kb.get_entity_strings()
-    for entity_string in [u'', u'Q0']:
+    for entity_string in ['', 'Q0']:
         assert entity_string not in kb.get_entity_strings()

     # check aliases
     assert kb.get_size_aliases() == 3
-    for alias_string in [u'double07', u'guy', u'random']:
+    for alias_string in ['double07', 'guy', 'random']:
         assert alias_string in kb.get_alias_strings()
-    for alias_string in [u'nothingness', u'', u'randomnoise']:
+    for alias_string in ['nothingness', '', 'randomnoise']:
         assert alias_string not in kb.get_alias_strings()

     # check candidates & probabilities
-    candidates = sorted(kb.get_candidates(u'double07'), key=lambda x: x.entity_)
+    candidates = sorted(kb.get_candidates('double07'), key=lambda x: x.entity_)
     assert len(candidates) == 2

-    assert candidates[0].entity_ == u'Q007'
+    assert candidates[0].entity_ == 'Q007'
     assert 0.6999 < candidates[0].entity_freq < 0.701
     assert candidates[0].entity_vector == [0, 0, 7]
-    assert candidates[0].alias_ == u'double07'
+    assert candidates[0].alias_ == 'double07'
     assert 0.899 < candidates[0].prior_prob < 0.901

-    assert candidates[1].entity_ == u'Q17'
+    assert candidates[1].entity_ == 'Q17'
     assert 0.199 < candidates[1].entity_freq < 0.201
     assert candidates[1].entity_vector == [7, 1, 0]
-    assert candidates[1].alias_ == u'double07'
+    assert candidates[1].alias_ == 'double07'
     assert 0.099 < candidates[1].prior_prob < 0.101