mirror of
https://github.com/explosion/spaCy.git
synced 2025-05-03 15:23:41 +03:00
avoid empty aliases and improve UX and docs (#6840)
This commit is contained in:
parent
837a4f53c2
commit
24a697abb8
|
@ -470,6 +470,10 @@ class Errors:
|
||||||
"issue tracker: http://github.com/explosion/spaCy/issues")
|
"issue tracker: http://github.com/explosion/spaCy/issues")
|
||||||
|
|
||||||
# TODO: fix numbering after merging develop into master
|
# TODO: fix numbering after merging develop into master
|
||||||
|
E890 = ("Can not add the alias '{alias}' to the Knowledge base. "
|
||||||
|
"Each alias should be a meaningful string.")
|
||||||
|
E891 = ("Alias '{alias}' could not be added to the Knowledge base. "
|
||||||
|
"This is likely a bug in spaCy.")
|
||||||
E892 = ("Unknown function registry: '{name}'.\n\nAvailable names: {available}")
|
E892 = ("Unknown function registry: '{name}'.\n\nAvailable names: {available}")
|
||||||
E893 = ("Could not find function '{name}' in function registry '{reg_name}'. "
|
E893 = ("Could not find function '{name}' in function registry '{reg_name}'. "
|
||||||
"If you're using a custom function, make sure the code is available. "
|
"If you're using a custom function, make sure the code is available. "
|
||||||
|
|
|
@ -187,6 +187,10 @@ cdef class KnowledgeBase:
|
||||||
For a given alias, add its potential entities and prior probabilies to the KB.
|
For a given alias, add its potential entities and prior probabilies to the KB.
|
||||||
Return the alias_hash at the end
|
Return the alias_hash at the end
|
||||||
"""
|
"""
|
||||||
|
if alias is None or len(alias) == 0:
|
||||||
|
raise ValueError(Errors.E890.format(alias=alias))
|
||||||
|
|
||||||
|
previous_alias_nr = self.get_size_aliases()
|
||||||
# Throw an error if the length of entities and probabilities are not the same
|
# Throw an error if the length of entities and probabilities are not the same
|
||||||
if not len(entities) == len(probabilities):
|
if not len(entities) == len(probabilities):
|
||||||
raise ValueError(Errors.E132.format(alias=alias,
|
raise ValueError(Errors.E132.format(alias=alias,
|
||||||
|
@ -220,6 +224,8 @@ cdef class KnowledgeBase:
|
||||||
new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
|
new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
|
||||||
self._alias_index[alias_hash] = new_index
|
self._alias_index[alias_hash] = new_index
|
||||||
|
|
||||||
|
if previous_alias_nr + 1 != self.get_size_aliases():
|
||||||
|
raise RuntimeError(Errors.E891.format(alias=alias))
|
||||||
return alias_hash
|
return alias_hash
|
||||||
|
|
||||||
def append_alias(self, unicode alias, unicode entity, float prior_prob, ignore_warnings=False):
|
def append_alias(self, unicode alias, unicode entity, float prior_prob, ignore_warnings=False):
|
||||||
|
|
23
spacy/tests/regression/test_issue6730.py
Normal file
23
spacy/tests/regression/test_issue6730.py
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
import pytest
|
||||||
|
from ..util import make_tempdir
|
||||||
|
|
||||||
|
|
||||||
|
def test_issue6730(en_vocab):
|
||||||
|
"""Ensure that the KB does not accept empty strings, but otherwise IO works fine."""
|
||||||
|
from spacy.kb import KnowledgeBase
|
||||||
|
|
||||||
|
kb = KnowledgeBase(en_vocab, entity_vector_length=3)
|
||||||
|
kb.add_entity(entity="1", freq=148, entity_vector=[1, 2, 3])
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
kb.add_alias(alias="", entities=["1"], probabilities=[0.4])
|
||||||
|
assert kb.contains_alias("") is False
|
||||||
|
|
||||||
|
kb.add_alias(alias="x", entities=["1"], probabilities=[0.2])
|
||||||
|
kb.add_alias(alias="y", entities=["1"], probabilities=[0.1])
|
||||||
|
|
||||||
|
with make_tempdir() as tmp_dir:
|
||||||
|
kb.to_disk(tmp_dir)
|
||||||
|
kb.from_disk(tmp_dir)
|
||||||
|
assert kb.get_size_aliases() == 2
|
||||||
|
assert set(kb.get_alias_strings()) == {"x", "y"}
|
|
@ -82,7 +82,7 @@ Add an alias or mention to the knowledge base, specifying its potential KB
|
||||||
identifiers and their prior probabilities. The entity identifiers should refer
|
identifiers and their prior probabilities. The entity identifiers should refer
|
||||||
to entities previously added with [`add_entity`](/api/kb#add_entity) or
|
to entities previously added with [`add_entity`](/api/kb#add_entity) or
|
||||||
[`set_entities`](/api/kb#set_entities). The sum of the prior probabilities
|
[`set_entities`](/api/kb#set_entities). The sum of the prior probabilities
|
||||||
should not exceed 1.
|
should not exceed 1. Note that an empty string can not be used as alias.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -92,7 +92,7 @@ should not exceed 1.
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| --------------- | --------------------------------------------------------------------------------- |
|
| --------------- | --------------------------------------------------------------------------------- |
|
||||||
| `alias` | The textual mention or alias. ~~str~~ |
|
| `alias` | The textual mention or alias. Can not be the empty string. ~~str~~ |
|
||||||
| `entities` | The potential entities that the alias may refer to. ~~Iterable[Union[str, int]]~~ |
|
| `entities` | The potential entities that the alias may refer to. ~~Iterable[Union[str, int]]~~ |
|
||||||
| `probabilities` | The prior probabilities of each entity. ~~Iterable[float]~~ |
|
| `probabilities` | The prior probabilities of each entity. ~~Iterable[float]~~ |
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user