mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
avoid empty aliases and improve UX and docs (#6840)
This commit is contained in:
parent
837a4f53c2
commit
24a697abb8
|
@ -470,6 +470,10 @@ class Errors:
|
|||
"issue tracker: http://github.com/explosion/spaCy/issues")
|
||||
|
||||
# TODO: fix numbering after merging develop into master
|
||||
E890 = ("Can not add the alias '{alias}' to the Knowledge base. "
|
||||
"Each alias should be a meaningful string.")
|
||||
E891 = ("Alias '{alias}' could not be added to the Knowledge base. "
|
||||
"This is likely a bug in spaCy.")
|
||||
E892 = ("Unknown function registry: '{name}'.\n\nAvailable names: {available}")
|
||||
E893 = ("Could not find function '{name}' in function registry '{reg_name}'. "
|
||||
"If you're using a custom function, make sure the code is available. "
|
||||
|
|
|
@ -187,6 +187,10 @@ cdef class KnowledgeBase:
|
|||
For a given alias, add its potential entities and prior probabilies to the KB.
|
||||
Return the alias_hash at the end
|
||||
"""
|
||||
if alias is None or len(alias) == 0:
|
||||
raise ValueError(Errors.E890.format(alias=alias))
|
||||
|
||||
previous_alias_nr = self.get_size_aliases()
|
||||
# Throw an error if the length of entities and probabilities are not the same
|
||||
if not len(entities) == len(probabilities):
|
||||
raise ValueError(Errors.E132.format(alias=alias,
|
||||
|
@ -220,6 +224,8 @@ cdef class KnowledgeBase:
|
|||
new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
|
||||
self._alias_index[alias_hash] = new_index
|
||||
|
||||
if previous_alias_nr + 1 != self.get_size_aliases():
|
||||
raise RuntimeError(Errors.E891.format(alias=alias))
|
||||
return alias_hash
|
||||
|
||||
def append_alias(self, unicode alias, unicode entity, float prior_prob, ignore_warnings=False):
|
||||
|
|
23
spacy/tests/regression/test_issue6730.py
Normal file
23
spacy/tests/regression/test_issue6730.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
import pytest
|
||||
from ..util import make_tempdir
|
||||
|
||||
|
||||
def test_issue6730(en_vocab):
|
||||
"""Ensure that the KB does not accept empty strings, but otherwise IO works fine."""
|
||||
from spacy.kb import KnowledgeBase
|
||||
|
||||
kb = KnowledgeBase(en_vocab, entity_vector_length=3)
|
||||
kb.add_entity(entity="1", freq=148, entity_vector=[1, 2, 3])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
kb.add_alias(alias="", entities=["1"], probabilities=[0.4])
|
||||
assert kb.contains_alias("") is False
|
||||
|
||||
kb.add_alias(alias="x", entities=["1"], probabilities=[0.2])
|
||||
kb.add_alias(alias="y", entities=["1"], probabilities=[0.1])
|
||||
|
||||
with make_tempdir() as tmp_dir:
|
||||
kb.to_disk(tmp_dir)
|
||||
kb.from_disk(tmp_dir)
|
||||
assert kb.get_size_aliases() == 2
|
||||
assert set(kb.get_alias_strings()) == {"x", "y"}
|
|
@ -82,7 +82,7 @@ Add an alias or mention to the knowledge base, specifying its potential KB
|
|||
identifiers and their prior probabilities. The entity identifiers should refer
|
||||
to entities previously added with [`add_entity`](/api/kb#add_entity) or
|
||||
[`set_entities`](/api/kb#set_entities). The sum of the prior probabilities
|
||||
should not exceed 1.
|
||||
should not exceed 1. Note that an empty string can not be used as alias.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
|
@ -92,7 +92,7 @@ should not exceed 1.
|
|||
|
||||
| Name | Description |
|
||||
| --------------- | --------------------------------------------------------------------------------- |
|
||||
| `alias` | The textual mention or alias. ~~str~~ |
|
||||
| `alias` | The textual mention or alias. Can not be the empty string. ~~str~~ |
|
||||
| `entities` | The potential entities that the alias may refer to. ~~Iterable[Union[str, int]]~~ |
|
||||
| `probabilities` | The prior probabilities of each entity. ~~Iterable[float]~~ |
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user