mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	avoid empty aliases and improve UX and docs (#6840)
This commit is contained in:
		
							parent
							
								
									837a4f53c2
								
							
						
					
					
						commit
						24a697abb8
					
				| 
						 | 
					@ -470,6 +470,10 @@ class Errors:
 | 
				
			||||||
            "issue tracker: http://github.com/explosion/spaCy/issues")
 | 
					            "issue tracker: http://github.com/explosion/spaCy/issues")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # TODO: fix numbering after merging develop into master
 | 
					    # TODO: fix numbering after merging develop into master
 | 
				
			||||||
 | 
					    E890 = ("Can not add the alias '{alias}' to the Knowledge base. "
 | 
				
			||||||
 | 
					            "Each alias should be a meaningful string.")
 | 
				
			||||||
 | 
					    E891 = ("Alias '{alias}' could not be added to the Knowledge base. "
 | 
				
			||||||
 | 
					            "This is likely a bug in spaCy.")
 | 
				
			||||||
    E892 = ("Unknown function registry: '{name}'.\n\nAvailable names: {available}")
 | 
					    E892 = ("Unknown function registry: '{name}'.\n\nAvailable names: {available}")
 | 
				
			||||||
    E893 = ("Could not find function '{name}' in function registry '{reg_name}'. "
 | 
					    E893 = ("Could not find function '{name}' in function registry '{reg_name}'. "
 | 
				
			||||||
            "If you're using a custom function, make sure the code is available. "
 | 
					            "If you're using a custom function, make sure the code is available. "
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -187,6 +187,10 @@ cdef class KnowledgeBase:
 | 
				
			||||||
        For a given alias, add its potential entities and prior probabilities to the KB.
 | 
					        For a given alias, add its potential entities and prior probabilities to the KB.
 | 
				
			||||||
        Return the alias_hash at the end
 | 
					        Return the alias_hash at the end
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
 | 
					        if alias is None or len(alias) == 0:
 | 
				
			||||||
 | 
					            raise ValueError(Errors.E890.format(alias=alias))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        previous_alias_nr = self.get_size_aliases()
 | 
				
			||||||
        # Throw an error if the length of entities and probabilities are not the same
 | 
					        # Throw an error if the length of entities and probabilities are not the same
 | 
				
			||||||
        if not len(entities) == len(probabilities):
 | 
					        if not len(entities) == len(probabilities):
 | 
				
			||||||
            raise ValueError(Errors.E132.format(alias=alias,
 | 
					            raise ValueError(Errors.E132.format(alias=alias,
 | 
				
			||||||
| 
						 | 
					@ -220,6 +224,8 @@ cdef class KnowledgeBase:
 | 
				
			||||||
        new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
 | 
					        new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
 | 
				
			||||||
        self._alias_index[alias_hash] = new_index
 | 
					        self._alias_index[alias_hash] = new_index
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if previous_alias_nr + 1 != self.get_size_aliases():
 | 
				
			||||||
 | 
					            raise RuntimeError(Errors.E891.format(alias=alias))
 | 
				
			||||||
        return alias_hash
 | 
					        return alias_hash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def append_alias(self, unicode alias, unicode entity, float prior_prob, ignore_warnings=False):
 | 
					    def append_alias(self, unicode alias, unicode entity, float prior_prob, ignore_warnings=False):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										23
									
								
								spacy/tests/regression/test_issue6730.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								spacy/tests/regression/test_issue6730.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,23 @@
 | 
				
			||||||
 | 
					import pytest
 | 
				
			||||||
 | 
					from ..util import make_tempdir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_issue6730(en_vocab):
 | 
				
			||||||
 | 
					    """Ensure that the KB does not accept empty strings, but otherwise IO works fine."""
 | 
				
			||||||
 | 
					    from spacy.kb import KnowledgeBase
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    kb = KnowledgeBase(en_vocab, entity_vector_length=3)
 | 
				
			||||||
 | 
					    kb.add_entity(entity="1", freq=148, entity_vector=[1, 2, 3])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    with pytest.raises(ValueError):
 | 
				
			||||||
 | 
					        kb.add_alias(alias="", entities=["1"], probabilities=[0.4])
 | 
				
			||||||
 | 
					    assert kb.contains_alias("") is False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    kb.add_alias(alias="x", entities=["1"], probabilities=[0.2])
 | 
				
			||||||
 | 
					    kb.add_alias(alias="y", entities=["1"], probabilities=[0.1])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    with make_tempdir() as tmp_dir:
 | 
				
			||||||
 | 
					        kb.to_disk(tmp_dir)
 | 
				
			||||||
 | 
					        kb.from_disk(tmp_dir)
 | 
				
			||||||
 | 
					    assert kb.get_size_aliases() == 2
 | 
				
			||||||
 | 
					    assert set(kb.get_alias_strings()) == {"x", "y"}
 | 
				
			||||||
| 
						 | 
					@ -82,7 +82,7 @@ Add an alias or mention to the knowledge base, specifying its potential KB
 | 
				
			||||||
identifiers and their prior probabilities. The entity identifiers should refer
 | 
					identifiers and their prior probabilities. The entity identifiers should refer
 | 
				
			||||||
to entities previously added with [`add_entity`](/api/kb#add_entity) or
 | 
					to entities previously added with [`add_entity`](/api/kb#add_entity) or
 | 
				
			||||||
[`set_entities`](/api/kb#set_entities). The sum of the prior probabilities
 | 
					[`set_entities`](/api/kb#set_entities). The sum of the prior probabilities
 | 
				
			||||||
should not exceed 1.
 | 
					should not exceed 1. Note that an empty string cannot be used as an alias.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
> #### Example
 | 
					> #### Example
 | 
				
			||||||
>
 | 
					>
 | 
				
			||||||
| 
						 | 
					@ -92,7 +92,7 @@ should not exceed 1.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| Name            | Description                                                                       |
 | 
					| Name            | Description                                                                       |
 | 
				
			||||||
| --------------- | --------------------------------------------------------------------------------- |
 | 
					| --------------- | --------------------------------------------------------------------------------- |
 | 
				
			||||||
| `alias`         | The textual mention or alias. ~~str~~                                             |
 | 
					| `alias`         | The textual mention or alias. Cannot be the empty string. ~~str~~                 |
 | 
				
			||||||
| `entities`      | The potential entities that the alias may refer to. ~~Iterable[Union[str, int]]~~ |
 | 
					| `entities`      | The potential entities that the alias may refer to. ~~Iterable[Union[str, int]]~~ |
 | 
				
			||||||
| `probabilities` | The prior probabilities of each entity. ~~Iterable[float]~~                       |
 | 
					| `probabilities` | The prior probabilities of each entity. ~~Iterable[float]~~                       |
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user