mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
Add ent_id_ to strings serialized with Doc (#6353)
This commit is contained in:
parent
d490428089
commit
320a8b1481
|
@ -109,6 +109,7 @@ def test_doc_api_serialize(en_tokenizer, text):
|
|||
tokens[0].lemma_ = "lemma"
|
||||
tokens[0].norm_ = "norm"
|
||||
tokens[0].ent_kb_id_ = "ent_kb_id"
|
||||
tokens[0].ent_id_ = "ent_id"
|
||||
new_tokens = Doc(tokens.vocab).from_bytes(tokens.to_bytes())
|
||||
assert tokens.text == new_tokens.text
|
||||
assert [t.text for t in tokens] == [t.text for t in new_tokens]
|
||||
|
@ -116,6 +117,7 @@ def test_doc_api_serialize(en_tokenizer, text):
|
|||
assert new_tokens[0].lemma_ == "lemma"
|
||||
assert new_tokens[0].norm_ == "norm"
|
||||
assert new_tokens[0].ent_kb_id_ == "ent_kb_id"
|
||||
assert new_tokens[0].ent_id_ == "ent_id"
|
||||
|
||||
new_tokens = Doc(tokens.vocab).from_bytes(
|
||||
tokens.to_bytes(exclude=["tensor"]), exclude=["tensor"]
|
||||
|
|
|
@ -166,3 +166,22 @@ def test_entity_ruler_overlapping_spans(nlp):
|
|||
doc = ruler(nlp.make_doc("foo bar baz"))
|
||||
assert len(doc.ents) == 1
|
||||
assert doc.ents[0].label_ == "FOOBAR"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_process", [1, 2])
def test_entity_ruler_multiprocessing(nlp, n_process):
    """Check that entity IDs assigned by the EntityRuler survive `nlp.pipe`.

    Regression test for `ent_id_` being included in the strings serialized
    with a Doc (#6353). Runs once per parametrized `n_process` value so both
    the single-process and the multi-process code paths are covered.
    """
    ruler = EntityRuler(nlp)
    texts = ["I enjoy eating Pizza Hut pizza."]
    patterns = [{"label": "FASTFOOD", "pattern": "Pizza Hut", "id": "1234"}]

    ruler.add_patterns(patterns)
    nlp.add_pipe(ruler)

    # Use the parametrized value: hard-coding n_process=2 here made both
    # parametrized runs identical and left the n_process=1 path untested.
    for doc in nlp.pipe(texts, n_process=n_process):
        for ent in doc.ents:
            assert ent.ent_id_ == "1234"
|
||||
|
|
|
@ -933,6 +933,7 @@ cdef class Doc:
|
|||
strings.add(token.dep_)
|
||||
strings.add(token.ent_type_)
|
||||
strings.add(token.ent_kb_id_)
|
||||
strings.add(token.ent_id_)
|
||||
strings.add(token.norm_)
|
||||
# Msgpack doesn't distinguish between lists and tuples, which is
|
||||
# vexing for user data. As a best guess, we *know* that within
|
||||
|
|
Loading…
Reference in New Issue
Block a user