mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-24 12:41:23 +03:00
attempt to fix _guess_spaces
This commit is contained in:
parent
5b1d15e247
commit
5b6ed05752
|
@@ -73,7 +73,7 @@ cdef class Example:
|
|||
tok_dict["ORTH"] = [tok.text for tok in predicted]
|
||||
tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
|
||||
if not _has_field(tok_dict, "SPACY"):
|
||||
-spaces = _guess_spaces(predicted.text, tok_dict["ORTH"])
|
||||
+tok_dict["SPACY"] = _guess_spaces(predicted.text, tok_dict["ORTH"])
|
||||
return Example(
|
||||
predicted,
|
||||
annotations2doc(predicted.vocab, tok_dict, doc_dict)
|
||||
|
@@ -333,8 +333,6 @@ def _fix_legacy_dict_data(example_dict):
|
|||
else:
|
||||
raise KeyError(Errors.E983.format(key=key, dict="token_annotation", keys=remapping.keys()))
|
||||
text = example_dict.get("text", example_dict.get("raw"))
|
||||
if not _has_field(token_dict, "SPACY"):
|
||||
token_dict["SPACY"] = _guess_spaces(text, token_dict["ORTH"])
|
||||
if "HEAD" in token_dict and "SENT_START" in token_dict:
|
||||
# If heads are set, we don't also redundantly specify SENT_START.
|
||||
token_dict.pop("SENT_START")
|
||||
|
|
|
@@ -161,6 +161,7 @@ def test_example_from_dict_no_ner(en_vocab):
|
|||
ner_tags = example.get_aligned_ner()
|
||||
assert ner_tags == [None, None, None, None]
|
||||
|
||||
|
||||
def test_example_from_dict_some_ner(en_vocab):
|
||||
words = ["a", "b", "c", "d"]
|
||||
spaces = [True, True, False, True]
|
||||
|
|
Loading…
Reference in New Issue
Block a user