Revert "attempt to fix _guess_spaces"

This reverts commit 5b6ed05752.
This commit is contained in:
Matthew Honnibal 2020-06-26 23:42:41 +02:00
parent 8c29268749
commit 1d672e0c12
2 changed files with 3 additions and 2 deletions

View File

@@ -73,7 +73,7 @@ cdef class Example:
tok_dict["ORTH"] = [tok.text for tok in predicted]
tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
if not _has_field(tok_dict, "SPACY"):
tok_dict["SPACY"] = _guess_spaces(predicted.text, tok_dict["ORTH"])
spaces = _guess_spaces(predicted.text, tok_dict["ORTH"])
return Example(
predicted,
annotations2doc(predicted.vocab, tok_dict, doc_dict)
@@ -336,6 +336,8 @@ def _fix_legacy_dict_data(example_dict):
else:
raise KeyError(Errors.E983.format(key=key, dict="token_annotation", keys=remapping.keys()))
text = example_dict.get("text", example_dict.get("raw"))
if not _has_field(token_dict, "SPACY"):
token_dict["SPACY"] = _guess_spaces(text, token_dict["ORTH"])
if "HEAD" in token_dict and "SENT_START" in token_dict:
# If heads are set, we don't also redundantly specify SENT_START.
token_dict.pop("SENT_START")

View File

@@ -161,7 +161,6 @@ def test_example_from_dict_no_ner(en_vocab):
ner_tags = example.get_aligned_ner()
assert ner_tags == [None, None, None, None]
def test_example_from_dict_some_ner(en_vocab):
words = ["a", "b", "c", "d"]
spaces = [True, True, False, True]