From 1d672e0c12daecbff34b58dcafe237c66fd4d92e Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 26 Jun 2020 23:42:41 +0200 Subject: [PATCH] Revert "attempt to fix _guess_spaces" This reverts commit 5b6ed0575275e86762cc58dab7b01b7fb2a97b63. --- spacy/gold/example.pyx | 4 +++- spacy/tests/test_gold.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx index febbf50fc..7f8797043 100644 --- a/spacy/gold/example.pyx +++ b/spacy/gold/example.pyx @@ -73,7 +73,7 @@ cdef class Example: tok_dict["ORTH"] = [tok.text for tok in predicted] tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted] if not _has_field(tok_dict, "SPACY"): - tok_dict["SPACY"] = _guess_spaces(predicted.text, tok_dict["ORTH"]) + spaces = _guess_spaces(predicted.text, tok_dict["ORTH"]) return Example( predicted, annotations2doc(predicted.vocab, tok_dict, doc_dict) @@ -336,6 +336,8 @@ def _fix_legacy_dict_data(example_dict): else: raise KeyError(Errors.E983.format(key=key, dict="token_annotation", keys=remapping.keys())) text = example_dict.get("text", example_dict.get("raw")) + if not _has_field(token_dict, "SPACY"): + token_dict["SPACY"] = _guess_spaces(text, token_dict["ORTH"]) if "HEAD" in token_dict and "SENT_START" in token_dict: # If heads are set, we don't also redundantly specify SENT_START. token_dict.pop("SENT_START") diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py index a7c476688..17f0933d1 100644 --- a/spacy/tests/test_gold.py +++ b/spacy/tests/test_gold.py @@ -161,7 +161,6 @@ def test_example_from_dict_no_ner(en_vocab): ner_tags = example.get_aligned_ner() assert ner_tags == [None, None, None, None] - def test_example_from_dict_some_ner(en_vocab): words = ["a", "b", "c", "d"] spaces = [True, True, False, True]