fix spaces

2025-07-04 03:43:09 +03:00 · 2020-06-16 12:08:25 +02:00 · 2020-06-16 12:08:25 +02:00 · 1c35b8efcd
commit 1c35b8efcd
parent 6fea5fa4bd
2 changed files with 8 additions and 8 deletions
--- a/spacy/gold/example.pyx
+++ b/spacy/gold/example.pyx
@@ -55,8 +55,9 @@ cdef class Example:
        tok_dict, doc_dict = _parse_example_dict_data(example_dict)
        if "ORTH" not in tok_dict:
            tok_dict["ORTH"] = [tok.text for tok in predicted]
        if "SPACY" not in tok_dict:
            tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
        if "SPACY" not in tok_dict:
            tok_dict["SPACY"] = None
        return Example(
            predicted,
            annotations2doc(predicted.vocab, tok_dict, doc_dict)
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -166,7 +166,7 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
    spaces = [True, True, True, True, True, False, False]
    doc = Doc(en_vocab, words=words, spaces=spaces)
    entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
-    gold_words =["I", "flew to", "San Francisco Valley", "."]
+    gold_words = ["I", "flew to", "San Francisco Valley", "."]
    example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
    assert example.get_aligned("ENT_IOB") == [2, 2, 2, 3, 1, 1, 2]
    assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "LOC", "LOC", "LOC", ""]
@@ -188,12 +188,11 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
    )
    doc = Doc(en_vocab, words=words, spaces=spaces)
    entities = [(len("I flew  to "), len("I flew  to San Francisco Valley"), "LOC")]
-    gp = GoldParse(
+    gold_words = ["I", "flew", " ", "to", "San Francisco Valley", "."]
-        doc,
+    gold_spaces = [True, True, False, True, False, False]
-        words=["I", "flew", " ", "to", "San Francisco Valley", "."],
+    example = Example.from_dict(doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities})
-        entities=entities,
+    assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 1, 2]
-    )
+    assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "", "LOC", "LOC", ""]
    assert gp.ner == ["O", "O", "O", "O", "B-LOC", "L-LOC", "O"]
    # from issue #4791
    data = (