mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-11 23:05:50 +03:00
fix spaces
This commit is contained in:
parent
6fea5fa4bd
commit
1c35b8efcd
|
@ -55,8 +55,9 @@ cdef class Example:
|
|||
tok_dict, doc_dict = _parse_example_dict_data(example_dict)
|
||||
if "ORTH" not in tok_dict:
|
||||
tok_dict["ORTH"] = [tok.text for tok in predicted]
|
||||
if "SPACY" not in tok_dict:
|
||||
tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
|
||||
if "SPACY" not in tok_dict:
|
||||
tok_dict["SPACY"] = None
|
||||
return Example(
|
||||
predicted,
|
||||
annotations2doc(predicted.vocab, tok_dict, doc_dict)
|
||||
|
|
|
@ -166,7 +166,7 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
|
|||
spaces = [True, True, True, True, True, False, False]
|
||||
doc = Doc(en_vocab, words=words, spaces=spaces)
|
||||
entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
|
||||
gold_words =["I", "flew to", "San Francisco Valley", "."]
|
||||
gold_words = ["I", "flew to", "San Francisco Valley", "."]
|
||||
example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
|
||||
assert example.get_aligned("ENT_IOB") == [2, 2, 2, 3, 1, 1, 2]
|
||||
assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "LOC", "LOC", "LOC", ""]
|
||||
|
@ -188,12 +188,11 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
|
|||
)
|
||||
doc = Doc(en_vocab, words=words, spaces=spaces)
|
||||
entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
|
||||
gp = GoldParse(
|
||||
doc,
|
||||
words=["I", "flew", " ", "to", "San Francisco Valley", "."],
|
||||
entities=entities,
|
||||
)
|
||||
assert gp.ner == ["O", "O", "O", "O", "B-LOC", "L-LOC", "O"]
|
||||
gold_words = ["I", "flew", " ", "to", "San Francisco Valley", "."]
|
||||
gold_spaces = [True, True, False, True, False, False]
|
||||
example = Example.from_dict(doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities})
|
||||
assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 1, 2]
|
||||
assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "", "LOC", "LOC", ""]
|
||||
|
||||
# from issue #4791
|
||||
data = (
|
||||
|
|
Loading…
Reference in New Issue
Block a user