mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
attempt to fix _guess_spaces
This commit is contained in:
parent
5b1d15e247
commit
5b6ed05752
|
@ -73,7 +73,7 @@ cdef class Example:
|
||||||
tok_dict["ORTH"] = [tok.text for tok in predicted]
|
tok_dict["ORTH"] = [tok.text for tok in predicted]
|
||||||
tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
|
tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
|
||||||
if not _has_field(tok_dict, "SPACY"):
|
if not _has_field(tok_dict, "SPACY"):
|
||||||
spaces = _guess_spaces(predicted.text, tok_dict["ORTH"])
|
tok_dict["SPACY"] = _guess_spaces(predicted.text, tok_dict["ORTH"])
|
||||||
return Example(
|
return Example(
|
||||||
predicted,
|
predicted,
|
||||||
annotations2doc(predicted.vocab, tok_dict, doc_dict)
|
annotations2doc(predicted.vocab, tok_dict, doc_dict)
|
||||||
|
@ -333,8 +333,6 @@ def _fix_legacy_dict_data(example_dict):
|
||||||
else:
|
else:
|
||||||
raise KeyError(Errors.E983.format(key=key, dict="token_annotation", keys=remapping.keys()))
|
raise KeyError(Errors.E983.format(key=key, dict="token_annotation", keys=remapping.keys()))
|
||||||
text = example_dict.get("text", example_dict.get("raw"))
|
text = example_dict.get("text", example_dict.get("raw"))
|
||||||
if not _has_field(token_dict, "SPACY"):
|
|
||||||
token_dict["SPACY"] = _guess_spaces(text, token_dict["ORTH"])
|
|
||||||
if "HEAD" in token_dict and "SENT_START" in token_dict:
|
if "HEAD" in token_dict and "SENT_START" in token_dict:
|
||||||
# If heads are set, we don't also redundantly specify SENT_START.
|
# If heads are set, we don't also redundantly specify SENT_START.
|
||||||
token_dict.pop("SENT_START")
|
token_dict.pop("SENT_START")
|
||||||
|
|
|
@ -161,6 +161,7 @@ def test_example_from_dict_no_ner(en_vocab):
|
||||||
ner_tags = example.get_aligned_ner()
|
ner_tags = example.get_aligned_ner()
|
||||||
assert ner_tags == [None, None, None, None]
|
assert ner_tags == [None, None, None, None]
|
||||||
|
|
||||||
|
|
||||||
def test_example_from_dict_some_ner(en_vocab):
|
def test_example_from_dict_some_ner(en_vocab):
|
||||||
words = ["a", "b", "c", "d"]
|
words = ["a", "b", "c", "d"]
|
||||||
spaces = [True, True, False, True]
|
spaces = [True, True, False, True]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user