Auto-format

This commit is contained in:
Ines Montani 2019-02-17 12:22:07 +01:00
parent 7d4a52a4d0
commit 1e252b129c
2 changed files with 22 additions and 11 deletions

View File

@ -325,7 +325,7 @@ class Errors(object):
"in previous versions, but had significant bugs that led to poor " "in previous versions, but had significant bugs that led to poor "
"performance") "performance")
E117 = ("The newly split tokens must match the text of the original token. " E117 = ("The newly split tokens must match the text of the original token. "
"New orths: {new}. Old text: {old}.") "New orths: {new}. Old text: {old}.")
@add_codes @add_codes

View File

@ -22,9 +22,9 @@ def test_doc_split(en_vocab):
["Los", "Angeles"], ["Los", "Angeles"],
[(doc[0], 1), doc[1]], [(doc[0], 1), doc[1]],
attrs={ attrs={
"tag": ["NNP"]*2, "tag": ["NNP"] * 2,
"lemma": ["Los", "Angeles"], "lemma": ["Los", "Angeles"],
"ent_type": ["GPE"]*2 "ent_type": ["GPE"] * 2,
}, },
) )
assert len(doc) == 4 assert len(doc) == 4
@ -46,8 +46,12 @@ def test_split_dependencies(en_vocab):
dep1 = doc.vocab.strings.add("amod") dep1 = doc.vocab.strings.add("amod")
dep2 = doc.vocab.strings.add("subject") dep2 = doc.vocab.strings.add("subject")
with doc.retokenize() as retokenizer: with doc.retokenize() as retokenizer:
retokenizer.split(doc[0], ["Los", "Angeles"], retokenizer.split(
[(doc[0], 1), doc[1]], attrs={'dep': [dep1, dep2]}) doc[0],
["Los", "Angeles"],
[(doc[0], 1), doc[1]],
attrs={"dep": [dep1, dep2]},
)
assert doc[0].dep == dep1 assert doc[0].dep == dep1
assert doc[1].dep == dep2 assert doc[1].dep == dep2
@ -73,8 +77,7 @@ def test_spans_entity_merge_iob():
assert doc[0].ent_iob_ == "B" assert doc[0].ent_iob_ == "B"
assert doc[1].ent_iob_ == "I" assert doc[1].ent_iob_ == "I"
with doc.retokenize() as retokenizer: with doc.retokenize() as retokenizer:
retokenizer.split(doc[0], ["a", "b", "c"], retokenizer.split(doc[0], ["a", "b", "c"], [(doc[0], 1), (doc[0], 2), doc[1]])
[(doc[0], 1), (doc[0], 2), doc[1]])
assert doc[0].ent_iob_ == "B" assert doc[0].ent_iob_ == "B"
assert doc[1].ent_iob_ == "I" assert doc[1].ent_iob_ == "I"
assert doc[2].ent_iob_ == "I" assert doc[2].ent_iob_ == "I"
@ -94,10 +97,18 @@ def test_spans_sentence_update_after_merge(en_vocab):
init_len = len(sent1) init_len = len(sent1)
init_len2 = len(sent2) init_len2 = len(sent2)
with doc.retokenize() as retokenizer: with doc.retokenize() as retokenizer:
retokenizer.split(doc[0], ["Stewart", "Lee"], [(doc[0], 1), doc[1]], retokenizer.split(
attrs={"dep": ["compound", "nsubj"]}) doc[0],
retokenizer.split(doc[13], ["Joe", "Pasquale"], [(doc[13], 1), doc[12]], ["Stewart", "Lee"],
attrs={"dep": ["compound", "dobj"]}) [(doc[0], 1), doc[1]],
attrs={"dep": ["compound", "nsubj"]},
)
retokenizer.split(
doc[13],
["Joe", "Pasquale"],
[(doc[13], 1), doc[12]],
attrs={"dep": ["compound", "dobj"]},
)
sent1, sent2 = list(doc.sents) sent1, sent2 = list(doc.sents)
assert len(sent1) == init_len + 1 assert len(sent1) == init_len + 1
assert len(sent2) == init_len2 + 1 assert len(sent2) == init_len2 + 1