mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
Auto-format
This commit is contained in:
parent
7d4a52a4d0
commit
1e252b129c
|
@ -325,7 +325,7 @@ class Errors(object):
|
||||||
"in previous versions, but had significant bugs that led to poor "
|
"in previous versions, but had significant bugs that led to poor "
|
||||||
"performance")
|
"performance")
|
||||||
E117 = ("The newly split tokens must match the text of the original token. "
|
E117 = ("The newly split tokens must match the text of the original token. "
|
||||||
"New orths: {new}. Old text: {old}.")
|
"New orths: {new}. Old text: {old}.")
|
||||||
|
|
||||||
|
|
||||||
@add_codes
|
@add_codes
|
||||||
|
|
|
@ -22,9 +22,9 @@ def test_doc_split(en_vocab):
|
||||||
["Los", "Angeles"],
|
["Los", "Angeles"],
|
||||||
[(doc[0], 1), doc[1]],
|
[(doc[0], 1), doc[1]],
|
||||||
attrs={
|
attrs={
|
||||||
"tag": ["NNP"]*2,
|
"tag": ["NNP"] * 2,
|
||||||
"lemma": ["Los", "Angeles"],
|
"lemma": ["Los", "Angeles"],
|
||||||
"ent_type": ["GPE"]*2
|
"ent_type": ["GPE"] * 2,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
assert len(doc) == 4
|
assert len(doc) == 4
|
||||||
|
@ -46,8 +46,12 @@ def test_split_dependencies(en_vocab):
|
||||||
dep1 = doc.vocab.strings.add("amod")
|
dep1 = doc.vocab.strings.add("amod")
|
||||||
dep2 = doc.vocab.strings.add("subject")
|
dep2 = doc.vocab.strings.add("subject")
|
||||||
with doc.retokenize() as retokenizer:
|
with doc.retokenize() as retokenizer:
|
||||||
retokenizer.split(doc[0], ["Los", "Angeles"],
|
retokenizer.split(
|
||||||
[(doc[0], 1), doc[1]], attrs={'dep': [dep1, dep2]})
|
doc[0],
|
||||||
|
["Los", "Angeles"],
|
||||||
|
[(doc[0], 1), doc[1]],
|
||||||
|
attrs={"dep": [dep1, dep2]},
|
||||||
|
)
|
||||||
assert doc[0].dep == dep1
|
assert doc[0].dep == dep1
|
||||||
assert doc[1].dep == dep2
|
assert doc[1].dep == dep2
|
||||||
|
|
||||||
|
@ -73,8 +77,7 @@ def test_spans_entity_merge_iob():
|
||||||
assert doc[0].ent_iob_ == "B"
|
assert doc[0].ent_iob_ == "B"
|
||||||
assert doc[1].ent_iob_ == "I"
|
assert doc[1].ent_iob_ == "I"
|
||||||
with doc.retokenize() as retokenizer:
|
with doc.retokenize() as retokenizer:
|
||||||
retokenizer.split(doc[0], ["a", "b", "c"],
|
retokenizer.split(doc[0], ["a", "b", "c"], [(doc[0], 1), (doc[0], 2), doc[1]])
|
||||||
[(doc[0], 1), (doc[0], 2), doc[1]])
|
|
||||||
assert doc[0].ent_iob_ == "B"
|
assert doc[0].ent_iob_ == "B"
|
||||||
assert doc[1].ent_iob_ == "I"
|
assert doc[1].ent_iob_ == "I"
|
||||||
assert doc[2].ent_iob_ == "I"
|
assert doc[2].ent_iob_ == "I"
|
||||||
|
@ -94,10 +97,18 @@ def test_spans_sentence_update_after_merge(en_vocab):
|
||||||
init_len = len(sent1)
|
init_len = len(sent1)
|
||||||
init_len2 = len(sent2)
|
init_len2 = len(sent2)
|
||||||
with doc.retokenize() as retokenizer:
|
with doc.retokenize() as retokenizer:
|
||||||
retokenizer.split(doc[0], ["Stewart", "Lee"], [(doc[0], 1), doc[1]],
|
retokenizer.split(
|
||||||
attrs={"dep": ["compound", "nsubj"]})
|
doc[0],
|
||||||
retokenizer.split(doc[13], ["Joe", "Pasquale"], [(doc[13], 1), doc[12]],
|
["Stewart", "Lee"],
|
||||||
attrs={"dep": ["compound", "dobj"]})
|
[(doc[0], 1), doc[1]],
|
||||||
|
attrs={"dep": ["compound", "nsubj"]},
|
||||||
|
)
|
||||||
|
retokenizer.split(
|
||||||
|
doc[13],
|
||||||
|
["Joe", "Pasquale"],
|
||||||
|
[(doc[13], 1), doc[12]],
|
||||||
|
attrs={"dep": ["compound", "dobj"]},
|
||||||
|
)
|
||||||
sent1, sent2 = list(doc.sents)
|
sent1, sent2 = list(doc.sents)
|
||||||
assert len(sent1) == init_len + 1
|
assert len(sent1) == init_len + 1
|
||||||
assert len(sent2) == init_len2 + 1
|
assert len(sent2) == init_len2 + 1
|
||||||
|
|
Loading…
Reference in New Issue
Block a user