mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Fix .merge tests to not use deprecated API
This commit is contained in:
parent
705a4e3e4a
commit
5914faafbb
|
@ -127,7 +127,8 @@ def test_doc_api_merge(en_tokenizer):
|
||||||
# merge 'The Beach Boys'
|
# merge 'The Beach Boys'
|
||||||
doc = en_tokenizer(text)
|
doc = en_tokenizer(text)
|
||||||
assert len(doc) == 9
|
assert len(doc) == 9
|
||||||
doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), 'NAMED', 'LEMMA', 'TYPE')
|
doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), tag='NAMED', lemma='LEMMA',
|
||||||
|
ent_type='TYPE')
|
||||||
assert len(doc) == 7
|
assert len(doc) == 7
|
||||||
assert doc[4].text == 'the beach boys'
|
assert doc[4].text == 'the beach boys'
|
||||||
assert doc[4].text_with_ws == 'the beach boys '
|
assert doc[4].text_with_ws == 'the beach boys '
|
||||||
|
@ -136,7 +137,8 @@ def test_doc_api_merge(en_tokenizer):
|
||||||
# merge 'all night'
|
# merge 'all night'
|
||||||
doc = en_tokenizer(text)
|
doc = en_tokenizer(text)
|
||||||
assert len(doc) == 9
|
assert len(doc) == 9
|
||||||
doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), 'NAMED', 'LEMMA', 'TYPE')
|
doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), tag='NAMED', lemma='LEMMA',
|
||||||
|
ent_type='TYPE')
|
||||||
assert len(doc) == 8
|
assert len(doc) == 8
|
||||||
assert doc[7].text == 'all night'
|
assert doc[7].text == 'all night'
|
||||||
assert doc[7].text_with_ws == 'all night'
|
assert doc[7].text_with_ws == 'all night'
|
||||||
|
@ -147,7 +149,8 @@ def test_doc_api_merge_children(en_tokenizer):
|
||||||
text = "WKRO played songs by the beach boys all night"
|
text = "WKRO played songs by the beach boys all night"
|
||||||
doc = en_tokenizer(text)
|
doc = en_tokenizer(text)
|
||||||
assert len(doc) == 9
|
assert len(doc) == 9
|
||||||
doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), 'NAMED', 'LEMMA', 'TYPE')
|
doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), tag='NAMED', lemma='LEMMA',
|
||||||
|
ent_type='TYPE')
|
||||||
|
|
||||||
for word in doc:
|
for word in doc:
|
||||||
if word.i < word.head.i:
|
if word.i < word.head.i:
|
||||||
|
@ -159,8 +162,8 @@ def test_doc_api_merge_children(en_tokenizer):
|
||||||
def test_doc_api_merge_hang(en_tokenizer):
|
def test_doc_api_merge_hang(en_tokenizer):
|
||||||
text = "through North and South Carolina"
|
text = "through North and South Carolina"
|
||||||
doc = en_tokenizer(text)
|
doc = en_tokenizer(text)
|
||||||
doc.merge(18, 32, '', '', 'ORG')
|
doc.merge(18, 32, tag='', lemma='', ent_type='ORG')
|
||||||
doc.merge(8, 32, '', '', 'ORG')
|
doc.merge(8, 32, tag='', lemma='', ent_type='ORG')
|
||||||
|
|
||||||
|
|
||||||
def test_doc_api_sents_empty_string(en_tokenizer):
|
def test_doc_api_sents_empty_string(en_tokenizer):
|
||||||
|
@ -188,7 +191,8 @@ def test_doc_api_runtime_error(en_tokenizer):
|
||||||
if len(np) > 1:
|
if len(np) > 1:
|
||||||
nps.append((np.start_char, np.end_char, np.root.tag_, np.text, np.root.ent_type_))
|
nps.append((np.start_char, np.end_char, np.root.tag_, np.text, np.root.ent_type_))
|
||||||
for np in nps:
|
for np in nps:
|
||||||
doc.merge(*np)
|
start, end, tag, lemma, ent_type = np
|
||||||
|
doc.merge(start, end, tag=tag, lemma=lemma, ent_type=ent_type)
|
||||||
|
|
||||||
|
|
||||||
def test_doc_api_right_edge(en_tokenizer):
|
def test_doc_api_right_edge(en_tokenizer):
|
||||||
|
|
|
@ -14,7 +14,7 @@ def test_spans_merge_tokens(en_tokenizer):
|
||||||
assert len(doc) == 4
|
assert len(doc) == 4
|
||||||
assert doc[0].head.text == 'Angeles'
|
assert doc[0].head.text == 'Angeles'
|
||||||
assert doc[1].head.text == 'start'
|
assert doc[1].head.text == 'start'
|
||||||
doc.merge(0, len('Los Angeles'), 'NNP', 'Los Angeles', 'GPE')
|
doc.merge(0, len('Los Angeles'), tag='NNP', lemma='Los Angeles', ent_type='GPE')
|
||||||
assert len(doc) == 3
|
assert len(doc) == 3
|
||||||
assert doc[0].text == 'Los Angeles'
|
assert doc[0].text == 'Los Angeles'
|
||||||
assert doc[0].head.text == 'start'
|
assert doc[0].head.text == 'start'
|
||||||
|
@ -36,7 +36,8 @@ def test_spans_merge_heads(en_tokenizer):
|
||||||
doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
|
doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
|
||||||
|
|
||||||
assert len(doc) == 8
|
assert len(doc) == 8
|
||||||
doc.merge(doc[3].idx, doc[4].idx + len(doc[4]), doc[4].tag_, 'pilates class', 'O')
|
doc.merge(doc[3].idx, doc[4].idx + len(doc[4]), tag=doc[4].tag_,
|
||||||
|
lemma='pilates class', ent_type='O')
|
||||||
assert len(doc) == 7
|
assert len(doc) == 7
|
||||||
assert doc[0].head.i == 1
|
assert doc[0].head.i == 1
|
||||||
assert doc[1].head.i == 1
|
assert doc[1].head.i == 1
|
||||||
|
@ -53,7 +54,8 @@ def test_span_np_merges(en_tokenizer):
|
||||||
doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
|
doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
|
||||||
|
|
||||||
assert doc[4].head.i == 1
|
assert doc[4].head.i == 1
|
||||||
doc.merge(doc[2].idx, doc[4].idx + len(doc[4]), 'NP', 'tool', 'O')
|
doc.merge(doc[2].idx, doc[4].idx + len(doc[4]), tag='NP', lemma='tool',
|
||||||
|
ent_type='O')
|
||||||
assert doc[2].head.i == 1
|
assert doc[2].head.i == 1
|
||||||
|
|
||||||
text = "displaCy is a lightweight and modern dependency parse tree visualization tool built with CSS3 and JavaScript."
|
text = "displaCy is a lightweight and modern dependency parse tree visualization tool built with CSS3 and JavaScript."
|
||||||
|
@ -63,7 +65,7 @@ def test_span_np_merges(en_tokenizer):
|
||||||
|
|
||||||
ents = [(e[0].idx, e[-1].idx + len(e[-1]), e.label_, e.lemma_) for e in doc.ents]
|
ents = [(e[0].idx, e[-1].idx + len(e[-1]), e.label_, e.lemma_) for e in doc.ents]
|
||||||
for start, end, label, lemma in ents:
|
for start, end, label, lemma in ents:
|
||||||
merged = doc.merge(start, end, label, lemma, label)
|
merged = doc.merge(start, end, tag=label, lemma=lemma, ent_type=label)
|
||||||
assert merged != None, (start, end, label, lemma)
|
assert merged != None, (start, end, label, lemma)
|
||||||
|
|
||||||
|
|
||||||
|
@ -88,7 +90,7 @@ def test_spans_entity_merge(en_tokenizer):
|
||||||
assert len(doc) == 17
|
assert len(doc) == 17
|
||||||
for ent in doc.ents:
|
for ent in doc.ents:
|
||||||
label, lemma, type_ = (ent.root.tag_, ent.root.lemma_, max(w.ent_type_ for w in ent))
|
label, lemma, type_ = (ent.root.tag_, ent.root.lemma_, max(w.ent_type_ for w in ent))
|
||||||
ent.merge(label, lemma, type_)
|
ent.merge(label=label, lemma=lemma, ent_type=type_)
|
||||||
# check looping is ok
|
# check looping is ok
|
||||||
assert len(doc) == 15
|
assert len(doc) == 15
|
||||||
|
|
||||||
|
@ -105,8 +107,8 @@ def test_spans_sentence_update_after_merge(en_tokenizer):
|
||||||
sent1, sent2 = list(doc.sents)
|
sent1, sent2 = list(doc.sents)
|
||||||
init_len = len(sent1)
|
init_len = len(sent1)
|
||||||
init_len2 = len(sent2)
|
init_len2 = len(sent2)
|
||||||
doc[0:2].merge('none', 'none', 'none')
|
doc[0:2].merge(label='none', lemma='none', ent_type='none')
|
||||||
doc[-2:].merge('none', 'none', 'none')
|
doc[-2:].merge(label='none', lemma='none', ent_type='none')
|
||||||
assert len(sent1) == init_len - 1
|
assert len(sent1) == init_len - 1
|
||||||
assert len(sent2) == init_len2 - 1
|
assert len(sent2) == init_len2 - 1
|
||||||
|
|
||||||
|
@ -122,5 +124,5 @@ def test_spans_subtree_size_check(en_tokenizer):
|
||||||
doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
|
doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
|
||||||
sent1 = list(doc.sents)[0]
|
sent1 = list(doc.sents)[0]
|
||||||
init_len = len(list(sent1.root.subtree))
|
init_len = len(list(sent1.root.subtree))
|
||||||
doc[0:2].merge('none', 'none', 'none')
|
doc[0:2].merge(label='none', lemma='none', ent_type='none')
|
||||||
assert len(list(sent1.root.subtree)) == init_len - 1
|
assert len(list(sent1.root.subtree)) == init_len - 1
|
Loading…
Reference in New Issue
Block a user