From 5914faafbb6ed4703f85e7f4a9a23e7ad7da383b Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 1 Nov 2017 16:49:11 +0100 Subject: [PATCH] Fix .merge tests to not use deprecated API --- spacy/tests/doc/test_doc_api.py | 16 ++++++++++------ spacy/tests/spans/test_merge.py | 20 +++++++++++--------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index 2c90572e3..e4d57cbb0 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -127,7 +127,8 @@ def test_doc_api_merge(en_tokenizer): # merge 'The Beach Boys' doc = en_tokenizer(text) assert len(doc) == 9 - doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), 'NAMED', 'LEMMA', 'TYPE') + doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), tag='NAMED', lemma='LEMMA', + ent_type='TYPE') assert len(doc) == 7 assert doc[4].text == 'the beach boys' assert doc[4].text_with_ws == 'the beach boys ' @@ -136,7 +137,8 @@ def test_doc_api_merge(en_tokenizer): # merge 'all night' doc = en_tokenizer(text) assert len(doc) == 9 - doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), 'NAMED', 'LEMMA', 'TYPE') + doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), tag='NAMED', lemma='LEMMA', + ent_type='TYPE') assert len(doc) == 8 assert doc[7].text == 'all night' assert doc[7].text_with_ws == 'all night' @@ -147,7 +149,8 @@ def test_doc_api_merge_children(en_tokenizer): text = "WKRO played songs by the beach boys all night" doc = en_tokenizer(text) assert len(doc) == 9 - doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), 'NAMED', 'LEMMA', 'TYPE') + doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), tag='NAMED', lemma='LEMMA', + ent_type='TYPE') for word in doc: if word.i < word.head.i: @@ -159,8 +162,8 @@ def test_doc_api_merge_children(en_tokenizer): def test_doc_api_merge_hang(en_tokenizer): text = "through North and South Carolina" doc = en_tokenizer(text) - doc.merge(18, 32, '', '', 'ORG') - doc.merge(8, 32, '', '', 'ORG') + doc.merge(18, 32, tag='', lemma='', ent_type='ORG') + doc.merge(8, 32, tag='', lemma='', ent_type='ORG') def test_doc_api_sents_empty_string(en_tokenizer): @@ -188,7 +191,8 @@ def test_doc_api_runtime_error(en_tokenizer): if len(np) > 1: nps.append((np.start_char, np.end_char, np.root.tag_, np.text, np.root.ent_type_)) for np in nps: - doc.merge(*np) + start, end, tag, lemma, ent_type = np + doc.merge(start, end, tag=tag, lemma=lemma, ent_type=ent_type) def test_doc_api_right_edge(en_tokenizer): diff --git a/spacy/tests/spans/test_merge.py b/spacy/tests/spans/test_merge.py index 29cc917fe..61f8ca50d 100644 --- a/spacy/tests/spans/test_merge.py +++ b/spacy/tests/spans/test_merge.py @@ -14,7 +14,7 @@ def test_spans_merge_tokens(en_tokenizer): assert len(doc) == 4 assert doc[0].head.text == 'Angeles' assert doc[1].head.text == 'start' - doc.merge(0, len('Los Angeles'), 'NNP', 'Los Angeles', 'GPE') + doc.merge(0, len('Los Angeles'), tag='NNP', lemma='Los Angeles', ent_type='GPE') assert len(doc) == 3 assert doc[0].text == 'Los Angeles' assert doc[0].head.text == 'start' @@ -36,7 +36,8 @@ def test_spans_merge_heads(en_tokenizer): doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads) assert len(doc) == 8 - doc.merge(doc[3].idx, doc[4].idx + len(doc[4]), doc[4].tag_, 'pilates class', 'O') + doc.merge(doc[3].idx, doc[4].idx + len(doc[4]), tag=doc[4].tag_, + lemma='pilates class', ent_type='O') assert len(doc) == 7 assert doc[0].head.i == 1 assert doc[1].head.i == 1 @@ -53,7 +54,8 @@ def test_span_np_merges(en_tokenizer): doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads) assert doc[4].head.i == 1 - doc.merge(doc[2].idx, doc[4].idx + len(doc[4]), 'NP', 'tool', 'O') + doc.merge(doc[2].idx, doc[4].idx + len(doc[4]), tag='NP', lemma='tool', + ent_type='O') assert doc[2].head.i == 1 text = "displaCy is a lightweight and modern dependency parse tree visualization tool built with CSS3 and JavaScript." @@ -63,7 +65,7 @@ def test_span_np_merges(en_tokenizer): ents = [(e[0].idx, e[-1].idx + len(e[-1]), e.label_, e.lemma_) for e in doc.ents] for start, end, label, lemma in ents: - merged = doc.merge(start, end, label, lemma, label) + merged = doc.merge(start, end, tag=label, lemma=lemma, ent_type=label) assert merged != None, (start, end, label, lemma) @@ -88,7 +90,7 @@ def test_spans_entity_merge(en_tokenizer): assert len(doc) == 17 for ent in doc.ents: label, lemma, type_ = (ent.root.tag_, ent.root.lemma_, max(w.ent_type_ for w in ent)) - ent.merge(label, lemma, type_) + ent.merge(label=label, lemma=lemma, ent_type=type_) # check looping is ok assert len(doc) == 15 @@ -105,8 +107,8 @@ def test_spans_sentence_update_after_merge(en_tokenizer): sent1, sent2 = list(doc.sents) init_len = len(sent1) init_len2 = len(sent2) - doc[0:2].merge('none', 'none', 'none') - doc[-2:].merge('none', 'none', 'none') + doc[0:2].merge(label='none', lemma='none', ent_type='none') + doc[-2:].merge(label='none', lemma='none', ent_type='none') assert len(sent1) == init_len - 1 assert len(sent2) == init_len2 - 1 @@ -122,5 +124,5 @@ def test_spans_subtree_size_check(en_tokenizer): doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps) sent1 = list(doc.sents)[0] init_len = len(list(sent1.root.subtree)) - doc[0:2].merge('none', 'none', 'none') - assert len(list(sent1.root.subtree)) == init_len - 1 \ No newline at end of file + doc[0:2].merge(label='none', lemma='none', ent_type='none') + assert len(list(sent1.root.subtree)) == init_len - 1