mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Work on test for #615
This commit is contained in:
parent
1b77932ba5
commit
e01c1875ee
36
spacy/tests/regression/test_issue615.py
Normal file
36
spacy/tests/regression/test_issue615.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
from __future__ import unicode_literals
|
||||
import spacy
|
||||
from spacy.attrs import ORTH
|
||||
|
||||
|
||||
def merge_phrases(matcher, doc, i, matches):
    '''
    Matcher callback that merges every matched phrase into a single token.

    Merging changes the token indices of the doc, so nothing is done until
    the callback is invoked for the final entry of `matches`. At that point
    a span is created for every match first, and only then are the spans
    merged one after another.

    matcher: the matcher invoking the callback (not used here).
    doc: the document the matches refer to; sliced with doc[start:end].
    i: index of the match this call is for.
    matches: sequence of (ent_id, label, start, end) tuples.

    Always returns None.
    '''
    is_last_match = (i == len(matches) - 1)
    if not is_last_match:
        return None

    # Build every span up front, before any merge shifts the token indices.
    labelled_spans = []
    for _ent_id, label, start, end in matches:
        labelled_spans.append((label, doc[start:end]))

    for label, span in labelled_spans:
        # Tag is 'NNP' when the match carries a truthy label; otherwise the
        # tag of the span's root token is reused.
        span.merge(
            'NNP' if label else span.root.tag_,
            span.text,
            doc.vocab.strings[label],
        )
|
||||
|
||||
def test_entity_ID_assignment():
    '''
    Regression test for issue #615.

    Runs a two-token "golf club" pattern, registered under the
    'Sport_Equipment' entity with merge_phrases as its on_match callback,
    over a short sentence. Then checks that the doc reports at least one
    entity and that the first entity's label is non-zero.
    '''
    nlp = spacy.en.English()
    doc = nlp(u"The golf club is broken")

    # Two consecutive tokens matched on their exact text.
    pattern = [{ORTH: "golf"}, {ORTH: "club"}]

    matcher = spacy.matcher.Matcher(nlp.vocab)
    matcher.add_entity('Sport_Equipment', on_match=merge_phrases)
    matcher.add_pattern("Sport_Equipment", pattern, label='Sport_Equipment')

    # The return value is not inspected; the test only looks at doc.ents
    # after the matcher (and its callback) has run.
    matcher(doc)

    found_entities = list(doc.ents)

    assert len(found_entities) > 0  # assertion 1
    assert found_entities[0].label != 0  # assertion 2
|
Loading…
Reference in New Issue
Block a user