# Patch summary (commentary only; text before the first "diff --git" header
# is not part of any hunk):
#   * entityruler.py: the match sort key changes from (length, start) to
#     (length, -start). Because the list is sorted with reverse=True, longer
#     matches still come first, and matches of equal length are now ordered
#     by ascending start index instead of descending.
#   * test_entity_ruler.py: adds a regression test where the patterns
#     "foo bar" and "bar baz" overlap in the text "foo bar baz"; it asserts
#     that exactly one entity remains and that its label is FOOBAR.
# NOTE(review): the line breaks and leading indentation of the context lines
# were reconstructed from a whitespace-collapsed copy of this patch; confirm
# them against the target files before applying.
diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py
index 1786dda87..2abff62f1 100644
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -95,7 +95,7 @@ class EntityRuler(object):
         matches = set(
             [(m_id, start, end) for m_id, start, end in matches if start != end]
         )
-        get_sort_key = lambda m: (m[2] - m[1], m[1])
+        get_sort_key = lambda m: (m[2] - m[1], -m[1])
         matches = sorted(matches, key=get_sort_key, reverse=True)
         entities = list(doc.ents)
         new_entities = []
diff --git a/spacy/tests/pipeline/test_entity_ruler.py b/spacy/tests/pipeline/test_entity_ruler.py
index b6e3c40c9..9e22c9cc7 100644
--- a/spacy/tests/pipeline/test_entity_ruler.py
+++ b/spacy/tests/pipeline/test_entity_ruler.py
@@ -154,3 +154,15 @@ def test_entity_ruler_properties(nlp, patterns):
     ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
     assert sorted(ruler.labels) == sorted(["HELLO", "BYE", "COMPLEX", "TECH_ORG"])
     assert sorted(ruler.ent_ids) == ["a1", "a2"]
+
+
+def test_entity_ruler_overlapping_spans(nlp):
+    ruler = EntityRuler(nlp)
+    patterns = [
+        {"label": "FOOBAR", "pattern": "foo bar"},
+        {"label": "BARBAZ", "pattern": "bar baz"},
+    ]
+    ruler.add_patterns(patterns)
+    doc = ruler(nlp.make_doc("foo bar baz"))
+    assert len(doc.ents) == 1
+    assert doc.ents[0].label_ == "FOOBAR"