Officially support DependencyMatcher

Add official support for the `DependencyMatcher`. Redesign the pattern
specification. Fix and extend operator implementations. Update API docs
and add usage docs.

Patterns
--------

Refactor pattern structure to:

```
{
  "LEFT_ID": str,
  "REL_OP": str,
  "RIGHT_ID": str,
  "RIGHT_ATTRS": dict,
}
```

The first node contains only `RIGHT_ID` and `RIGHT_ATTRS`; all subsequent
nodes contain all four keys, as in the sketch below.
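
For illustration, a minimal two-node pattern in the new format (a sketch using
the `fox`/`amod` tokens from the updated tests): the anchor node carries only
`RIGHT_ID` and `RIGHT_ATTRS`, and the second node links back to it via
`LEFT_ID` and `REL_OP`.

```
pattern = [
    # anchor node: RIGHT_ID and RIGHT_ATTRS only
    {"RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"}},
    # every later node: all four keys, linking back to an earlier RIGHT_ID
    {
        "LEFT_ID": "fox",
        "REL_OP": ">",
        "RIGHT_ID": "modifier",
        "RIGHT_ATTRS": {"DEP": "amod"},
    },
]
```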

New operators
-------------

Because of the way patterns are constructed from left to right, it's
helpful to have `follows` operators along with `precedes` operators. Add
operators for simple precedes / follows alongside immediate precedes /
follows.

* `.*`: precedes
* `;`: immediately follows
* `;*`: follows
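
With the anchor already defined on the left, the follows operators make this
kind of pattern straightforward. A sketch adapted from the usage example added
to the docs: anchor on "founded" and reach back to a preceding "initially"
with `;`.

```
pattern = [
    {"RIGHT_ID": "founded", "RIGHT_ATTRS": {"ORTH": "founded"}},
    # "founded" immediately follows "initially"
    {
        "LEFT_ID": "founded",
        "REL_OP": ";",
        "RIGHT_ID": "initially",
        "RIGHT_ATTRS": {"ORTH": "initially"},
    },
]
```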

Operator fixes
--------------

* `<` and `<<` do not include the node itself
* Fix reversed order for all operators involving linear precedence (`.`,
  all sibling operators)
* Linear precedence operators do not match tokens outside the same dependency
  parse
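
The corrected ordering is what the new operator tests check: in the test
sentence "The quick brown fox jumped over the lazy fox", `quick $+ brown`
matches once and `brown $+ quick` does not. A runnable sketch (assumes a
pipeline with a dependency parser, e.g. `en_core_web_sm`, that attaches both
modifiers to "fox"):

```
import spacy
from spacy.matcher import DependencyMatcher

nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)
matcher.add("SIB", [[
    {"RIGHT_ID": "quick", "RIGHT_ATTRS": {"LOWER": "quick"}},
    # with the fixed order, $+ means the right-hand node is the
    # immediate right sibling of the left-hand node
    {"LEFT_ID": "quick", "REL_OP": "$+", "RIGHT_ID": "brown",
     "RIGHT_ATTRS": {"LOWER": "brown"}},
]])
doc = nlp("The quick brown fox jumped over the lazy fox")
print(matcher(doc))  # expect one match: (match_id, [quick_index, brown_index])
```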

Additional fixes
----------------

* Use v3 Matcher API
* Support `get` and `remove`
* Support pickling
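
A minimal sketch of the updated API, reusing `nlp` and `pattern` from the
sketches above:

```
import pickle
from spacy.matcher import DependencyMatcher

matcher = DependencyMatcher(nlp.vocab, validate=True)  # validation is passed through to the Matcher
matcher.add("FOUNDED", [pattern], on_match=None)       # v3 API: a list of patterns, keyword-only on_match
on_match, patterns = matcher.get("FOUNDED")            # returns the raw patterns as added
matcher.remove("FOUNDED")
assert "FOUNDED" not in matcher

# patterns and callbacks survive pickling
matcher.add("FOUNDED", [pattern])
restored = pickle.loads(pickle.dumps(matcher))
assert len(restored) == len(matcher)
```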

Author: Adriane Boyd
Date: 2020-08-31 20:04:26 +02:00
Parent: 6bfb1b3a29
Commit: 960d9cfadc
10 changed files with 1069 additions and 278 deletions


@ -284,12 +284,12 @@ class Errors:
"Span objects, or dicts if set to manual=True.") "Span objects, or dicts if set to manual=True.")
E097 = ("Invalid pattern: expected token pattern (list of dicts) or " E097 = ("Invalid pattern: expected token pattern (list of dicts) or "
"phrase pattern (string) but got:\n{pattern}") "phrase pattern (string) but got:\n{pattern}")
E098 = ("Invalid pattern specified: expected both SPEC and PATTERN.") E098 = ("Invalid pattern: expected both RIGHT_ID and RIGHT_ATTRS.")
E099 = ("First node of pattern should be a root node. The root should " E099 = ("Invalid pattern: the first node of pattern should be an anchor "
"only contain NODE_NAME.") "node. The node should only contain RIGHT_ID and RIGHT_ATTRS.")
E100 = ("Nodes apart from the root should contain NODE_NAME, NBOR_NAME and " E100 = ("Nodes other than the anchor node should all contain LEFT_ID, "
"NBOR_RELOP.") "REL_OP and RIGHT_ID.")
E101 = ("NODE_NAME should be a new node and NBOR_NAME should already have " E101 = ("RIGHT_ID should be a new node and LEFT_ID should already have "
"have been declared in previous edges.") "have been declared in previous edges.")
E102 = ("Can't merge non-disjoint spans. '{token}' is already part of " E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
"tokens to merge. If you want to find the longest non-overlapping " "tokens to merge. If you want to find the longest non-overlapping "
@ -652,6 +652,9 @@ class Errors:
"'{chunk}'. Tokenizer exceptions are only allowed to specify " "'{chunk}'. Tokenizer exceptions are only allowed to specify "
"`ORTH` and `NORM`.") "`ORTH` and `NORM`.")
E1006 = ("Unable to initialize {name} model with 0 labels.") E1006 = ("Unable to initialize {name} model with 0 labels.")
E1007 = ("Unsupported DependencyMatcher operator '{op}'.")
E1008 = ("Invalid pattern: each pattern should be a list of dicts. Check "
"that you are providing a list of patterns as `List[List[dict]]`.")
@add_codes @add_codes


@ -1,16 +1,16 @@
# cython: infer_types=True, profile=True # cython: infer_types=True, profile=True
from cymem.cymem cimport Pool from typing import List
from preshed.maps cimport PreshMap
from libcpp cimport bool
import numpy import numpy
from cymem.cymem cimport Pool
from .matcher cimport Matcher from .matcher cimport Matcher
from ..vocab cimport Vocab from ..vocab cimport Vocab
from ..tokens.doc cimport Doc from ..tokens.doc cimport Doc
from .matcher import unpickle_matcher
from ..errors import Errors from ..errors import Errors
from ..tokens import Span
DELIMITER = "||" DELIMITER = "||"
@ -22,36 +22,52 @@ cdef class DependencyMatcher:
"""Match dependency parse tree based on pattern rules.""" """Match dependency parse tree based on pattern rules."""
cdef Pool mem cdef Pool mem
cdef readonly Vocab vocab cdef readonly Vocab vocab
cdef readonly Matcher token_matcher cdef readonly Matcher matcher
cdef public object _patterns cdef public object _patterns
cdef public object _raw_patterns
cdef public object _keys_to_token cdef public object _keys_to_token
cdef public object _root cdef public object _root
cdef public object _entities
cdef public object _callbacks cdef public object _callbacks
cdef public object _nodes cdef public object _nodes
cdef public object _tree cdef public object _tree
cdef public object _ops
def __init__(self, vocab): def __init__(self, vocab, *, validate=False):
"""Create the DependencyMatcher. """Create the DependencyMatcher.
vocab (Vocab): The vocabulary object, which must be shared with the vocab (Vocab): The vocabulary object, which must be shared with the
documents the matcher will operate on. documents the matcher will operate on.
validate (bool): Whether patterns should be validated, passed to
Matcher as `validate`
""" """
size = 20 size = 20
# TODO: make matcher work with validation self.matcher = Matcher(vocab, validate=validate)
self.token_matcher = Matcher(vocab, validate=False)
self._keys_to_token = {} self._keys_to_token = {}
self._patterns = {} self._patterns = {}
self._raw_patterns = {}
self._root = {} self._root = {}
self._nodes = {} self._nodes = {}
self._tree = {} self._tree = {}
self._entities = {}
self._callbacks = {} self._callbacks = {}
self.vocab = vocab self.vocab = vocab
self.mem = Pool() self.mem = Pool()
self._ops = {
"<": self.dep,
">": self.gov,
"<<": self.dep_chain,
">>": self.gov_chain,
".": self.imm_precede,
".*": self.precede,
";": self.imm_follow,
";*": self.follow,
"$+": self.imm_right_sib,
"$-": self.imm_left_sib,
"$++": self.right_sib,
"$--": self.left_sib,
}
def __reduce__(self): def __reduce__(self):
data = (self.vocab, self._patterns,self._tree, self._callbacks) data = (self.vocab, self._raw_patterns, self._callbacks)
return (unpickle_matcher, data, None, None) return (unpickle_matcher, data, None, None)
def __len__(self): def __len__(self):
@ -74,54 +90,61 @@ cdef class DependencyMatcher:
idx = 0 idx = 0
visited_nodes = {} visited_nodes = {}
for relation in pattern: for relation in pattern:
if "PATTERN" not in relation or "SPEC" not in relation: if not isinstance(relation, dict):
raise ValueError(Errors.E1008)
if "RIGHT_ATTRS" not in relation and "RIGHT_ID" not in relation:
raise ValueError(Errors.E098.format(key=key)) raise ValueError(Errors.E098.format(key=key))
if idx == 0: if idx == 0:
if not( if not(
"NODE_NAME" in relation["SPEC"] "RIGHT_ID" in relation
and "NBOR_RELOP" not in relation["SPEC"] and "REL_OP" not in relation
and "NBOR_NAME" not in relation["SPEC"] and "LEFT_ID" not in relation
): ):
raise ValueError(Errors.E099.format(key=key)) raise ValueError(Errors.E099.format(key=key))
visited_nodes[relation["SPEC"]["NODE_NAME"]] = True visited_nodes[relation["RIGHT_ID"]] = True
else: else:
if not( if not(
"NODE_NAME" in relation["SPEC"] "RIGHT_ID" in relation
and "NBOR_RELOP" in relation["SPEC"] and "RIGHT_ATTRS" in relation
and "NBOR_NAME" in relation["SPEC"] and "REL_OP" in relation
and "LEFT_ID" in relation
): ):
raise ValueError(Errors.E100.format(key=key)) raise ValueError(Errors.E100.format(key=key))
if ( if (
relation["SPEC"]["NODE_NAME"] in visited_nodes relation["RIGHT_ID"] in visited_nodes
or relation["SPEC"]["NBOR_NAME"] not in visited_nodes or relation["LEFT_ID"] not in visited_nodes
): ):
raise ValueError(Errors.E101.format(key=key)) raise ValueError(Errors.E101.format(key=key))
visited_nodes[relation["SPEC"]["NODE_NAME"]] = True if relation["REL_OP"] not in self._ops:
visited_nodes[relation["SPEC"]["NBOR_NAME"]] = True raise ValueError(Errors.E1007.format(op=relation["REL_OP"]))
visited_nodes[relation["RIGHT_ID"]] = True
visited_nodes[relation["LEFT_ID"]] = True
idx = idx + 1 idx = idx + 1
def add(self, key, patterns, *_patterns, on_match=None): def add(self, key, patterns, *, on_match=None):
"""Add a new matcher rule to the matcher. """Add a new matcher rule to the matcher.
key (str): The match ID. key (str): The match ID.
patterns (list): The patterns to add for the given key. patterns (list): The patterns to add for the given key.
on_match (callable): Optional callback executed on match. on_match (callable): Optional callback executed on match.
""" """
if patterns is None or hasattr(patterns, "__call__"): # old API if on_match is not None and not hasattr(on_match, "__call__"):
on_match = patterns raise ValueError(Errors.E171.format(arg_type=type(on_match)))
patterns = _patterns if patterns is None or not isinstance(patterns, List): # old API
raise ValueError(Errors.E948.format(arg_type=type(patterns)))
for pattern in patterns: for pattern in patterns:
if len(pattern) == 0: if len(pattern) == 0:
raise ValueError(Errors.E012.format(key=key)) raise ValueError(Errors.E012.format(key=key))
self.validate_input(pattern,key) self.validate_input(pattern, key)
key = self._normalize_key(key) key = self._normalize_key(key)
self._raw_patterns.setdefault(key, [])
self._raw_patterns[key].extend(patterns)
_patterns = [] _patterns = []
for pattern in patterns: for pattern in patterns:
token_patterns = [] token_patterns = []
for i in range(len(pattern)): for i in range(len(pattern)):
token_pattern = [pattern[i]["PATTERN"]] token_pattern = [pattern[i]["RIGHT_ATTRS"]]
token_patterns.append(token_pattern) token_patterns.append(token_pattern)
# self.patterns.append(token_patterns)
_patterns.append(token_patterns) _patterns.append(token_patterns)
self._patterns.setdefault(key, []) self._patterns.setdefault(key, [])
self._callbacks[key] = on_match self._callbacks[key] = on_match
@ -135,7 +158,7 @@ cdef class DependencyMatcher:
# TODO: Better ways to hash edges in pattern? # TODO: Better ways to hash edges in pattern?
for j in range(len(_patterns[i])): for j in range(len(_patterns[i])):
k = self._normalize_key(unicode(key) + DELIMITER + unicode(i) + DELIMITER + unicode(j)) k = self._normalize_key(unicode(key) + DELIMITER + unicode(i) + DELIMITER + unicode(j))
self.token_matcher.add(k, [_patterns[i][j]]) self.matcher.add(k, [_patterns[i][j]])
_keys_to_token[k] = j _keys_to_token[k] = j
_keys_to_token_list.append(_keys_to_token) _keys_to_token_list.append(_keys_to_token)
self._keys_to_token.setdefault(key, []) self._keys_to_token.setdefault(key, [])
@ -144,14 +167,14 @@ cdef class DependencyMatcher:
for pattern in patterns: for pattern in patterns:
nodes = {} nodes = {}
for i in range(len(pattern)): for i in range(len(pattern)):
nodes[pattern[i]["SPEC"]["NODE_NAME"]] = i nodes[pattern[i]["RIGHT_ID"]] = i
_nodes_list.append(nodes) _nodes_list.append(nodes)
self._nodes.setdefault(key, []) self._nodes.setdefault(key, [])
self._nodes[key].extend(_nodes_list) self._nodes[key].extend(_nodes_list)
# Create an object tree to traverse later on. This data structure # Create an object tree to traverse later on. This data structure
# enables easy tree pattern match. Doc-Token based tree cannot be # enables easy tree pattern match. Doc-Token based tree cannot be
# reused since it is memory-heavy and tightly coupled with the Doc. # reused since it is memory-heavy and tightly coupled with the Doc.
self.retrieve_tree(patterns, _nodes_list,key) self.retrieve_tree(patterns, _nodes_list, key)
def retrieve_tree(self, patterns, _nodes_list, key): def retrieve_tree(self, patterns, _nodes_list, key):
_heads_list = [] _heads_list = []
@ -161,13 +184,13 @@ cdef class DependencyMatcher:
root = -1 root = -1
for j in range(len(patterns[i])): for j in range(len(patterns[i])):
token_pattern = patterns[i][j] token_pattern = patterns[i][j]
if ("NBOR_RELOP" not in token_pattern["SPEC"]): if ("REL_OP" not in token_pattern):
heads[j] = ('root', j) heads[j] = ('root', j)
root = j root = j
else: else:
heads[j] = ( heads[j] = (
token_pattern["SPEC"]["NBOR_RELOP"], token_pattern["REL_OP"],
_nodes_list[i][token_pattern["SPEC"]["NBOR_NAME"]] _nodes_list[i][token_pattern["LEFT_ID"]]
) )
_heads_list.append(heads) _heads_list.append(heads)
_root_list.append(root) _root_list.append(root)
@ -202,11 +225,21 @@ cdef class DependencyMatcher:
RETURNS (tuple): The rule, as an (on_match, patterns) tuple. RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
""" """
key = self._normalize_key(key) key = self._normalize_key(key)
if key not in self._patterns: if key not in self._raw_patterns:
return default return default
return (self._callbacks[key], self._patterns[key]) return (self._callbacks[key], self._raw_patterns[key])
def __call__(self, Doc doc): def remove(self, key):
key = self._normalize_key(key)
if not key in self._patterns:
raise ValueError(Errors.E175.format(key=key))
self._patterns.pop(key)
self._raw_patterns.pop(key)
self._nodes.pop(key)
self._tree.pop(key)
self._root.pop(key)
def __call__(self, object doclike):
"""Find all token sequences matching the supplied pattern. """Find all token sequences matching the supplied pattern.
doclike (Doc or Span): The document to match over. doclike (Doc or Span): The document to match over.
@ -214,8 +247,14 @@ cdef class DependencyMatcher:
describing the matches. A match tuple describes a span describing the matches. A match tuple describes a span
`doc[start:end]`. The `label_id` and `key` are both integers. `doc[start:end]`. The `label_id` and `key` are both integers.
""" """
if isinstance(doclike, Doc):
doc = doclike
elif isinstance(doclike, Span):
doc = doclike.as_doc()
else:
raise ValueError(Errors.E195.format(good="Doc or Span", got=type(doclike).__name__))
matched_key_trees = [] matched_key_trees = []
matches = self.token_matcher(doc) matches = self.matcher(doc)
for key in list(self._patterns.keys()): for key in list(self._patterns.keys()):
_patterns_list = self._patterns[key] _patterns_list = self._patterns[key]
_keys_to_token_list = self._keys_to_token[key] _keys_to_token_list = self._keys_to_token[key]
@ -244,26 +283,26 @@ cdef class DependencyMatcher:
length = len(_nodes) length = len(_nodes)
matched_trees = [] matched_trees = []
self.recurse(_tree,id_to_position,_node_operator_map,0,[],matched_trees) self.recurse(_tree, id_to_position, _node_operator_map, 0, [], matched_trees)
matched_key_trees.append((key,matched_trees)) for matched_tree in matched_trees:
matched_key_trees.append((key, matched_tree))
for i, (ent_id, nodes) in enumerate(matched_key_trees): for i, (match_id, nodes) in enumerate(matched_key_trees):
on_match = self._callbacks.get(ent_id) on_match = self._callbacks.get(match_id)
if on_match is not None: if on_match is not None:
on_match(self, doc, i, matched_key_trees) on_match(self, doc, i, matched_key_trees)
return matched_key_trees return matched_key_trees
def recurse(self,tree,id_to_position,_node_operator_map,int patternLength,visited_nodes,matched_trees): def recurse(self, tree, id_to_position, _node_operator_map, int patternLength, visited_nodes, matched_trees):
cdef bool isValid; cdef bint isValid;
if(patternLength == len(id_to_position.keys())): if patternLength == len(id_to_position.keys()):
isValid = True isValid = True
for node in range(patternLength): for node in range(patternLength):
if(node in tree): if node in tree:
for idx, (relop,nbor) in enumerate(tree[node]): for idx, (relop,nbor) in enumerate(tree[node]):
computed_nbors = numpy.asarray(_node_operator_map[visited_nodes[node]][relop]) computed_nbors = numpy.asarray(_node_operator_map[visited_nodes[node]][relop])
isNbor = False isNbor = False
for computed_nbor in computed_nbors: for computed_nbor in computed_nbors:
if(computed_nbor.i == visited_nodes[nbor]): if computed_nbor.i == visited_nodes[nbor]:
isNbor = True isNbor = True
isValid = isValid & isNbor isValid = isValid & isNbor
if(isValid): if(isValid):
@ -271,14 +310,14 @@ cdef class DependencyMatcher:
return return
allPatternNodes = numpy.asarray(id_to_position[patternLength]) allPatternNodes = numpy.asarray(id_to_position[patternLength])
for patternNode in allPatternNodes: for patternNode in allPatternNodes:
self.recurse(tree,id_to_position,_node_operator_map,patternLength+1,visited_nodes+[patternNode],matched_trees) self.recurse(tree, id_to_position, _node_operator_map, patternLength+1, visited_nodes+[patternNode], matched_trees)
# Given a node and an edge operator, to return the list of nodes # Given a node and an edge operator, to return the list of nodes
# from the doc that belong to node+operator. This is used to store # from the doc that belong to node+operator. This is used to store
# all the results beforehand to prevent unnecessary computation while # all the results beforehand to prevent unnecessary computation while
# pattern matching # pattern matching
# _node_operator_map[node][operator] = [...] # _node_operator_map[node][operator] = [...]
def get_node_operator_map(self,doc,tree,id_to_position,nodes,root): def get_node_operator_map(self, doc, tree, id_to_position, nodes, root):
_node_operator_map = {} _node_operator_map = {}
all_node_indices = nodes.values() all_node_indices = nodes.values()
all_operators = [] all_operators = []
@ -295,24 +334,14 @@ cdef class DependencyMatcher:
_node_operator_map[node] = {} _node_operator_map[node] = {}
for operator in all_operators: for operator in all_operators:
_node_operator_map[node][operator] = [] _node_operator_map[node][operator] = []
# Used to invoke methods for each operator
switcher = {
"<": self.dep,
">": self.gov,
"<<": self.dep_chain,
">>": self.gov_chain,
".": self.imm_precede,
"$+": self.imm_right_sib,
"$-": self.imm_left_sib,
"$++": self.right_sib,
"$--": self.left_sib
}
for operator in all_operators: for operator in all_operators:
for node in all_nodes: for node in all_nodes:
_node_operator_map[node][operator] = switcher.get(operator)(doc,node) _node_operator_map[node][operator] = self._ops.get(operator)(doc, node)
return _node_operator_map return _node_operator_map
def dep(self, doc, node): def dep(self, doc, node):
if doc[node].head == doc[node]:
return []
return [doc[node].head] return [doc[node].head]
def gov(self,doc,node): def gov(self,doc,node):
@ -322,36 +351,51 @@ cdef class DependencyMatcher:
return list(doc[node].ancestors) return list(doc[node].ancestors)
def gov_chain(self, doc, node): def gov_chain(self, doc, node):
return list(doc[node].subtree) return [t for t in doc[node].subtree if t != doc[node]]
def imm_precede(self, doc, node): def imm_precede(self, doc, node):
if node > 0: sent = self._get_sent(doc[node])
if node < len(doc) - 1 and doc[node + 1] in sent:
return [doc[node + 1]]
return []
def precede(self, doc, node):
sent = self._get_sent(doc[node])
return [doc[i] for i in range(node + 1, sent.end)]
def imm_follow(self, doc, node):
sent = self._get_sent(doc[node])
if node > 0 and doc[node - 1] in sent:
return [doc[node - 1]] return [doc[node - 1]]
return [] return []
def follow(self, doc, node):
sent = self._get_sent(doc[node])
return [doc[i] for i in range(sent.start, node)]
def imm_right_sib(self, doc, node): def imm_right_sib(self, doc, node):
for child in list(doc[node].head.children): for child in list(doc[node].head.children):
if child.i == node - 1: if child.i == node + 1:
return [doc[child.i]] return [doc[child.i]]
return [] return []
def imm_left_sib(self, doc, node): def imm_left_sib(self, doc, node):
for child in list(doc[node].head.children): for child in list(doc[node].head.children):
if child.i == node + 1: if child.i == node - 1:
return [doc[child.i]] return [doc[child.i]]
return [] return []
def right_sib(self, doc, node): def right_sib(self, doc, node):
candidate_children = [] candidate_children = []
for child in list(doc[node].head.children): for child in list(doc[node].head.children):
if child.i < node: if child.i > node:
candidate_children.append(doc[child.i]) candidate_children.append(doc[child.i])
return candidate_children return candidate_children
def left_sib(self, doc, node): def left_sib(self, doc, node):
candidate_children = [] candidate_children = []
for child in list(doc[node].head.children): for child in list(doc[node].head.children):
if child.i > node: if child.i < node:
candidate_children.append(doc[child.i]) candidate_children.append(doc[child.i])
return candidate_children return candidate_children
@ -360,3 +404,15 @@ cdef class DependencyMatcher:
return self.vocab.strings.add(key) return self.vocab.strings.add(key)
else: else:
return key return key
def _get_sent(self, token):
root = (list(token.ancestors) or [token])[-1]
return token.doc[root.left_edge.i:root.right_edge.i + 1]
def unpickle_matcher(vocab, patterns, callbacks):
matcher = DependencyMatcher(vocab)
for key, pattern in patterns.items():
callback = callbacks.get(key, None)
matcher.add(key, pattern, on_match=callback)
return matcher


@ -0,0 +1,386 @@
import pytest
import pickle
import re
import copy
from mock import Mock
from spacy.matcher import DependencyMatcher
from ..util import get_doc
@pytest.fixture
def doc(en_vocab):
text = "The quick brown fox jumped over the lazy fox"
heads = [3, 2, 1, 1, 0, -1, 2, 1, -3]
deps = ["det", "amod", "amod", "nsubj", "ROOT", "prep", "pobj", "det", "amod"]
doc = get_doc(en_vocab, text.split(), heads=heads, deps=deps)
return doc
@pytest.fixture
def patterns(en_vocab):
def is_brown_yellow(text):
return bool(re.compile(r"brown|yellow").match(text))
IS_BROWN_YELLOW = en_vocab.add_flag(is_brown_yellow)
pattern1 = [
{"RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"}},
{
"LEFT_ID": "fox",
"REL_OP": ">",
"RIGHT_ID": "q",
"RIGHT_ATTRS": {"ORTH": "quick", "DEP": "amod"},
},
{
"LEFT_ID": "fox",
"REL_OP": ">",
"RIGHT_ID": "r",
"RIGHT_ATTRS": {IS_BROWN_YELLOW: True},
},
]
pattern2 = [
{"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
{
"LEFT_ID": "jumped",
"REL_OP": ">",
"RIGHT_ID": "fox1",
"RIGHT_ATTRS": {"ORTH": "fox"},
},
{
"LEFT_ID": "jumped",
"REL_OP": ".",
"RIGHT_ID": "over",
"RIGHT_ATTRS": {"ORTH": "over"},
},
]
pattern3 = [
{"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
{
"LEFT_ID": "jumped",
"REL_OP": ">",
"RIGHT_ID": "fox",
"RIGHT_ATTRS": {"ORTH": "fox"},
},
{
"LEFT_ID": "fox",
"REL_OP": ">>",
"RIGHT_ID": "r",
"RIGHT_ATTRS": {"ORTH": "brown"},
},
]
pattern4 = [
{"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
{
"LEFT_ID": "jumped",
"REL_OP": ">",
"RIGHT_ID": "fox",
"RIGHT_ATTRS": {"ORTH": "fox"},
}
]
pattern5 = [
{"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
{
"LEFT_ID": "jumped",
"REL_OP": ">>",
"RIGHT_ID": "fox",
"RIGHT_ATTRS": {"ORTH": "fox"},
},
]
return [pattern1, pattern2, pattern3, pattern4, pattern5]
@pytest.fixture
def dependency_matcher(en_vocab, patterns, doc):
matcher = DependencyMatcher(en_vocab)
mock = Mock()
for i in range(1, len(patterns) + 1):
if i == 1:
matcher.add("pattern1", [patterns[0]], on_match=mock)
else:
matcher.add("pattern" + str(i), [patterns[i - 1]])
return matcher
def test_dependency_matcher(dependency_matcher, doc, patterns):
assert len(dependency_matcher) == 5
assert "pattern3" in dependency_matcher
assert dependency_matcher.get("pattern3") == (None, [patterns[2]])
matches = dependency_matcher(doc)
assert len(matches) == 6
assert matches[0][1] == [3, 1, 2]
assert matches[1][1] == [4, 3, 5]
assert matches[2][1] == [4, 3, 2]
assert matches[3][1] == [4, 3]
assert matches[4][1] == [4, 3]
assert matches[5][1] == [4, 8]
span = doc[0:6]
matches = dependency_matcher(span)
assert len(matches) == 5
assert matches[0][1] == [3, 1, 2]
assert matches[1][1] == [4, 3, 5]
assert matches[2][1] == [4, 3, 2]
assert matches[3][1] == [4, 3]
assert matches[4][1] == [4, 3]
def test_dependency_matcher_pickle(en_vocab, patterns, doc):
matcher = DependencyMatcher(en_vocab)
for i in range(1, len(patterns) + 1):
matcher.add("pattern" + str(i), [patterns[i - 1]])
matches = matcher(doc)
assert matches[0][1] == [3, 1, 2]
assert matches[1][1] == [4, 3, 5]
assert matches[2][1] == [4, 3, 2]
assert matches[3][1] == [4, 3]
assert matches[4][1] == [4, 3]
assert matches[5][1] == [4, 8]
b = pickle.dumps(matcher)
matcher_r = pickle.loads(b)
assert len(matcher) == len(matcher_r)
matches = matcher_r(doc)
assert matches[0][1] == [3, 1, 2]
assert matches[1][1] == [4, 3, 5]
assert matches[2][1] == [4, 3, 2]
assert matches[3][1] == [4, 3]
assert matches[4][1] == [4, 3]
assert matches[5][1] == [4, 8]
def test_dependency_matcher_pattern_validation(en_vocab):
pattern = [
{"RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"}},
{
"LEFT_ID": "fox",
"REL_OP": ">",
"RIGHT_ID": "q",
"RIGHT_ATTRS": {"ORTH": "quick", "DEP": "amod"},
},
{
"LEFT_ID": "fox",
"REL_OP": ">",
"RIGHT_ID": "r",
"RIGHT_ATTRS": {"ORTH": "brown"},
},
]
matcher = DependencyMatcher(en_vocab)
# original pattern is valid
matcher.add("FOUNDED", [pattern])
# individual pattern not wrapped in a list
with pytest.raises(ValueError):
matcher.add("FOUNDED", pattern)
# no anchor node
with pytest.raises(ValueError):
matcher.add("FOUNDED", [pattern[1:]])
# required keys missing
with pytest.raises(ValueError):
pattern2 = copy.deepcopy(pattern)
del pattern2[0]["RIGHT_ID"]
matcher.add("FOUNDED", [pattern2])
with pytest.raises(ValueError):
pattern2 = copy.deepcopy(pattern)
del pattern2[1]["RIGHT_ID"]
matcher.add("FOUNDED", [pattern2])
with pytest.raises(ValueError):
pattern2 = copy.deepcopy(pattern)
del pattern2[1]["RIGHT_ATTRS"]
matcher.add("FOUNDED", [pattern2])
with pytest.raises(ValueError):
pattern2 = copy.deepcopy(pattern)
del pattern2[1]["LEFT_ID"]
matcher.add("FOUNDED", [pattern2])
with pytest.raises(ValueError):
pattern2 = copy.deepcopy(pattern)
del pattern2[1]["REL_OP"]
matcher.add("FOUNDED", [pattern2])
# invalid operator
with pytest.raises(ValueError):
pattern2 = copy.deepcopy(pattern)
pattern2[1]["REL_OP"] = "!!!"
matcher.add("FOUNDED", [pattern2])
# duplicate node name
with pytest.raises(ValueError):
pattern2 = copy.deepcopy(pattern)
pattern2[1]["RIGHT_ID"] = "fox"
matcher.add("FOUNDED", [pattern2])
def test_dependency_matcher_callback(en_vocab, doc):
pattern = [
{"RIGHT_ID": "quick", "RIGHT_ATTRS": {"ORTH": "quick"}},
]
matcher = DependencyMatcher(en_vocab)
mock = Mock()
matcher.add("pattern", [pattern], on_match=mock)
matches = matcher(doc)
mock.assert_called_once_with(matcher, doc, 0, matches)
# check that matches with and without callback are the same (#4590)
matcher2 = DependencyMatcher(en_vocab)
matcher2.add("pattern", [pattern])
matches2 = matcher2(doc)
assert matches == matches2
@pytest.mark.parametrize(
"op,num_matches", [(".", 8), (".*", 20), (";", 8), (";*", 20),]
)
def test_dependency_matcher_precedence_ops(en_vocab, op, num_matches):
# two sentences to test that all matches are within the same sentence
doc = get_doc(
en_vocab,
words=["a", "b", "c", "d", "e"] * 2,
heads=[0, -1, -2, -3, -4] * 2,
deps=["dep"] * 10,
)
match_count = 0
for text in ["a", "b", "c", "d", "e"]:
pattern = [
{"RIGHT_ID": "1", "RIGHT_ATTRS": {"ORTH": text}},
{"LEFT_ID": "1", "REL_OP": op, "RIGHT_ID": "2", "RIGHT_ATTRS": {},},
]
matcher = DependencyMatcher(en_vocab)
matcher.add("A", [pattern])
matches = matcher(doc)
match_count += len(matches)
for match in matches:
match_id, token_ids = match
# token_ids[0] op token_ids[1]
if op == ".":
assert token_ids[0] == token_ids[1] - 1
elif op == ";":
assert token_ids[0] == token_ids[1] + 1
elif op == ".*":
assert token_ids[0] < token_ids[1]
elif op == ";*":
assert token_ids[0] > token_ids[1]
# all tokens are within the same sentence
assert doc[token_ids[0]].sent == doc[token_ids[1]].sent
assert match_count == num_matches
@pytest.mark.parametrize(
"left,right,op,num_matches",
[
("fox", "jumped", "<", 1),
("the", "lazy", "<", 0),
("jumped", "jumped", "<", 0),
("fox", "jumped", ">", 0),
("fox", "lazy", ">", 1),
("lazy", "lazy", ">", 0),
("fox", "jumped", "<<", 2),
("jumped", "fox", "<<", 0),
("the", "fox", "<<", 2),
("fox", "jumped", ">>", 0),
("over", "the", ">>", 1),
("fox", "the", ">>", 2),
("fox", "jumped", ".", 1),
("lazy", "fox", ".", 1),
("the", "fox", ".", 0),
("the", "the", ".", 0),
("fox", "jumped", ";", 0),
("lazy", "fox", ";", 0),
("the", "fox", ";", 0),
("the", "the", ";", 0),
("quick", "fox", ".*", 2),
("the", "fox", ".*", 3),
("the", "the", ".*", 1),
("fox", "jumped", ";*", 1),
("quick", "fox", ";*", 0),
("the", "fox", ";*", 1),
("the", "the", ";*", 1),
("quick", "brown", "$+", 1),
("brown", "quick", "$+", 0),
("brown", "brown", "$+", 0),
("quick", "brown", "$-", 0),
("brown", "quick", "$-", 1),
("brown", "brown", "$-", 0),
("the", "brown", "$++", 1),
("brown", "the", "$++", 0),
("brown", "brown", "$++", 0),
("the", "brown", "$--", 0),
("brown", "the", "$--", 1),
("brown", "brown", "$--", 0),
],
)
def test_dependency_matcher_ops(en_vocab, doc, left, right, op, num_matches):
right_id = right
if left == right:
right_id = right + "2"
pattern = [
{"RIGHT_ID": left, "RIGHT_ATTRS": {"LOWER": left}},
{
"LEFT_ID": left,
"REL_OP": op,
"RIGHT_ID": right_id,
"RIGHT_ATTRS": {"LOWER": right},
},
]
matcher = DependencyMatcher(en_vocab)
matcher.add("pattern", [pattern])
matches = matcher(doc)
assert len(matches) == num_matches
@pytest.mark.parametrize(
"pattern",
[
# empty
[],
# unsupported op
[
{"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
{
"LEFT_ID": "jumped",
"REL_OP": "==",
"RIGHT_ID": "fox",
"RIGHT_ATTRS": {"ORTH": "fox"},
},
],
# first dict isn't just a node
[
{
"LEFT_ID": "jumped",
"REL_OP": "==",
"RIGHT_ID": "fox",
"RIGHT_ATTRS": {"ORTH": "fox"},
},
],
# missing op
[
{"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
{"LEFT_ID": "jumped", "RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"},},
],
# missing left-hand ID
[
{"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
{"REL_OP": ">", "RIGHT_ID": "fox", "RIGHT_ATTRS": {"ORTH": "fox"},},
],
# missing right-hand ID
[
{"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
{"LEFT_ID": "jumped", "REL_OP": ">", "RIGHT_ATTRS": {"ORTH": "fox"},},
],
# missing right-hand attrs
[
{"RIGHT_ID": "jumped", "RIGHT_ATTRS": {"ORTH": "jumped"}},
{"LEFT_ID": "jumped", "REL_OP": ">", "RIGHT_ID": "fox",},
],
],
)
def test_dependency_matcher_pattern_validation(en_vocab, pattern):
matcher = DependencyMatcher(en_vocab)
with pytest.raises(ValueError):
matcher.add("pattern", [pattern])


@ -1,7 +1,6 @@
import pytest import pytest
import re
from mock import Mock from mock import Mock
from spacy.matcher import Matcher, DependencyMatcher from spacy.matcher import Matcher
from spacy.tokens import Doc, Token, Span from spacy.tokens import Doc, Token, Span
from ..doc.test_underscore import clean_underscore # noqa: F401 from ..doc.test_underscore import clean_underscore # noqa: F401
@ -292,84 +291,6 @@ def test_matcher_extension_set_membership(en_vocab):
assert len(matches) == 0 assert len(matches) == 0
@pytest.fixture
def text():
return "The quick brown fox jumped over the lazy fox"
@pytest.fixture
def heads():
return [3, 2, 1, 1, 0, -1, 2, 1, -3]
@pytest.fixture
def deps():
return ["det", "amod", "amod", "nsubj", "prep", "pobj", "det", "amod"]
@pytest.fixture
def dependency_matcher(en_vocab):
def is_brown_yellow(text):
return bool(re.compile(r"brown|yellow|over").match(text))
IS_BROWN_YELLOW = en_vocab.add_flag(is_brown_yellow)
pattern1 = [
{"SPEC": {"NODE_NAME": "fox"}, "PATTERN": {"ORTH": "fox"}},
{
"SPEC": {"NODE_NAME": "q", "NBOR_RELOP": ">", "NBOR_NAME": "fox"},
"PATTERN": {"ORTH": "quick", "DEP": "amod"},
},
{
"SPEC": {"NODE_NAME": "r", "NBOR_RELOP": ">", "NBOR_NAME": "fox"},
"PATTERN": {IS_BROWN_YELLOW: True},
},
]
pattern2 = [
{"SPEC": {"NODE_NAME": "jumped"}, "PATTERN": {"ORTH": "jumped"}},
{
"SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"},
"PATTERN": {"ORTH": "fox"},
},
{
"SPEC": {"NODE_NAME": "quick", "NBOR_RELOP": ".", "NBOR_NAME": "jumped"},
"PATTERN": {"ORTH": "fox"},
},
]
pattern3 = [
{"SPEC": {"NODE_NAME": "jumped"}, "PATTERN": {"ORTH": "jumped"}},
{
"SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"},
"PATTERN": {"ORTH": "fox"},
},
{
"SPEC": {"NODE_NAME": "r", "NBOR_RELOP": ">>", "NBOR_NAME": "fox"},
"PATTERN": {"ORTH": "brown"},
},
]
matcher = DependencyMatcher(en_vocab)
matcher.add("pattern1", [pattern1])
matcher.add("pattern2", [pattern2])
matcher.add("pattern3", [pattern3])
return matcher
def test_dependency_matcher_compile(dependency_matcher):
assert len(dependency_matcher) == 3
# def test_dependency_matcher(dependency_matcher, text, heads, deps):
# doc = get_doc(dependency_matcher.vocab, text.split(), heads=heads, deps=deps)
# matches = dependency_matcher(doc)
# assert matches[0][1] == [[3, 1, 2]]
# assert matches[1][1] == [[4, 3, 3]]
# assert matches[2][1] == [[4, 3, 2]]
def test_matcher_basic_check(en_vocab): def test_matcher_basic_check(en_vocab):
matcher = Matcher(en_vocab) matcher = Matcher(en_vocab)
# Potential mistake: pass in pattern instead of list of patterns # Potential mistake: pass in pattern instead of list of patterns


@ -38,32 +38,6 @@ def test_gold_misaligned(en_tokenizer, text, words):
Example.from_dict(doc, {"words": words}) Example.from_dict(doc, {"words": words})
def test_issue4590(en_vocab):
"""Test that matches param in on_match method are the same as matches run with no on_match method"""
pattern = [
{"SPEC": {"NODE_NAME": "jumped"}, "PATTERN": {"ORTH": "jumped"}},
{
"SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"},
"PATTERN": {"ORTH": "fox"},
},
{
"SPEC": {"NODE_NAME": "quick", "NBOR_RELOP": ".", "NBOR_NAME": "jumped"},
"PATTERN": {"ORTH": "fox"},
},
]
on_match = Mock()
matcher = DependencyMatcher(en_vocab)
matcher.add("pattern", on_match, pattern)
text = "The quick brown fox jumped over the lazy fox"
heads = [3, 2, 1, 1, 0, -1, 2, 1, -3]
deps = ["det", "amod", "amod", "nsubj", "ROOT", "prep", "det", "amod", "pobj"]
doc = get_doc(en_vocab, text.split(), heads=heads, deps=deps)
matches = matcher(doc)
on_match_args = on_match.call_args
assert on_match_args[0][3] == matches
def test_issue4651_with_phrase_matcher_attr(): def test_issue4651_with_phrase_matcher_attr():
"""Test that the EntityRuler PhraseMatcher is deserialized correctly using """Test that the EntityRuler PhraseMatcher is deserialized correctly using
the method from_disk when the EntityRuler argument phrase_matcher_attr is the method from_disk when the EntityRuler argument phrase_matcher_attr is


@ -1,65 +1,135 @@
--- ---
title: DependencyMatcher title: DependencyMatcher
teaser: Match sequences of tokens, based on the dependency parse teaser: Match subtrees within a dependency parse
tag: class tag: class
new: 3
source: spacy/matcher/dependencymatcher.pyx source: spacy/matcher/dependencymatcher.pyx
--- ---
The `DependencyMatcher` follows the same API as the [`Matcher`](/api/matcher) The `DependencyMatcher` follows the same API as the [`Matcher`](/api/matcher)
and [`PhraseMatcher`](/api/phrasematcher) and lets you match on dependency trees and [`PhraseMatcher`](/api/phrasematcher) and lets you match on dependency trees
using the using
[Semgrex syntax](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html). [Semgrex operators](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html).
It requires a pretrained [`DependencyParser`](/api/parser) or other component It requires a pretrained [`DependencyParser`](/api/parser) or other component
that sets the `Token.dep` attribute. that sets the `Token.dep` and `Token.head` attributes.
## Pattern format {#patterns} ## Pattern format {#patterns}
> ```json > ```python
> ### Example > ### Example
> # pattern: "[subject] ... initially founded"
> [ > [
> # anchor token: founded
> { > {
> "SPEC": {"NODE_NAME": "founded"}, > "RIGHT_ID": "founded",
> "PATTERN": {"ORTH": "founded"} > "RIGHT_ATTRS": {"ORTH": "founded"}
> }, > },
> # founded -> subject
> { > {
> "SPEC": { > "LEFT_ID": "founded",
> "NODE_NAME": "founder", > "REL_OP": ">",
> "NBOR_RELOP": ">", > "RIGHT_ID": "subject",
> "NBOR_NAME": "founded" > "RIGHT_ATTRS": {"DEP": "nsubj"}
> },
> "PATTERN": {"DEP": "nsubj"}
> }, > },
> # "founded" follows "initially"
> { > {
> "SPEC": { > "LEFT_ID": "founded",
> "NODE_NAME": "object", > "REL_OP": ";",
> "NBOR_RELOP": ">", > "RIGHT_ID": "initially",
> "NBOR_NAME": "founded" > "RIGHT_ATTRS": {"ORTH": "initially"}
> },
> "PATTERN": {"DEP": "dobj"}
> } > }
> ] > ]
> ``` > ```
A pattern added to the `DependencyMatcher` consists of a list of dictionaries, A pattern added to the `DependencyMatcher` consists of a list of dictionaries,
with each dictionary describing a node to match. Each pattern should have the with each dictionary describing a token to match. Except for the first
following top-level keys: dictionary, which defines an anchor token using only `RIGHT_ID` and
`RIGHT_ATTRS`, each pattern should have the following keys:
| Name | Description | | Name | Description |
| --------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `PATTERN` | The token attributes to match in the same format as patterns provided to the regular token-based [`Matcher`](/api/matcher). ~~Dict[str, Any]~~ | | `LEFT_ID` | The name of the left-hand node in the relation, which has been defined in an earlier node. |
| `SPEC` | The relationships of the nodes in the subtree that should be matched. ~~Dict[str, str]~~ | | `REL_OP` | An operator that describes how the two nodes are related. ~~str~~ |
| `RIGHT_ID` | A unique name for the right-hand node in the relation. ~~str~~ |
| `RIGHT_ATTRS` | The token attributes to match for the right-hand node in the same format as patterns provided to the regular token-based [`Matcher`](/api/matcher). ~~Dict[str, Any]~~ |
The `SPEC` includes the following fields: The first pattern defines an anchor token and each additional token added to the
pattern is linked to an existing token `LEFT_ID` by the relation `REL_OP` and is
described by the name `RIGHT_ID` and the attributes `RIGHT_ATTRS`.
| Name | Description | Let's say we want to find sentences describing who founded what kind of company:
| ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `NODE_NAME` | A unique name for this node to refer to it in other specs. ~~str~~ | - `Smith founded a healthcare company in 2005.`
| `NBOR_RELOP` | A [Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html) operator that describes how the two nodes are related. ~~str~~ | - `Williams initially founded an insurance company in 1987.`
| `NBOR_NAME` | The unique name of the node that this node is connected to. ~~str~~ | - `Lee, an established CEO, founded yet another AI startup.`
Since it's the root of the dependency parse, `founded` is a good choice for the
anchor token in our pattern:
```python
pattern = [
{"RIGHT_ID": "anchor_founded", "RIGHT_ATTRS": {"ORTH": "founded"}}
]
```
We can add the subject as the token with the dependency label `nsubj` that is a
direct child `>` of the anchor token named `anchor_founded`:
```python
pattern = [
{"RIGHT_ID": "anchor_founded", "RIGHT_ATTRS": {"ORTH": "founded"}},
{
"LEFT_ID": "anchor_founded",
"REL_OP": ">",
"RIGHT_ID": "subject",
"RIGHT_ATTRS": {"DEP": "nsubj"},
}
]
```
And the direct object along with its modifier:
```python
pattern = [ ...
{
"LEFT_ID": "anchor_founded",
"REL_OP": ">",
"RIGHT_ID": "founded_object",
"RIGHT_ATTRS": {"DEP": "dobj"},
},
{
"LEFT_ID": "founded_object",
"REL_OP": ">",
"RIGHT_ID": "founded_object_modifier",
"RIGHT_ATTRS": {"DEP": {"IN": ["amod", "compound"]}},
}
]
```
### Operators
The following operators are supported by the `DependencyMatcher`, most of which
come directly from
[Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html):
| Symbol | Description |
| --------- | ------------------------------------------------------------------------------------------------------------------- |
| `A < B` | `A` is the immediate dependent of `B` |
| `A > B` | `A` is the immediate head of `B` |
| `A << B` | `A` is the dependent in a chain to `B` following dep->head paths |
| `A >> B` | `A` is the head in a chain to `B` following head->dep paths |
| `A . B` | `A` immediately precedes `B`, i.e. `A.i == B.i - 1`, and both are within the same dependency tree |
| `A .* B` | `A` precedes `B`, i.e. `A.i < B.i`, and both are within the same dependency tree _(not in Semgrex)_ |
| `A ; B` | `A` immediately follows `B`, i.e. `A.i == B.i + 1`, and both are within the same dependency tree _(not in Semgrex)_ |
| `A ;* B` | `A` follows `B`, i.e. `A.i > B.i`, and both are within the same dependency tree _(not in Semgrex)_ |
| `A $+ B` | `B` is a right immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i - 1` |
| `A $- B` | `B` is a left immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i + 1` |
| `A $++ B` | `B` is a right sibling of `A`, i.e. `A` and `B` have the same parent and `A.i < B.i` |
| `A $-- B` | `B` is a left sibling of `A`, i.e. `A` and `B` have the same parent and `A.i > B.i` |
## DependencyMatcher.\_\_init\_\_ {#init tag="method"} ## DependencyMatcher.\_\_init\_\_ {#init tag="method"}
Create a rule-based `DependencyMatcher`. Create a `DependencyMatcher`.
> #### Example > #### Example
> >
@ -68,13 +138,15 @@ Create a rule-based `DependencyMatcher`.
> matcher = DependencyMatcher(nlp.vocab) > matcher = DependencyMatcher(nlp.vocab)
> ``` > ```
| Name | Description | | Name | Description |
| ------- | ----------------------------------------------------------------------------------------------------- | | -------------- | ----------------------------------------------------------------------------------------------------- |
| `vocab` | The vocabulary object, which must be shared with the documents the matcher will operate on. ~~Vocab~~ | | `vocab` | The vocabulary object, which must be shared with the documents the matcher will operate on. ~~Vocab~~ |
| _keyword-only_ | |
| `validate` | Validate all patterns added to this matcher. ~~bool~~ |
## DependencyMatcher.\_\_call\_\_ {#call tag="method"} ## DependencyMatcher.\_\_call\_\_ {#call tag="method"}
Find all token sequences matching the supplied patterns on the `Doc` or `Span`. Find all tokens matching the supplied patterns on the `Doc` or `Span`.
> #### Example > #### Example
> >
@ -82,36 +154,32 @@ Find all token sequences matching the supplied patterns on the `Doc` or `Span`.
> from spacy.matcher import DependencyMatcher > from spacy.matcher import DependencyMatcher
> >
> matcher = DependencyMatcher(nlp.vocab) > matcher = DependencyMatcher(nlp.vocab)
> pattern = [ > pattern = [{"RIGHT_ID": "founded_id",
> {"SPEC": {"NODE_NAME": "founded"}, "PATTERN": {"ORTH": "founded"}}, > "RIGHT_ATTRS": {"ORTH": "founded"}}]
> {"SPEC": {"NODE_NAME": "founder", "NBOR_RELOP": ">", "NBOR_NAME": "founded"}, "PATTERN": {"DEP": "nsubj"}}, > matcher.add("FOUNDED", [pattern])
> ]
> matcher.add("Founder", [pattern])
> doc = nlp("Bill Gates founded Microsoft.") > doc = nlp("Bill Gates founded Microsoft.")
> matches = matcher(doc) > matches = matcher(doc)
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `doclike` | The `Doc` or `Span` to match over. ~~Union[Doc, Span]~~ | | `doclike` | The `Doc` or `Span` to match over. ~~Union[Doc, Span]~~ |
| **RETURNS** | A list of `(match_id, start, end)` tuples, describing the matches. A match tuple describes a span `doc[start:end`]. The `match_id` is the ID of the added match pattern. ~~List[Tuple[int, int, int]]~~ | | **RETURNS** | A list of `(match_id, token_ids)` tuples, describing the matches. The `match_id` is the ID of the match pattern and `token_ids` is a list of token indices matched by the pattern, where the position of each token in the list corresponds to the position of the node specification in the pattern. ~~List[Tuple[int, List[int]]]~~ |
## DependencyMatcher.\_\_len\_\_ {#len tag="method"} ## DependencyMatcher.\_\_len\_\_ {#len tag="method"}
Get the number of rules (edges) added to the dependency matcher. Note that this Get the number of rules added to the dependency matcher. Note that this only
only returns the number of rules (identical with the number of IDs), not the returns the number of rules (identical with the number of IDs), not the number
number of individual patterns. of individual patterns.
> #### Example > #### Example
> >
> ```python > ```python
> matcher = DependencyMatcher(nlp.vocab) > matcher = DependencyMatcher(nlp.vocab)
> assert len(matcher) == 0 > assert len(matcher) == 0
> pattern = [ > pattern = [{"RIGHT_ID": "founded_id",
> {"SPEC": {"NODE_NAME": "founded"}, "PATTERN": {"ORTH": "founded"}}, > "RIGHT_ATTRS": {"ORTH": "founded"}}]
> {"SPEC": {"NODE_NAME": "START_ENTITY", "NBOR_RELOP": ">", "NBOR_NAME": "founded"}, "PATTERN": {"DEP": "nsubj"}}, > matcher.add("FOUNDED", [pattern])
> ]
> matcher.add("Rule", [pattern])
> assert len(matcher) == 1 > assert len(matcher) == 1
> ``` > ```
@ -126,10 +194,10 @@ Check whether the matcher contains rules for a match ID.
> #### Example > #### Example
> >
> ```python > ```python
> matcher = Matcher(nlp.vocab) > matcher = DependencyMatcher(nlp.vocab)
> assert "Rule" not in matcher > assert "FOUNDED" not in matcher
> matcher.add("Rule", [pattern]) > matcher.add("FOUNDED", [pattern])
> assert "Rule" in matcher > assert "FOUNDED" in matcher
> ``` > ```
| Name | Description | | Name | Description |
@ -152,33 +220,15 @@ will be overwritten.
> print('Matched!', matches) > print('Matched!', matches)
> >
> matcher = DependencyMatcher(nlp.vocab) > matcher = DependencyMatcher(nlp.vocab)
> matcher.add("TEST_PATTERNS", patterns) > matcher.add("FOUNDED", patterns, on_match=on_match)
> ``` > ```
| Name | Description | | Name | Description |
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | | -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `match_id` | An ID for the thing you're matching. ~~str~~ | | `match_id` | An ID for the patterns. ~~str~~ |
| `patterns` | list | Match pattern. A pattern consists of a list of dicts, where each dict describes a `"PATTERN"` and `"SPEC"`. ~~List[List[Dict[str, dict]]]~~ | | `patterns` | A list of match patterns. A pattern consists of a list of dicts, where each dict describes a token in the tree. ~~List[List[Dict[str, Union[str, Dict]]]]~~ |
| _keyword-only_ | | | | _keyword-only_ | | |
| `on_match` | Callback function to act on matches. Takes the arguments `matcher`, `doc`, `i` and `matches`. ~~Optional[Callable[[Matcher, Doc, int, List[tuple], Any]]~~ | | `on_match` | Callback function to act on matches. Takes the arguments `matcher`, `doc`, `i` and `matches`. ~~Optional[Callable[[DependencyMatcher, Doc, int, List[Tuple], Any]]~~ |
## DependencyMatcher.remove {#remove tag="method"}
Remove a rule from the matcher. A `KeyError` is raised if the match ID does not
exist.
> #### Example
>
> ```python
> matcher.add("Rule", [pattern]])
> assert "Rule" in matcher
> matcher.remove("Rule")
> assert "Rule" not in matcher
> ```
| Name | Description |
| ----- | --------------------------------- |
| `key` | The ID of the match rule. ~~str~~ |
## DependencyMatcher.get {#get tag="method"} ## DependencyMatcher.get {#get tag="method"}
@ -188,11 +238,29 @@ Retrieve the pattern stored for a key. Returns the rule as an
> #### Example > #### Example
> >
> ```python > ```python
> matcher.add("Rule", [pattern], on_match=on_match) > matcher.add("FOUNDED", patterns, on_match=on_match)
> on_match, patterns = matcher.get("Rule") > on_match, patterns = matcher.get("FOUNDED")
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | --------------------------------------------------------------------------------------------- | | ----------- | ----------------------------------------------------------------------------------------------------------- |
| `key` | The ID of the match rule. ~~str~~ | | `key` | The ID of the match rule. ~~str~~ |
| **RETURNS** | The rule, as an `(on_match, patterns)` tuple. ~~Tuple[Optional[Callable], List[List[dict]]]~~ | | **RETURNS** | The rule, as an `(on_match, patterns)` tuple. ~~Tuple[Optional[Callable], List[List[Union[Dict, Tuple]]]]~~ |
## DependencyMatcher.remove {#remove tag="method"}
Remove a rule from the dependency matcher. A `KeyError` is raised if the match
ID does not exist.
> #### Example
>
> ```python
> matcher.add("FOUNDED", patterns)
> assert "FOUNDED" in matcher
> matcher.remove("FOUNDED")
> assert "FOUNDED" not in matcher
> ```
| Name | Description |
| ----- | --------------------------------- |
| `key` | The ID of the match rule. ~~str~~ |


@ -0,0 +1,64 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd">
<svg width="40cm" height="9cm" viewBox="78 215 793 171" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="Background">
<g>
<rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="79.225" y="263.65" width="169.55" height="54" rx="0" ry="0"/>
<text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="164" y="286.55">
<tspan x="164" y="286.55">ID: founded</tspan>
<tspan x="164" y="302.55">ORTH: founded</tspan>
</text>
</g>
<g>
<rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="426.238" y="216" width="139.1" height="54" rx="0" ry="0"/>
<text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="495.788" y="238.9">
<tspan x="495.788" y="238.9">ID: subject</tspan>
<tspan x="495.788" y="254.9">DEP: nsubj</tspan>
</text>
</g>
<g>
<rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="429.662" y="330.65" width="132.25" height="54" rx="0" ry="0"/>
<text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="495.787" y="353.55">
<tspan x="495.787" y="353.55">ID: object</tspan>
<tspan x="495.787" y="369.55">DEP: dobj</tspan>
</text>
</g>
<g>
<line style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x1="248.775" y1="290.65" x2="416.834" y2="245.525"/>
<polygon style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" fill-rule="evenodd" points="424.078,243.58 415.717,251.002 416.834,245.525 413.123,241.344 "/>
</g>
<g>
<line style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x1="248.775" y1="290.65" x2="420.533" y2="354.268"/>
<polygon style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" fill-rule="evenodd" points="427.566,356.873 416.452,358.089 420.533,354.268 419.925,348.711 "/>
</g>
<g>
<ellipse style="fill: #f3e815; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" cx="342.393" cy="265.102" rx="20.1432" ry="22.8019"/>
<text font-size="12.8" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="342.393" y="269.002">
<tspan x="342.393" y="269.002">&gt;</tspan>
</text>
</g>
<g>
<ellipse style="fill: #f3e815; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" cx="342.393" cy="326.65" rx="20.1432" ry="22.8019"/>
<text font-size="12.8" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="342.393" y="330.55">
<tspan x="342.393" y="330.55">&gt;</tspan>
</text>
</g>
<g>
<rect style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x="697.337" y="330.65" width="172.4" height="54" rx="0" ry="0"/>
<text font-size="12.7998" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="783.537" y="353.55">
<tspan x="783.537" y="353.55">ID: modifier</tspan>
<tspan x="783.537" y="369.55">DEP: amod | compound</tspan>
</text>
</g>
<g>
<line style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" x1="561.912" y1="357.65" x2="687.601" y2="357.65"/>
<polygon style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" fill-rule="evenodd" points="695.101,357.65 685.101,362.65 687.601,357.65 685.101,352.65 "/>
</g>
<g>
<ellipse style="fill: #f3e815; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" cx="629.625" cy="357.65" rx="20.1432" ry="22.8019"/>
<text font-size="12.8" style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="629.625" y="361.55">
<tspan x="629.625" y="361.55">&gt;</tspan>
</text>
</g>
</g>
</svg>


[New image (58 lines): displaCy dependency visualization of "Smith founded a healthcare company." with nsubj, det, compound and dobj arcs — 3.8 KiB]

View File

@ -4,6 +4,7 @@ teaser: Find phrases and tokens, and match entities
menu:
- ['Token Matcher', 'matcher']
- ['Phrase Matcher', 'phrasematcher']
- ['Dependency Matcher', 'dependencymatcher']
- ['Entity Ruler', 'entityruler']
- ['Models & Rules', 'models-rules']
---
@ -938,10 +939,10 @@ object patterns as efficiently as possible and without running any of the other
pipeline components. If the token attributes you want to match on are set by a
pipeline component, **make sure that the pipeline component runs** when you
create the pattern. For example, to match on `POS` or `LEMMA`, the pattern `Doc`
objects need to have part-of-speech tags set by the `tagger` or `morphologizer`.
You can either call the `nlp` object on your pattern texts instead of
`nlp.make_doc`, or use [`nlp.select_pipes`](/api/language#select_pipes) to
disable components selectively.
</Infobox>
@ -972,10 +973,268 @@ to match phrases with the same sequence of punctuation and non-punctuation
tokens as the pattern. But this can easily get confusing and doesn't have much
of an advantage over writing one or two token patterns.
## Dependency Matcher {#dependencymatcher new="3"}
The [`DependencyMatcher`](/api/dependencymatcher) lets you match patterns within
the dependency parse. It requires a model containing a parser such as the
[`DependencyParser`](/api/dependencyparser). Instead of defining a list of
adjacent tokens as in `Matcher` patterns, the `DependencyMatcher` patterns match
tokens in the dependency parse and specify the relations between them.
> ```python
> ### Example
> from spacy.matcher import DependencyMatcher
>
> # "[subject] ... initially founded"
> pattern = [
> # anchor token: founded
> {
> "RIGHT_ID": "founded",
> "RIGHT_ATTRS": {"ORTH": "founded"}
> },
> # founded -> subject
> {
> "LEFT_ID": "founded",
> "REL_OP": ">",
> "RIGHT_ID": "subject",
> "RIGHT_ATTRS": {"DEP": "nsubj"}
> },
> # "founded" follows "initially"
> {
> "LEFT_ID": "founded",
> "REL_OP": ";",
> "RIGHT_ID": "initially",
> "RIGHT_ATTRS": {"ORTH": "initially"}
> }
> ]
>
> matcher = DependencyMatcher(nlp.vocab)
> matcher.add("FOUNDED", [pattern])
> matches = matcher(doc)
> ```
A pattern added to the `DependencyMatcher` consists of a list of dictionaries,
with each dictionary describing a token to match and its relation to an existing
token in the pattern. Except for the first dictionary, which defines an anchor
token using only `RIGHT_ID` and `RIGHT_ATTRS`, each dictionary should have the
following keys:
| Name | Description |
| ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `LEFT_ID`    | The name of the left-hand node in the relation, which has been defined in an earlier node. ~~str~~ |
| `REL_OP` | An operator that describes how the two nodes are related. ~~str~~ |
| `RIGHT_ID` | A unique name for the right-hand node in the relation. ~~str~~ |
| `RIGHT_ATTRS` | The token attributes to match for the right-hand node in the same format as patterns provided to the regular token-based [`Matcher`](/api/matcher). ~~Dict[str, Any]~~ |
Each additional token added to the pattern is linked to an existing token
`LEFT_ID` by the relation `REL_OP`. The new token is given the name `RIGHT_ID`
and described by the attributes `RIGHT_ATTRS`.
<Infobox title="Important note" variant="warning">
Because the unique token **names** in `LEFT_ID` and `RIGHT_ID` are used to
identify tokens, the order of the dicts in the patterns is important: a token
name needs to be defined as `RIGHT_ID` in one dict in the pattern **before** it
can be used as `LEFT_ID` in another dict.
</Infobox>
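For example, here is a minimal sketch of a pattern that violates this rule (the token names and attributes are only illustrative): the second dict uses `subject` as `LEFT_ID` before any dict has introduced it as a `RIGHT_ID`, so the matcher should reject it when the pattern is added.
```python
import spacy
from spacy.matcher import DependencyMatcher

nlp = spacy.blank("en")
matcher = DependencyMatcher(nlp.vocab)

bad_pattern = [
    {"RIGHT_ID": "founded", "RIGHT_ATTRS": {"ORTH": "founded"}},
    {
        "LEFT_ID": "subject",  # "subject" hasn't been declared as a RIGHT_ID yet
        "REL_OP": "<",
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"POS": "VERB"},
    },
    {
        "LEFT_ID": "founded",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"},
    },
]

try:
    matcher.add("FOUNDED", [bad_pattern])
except ValueError as err:
    print("Invalid pattern:", err)

# Swapping the second and third dicts so that "subject" is declared as a
# RIGHT_ID before it is used as a LEFT_ID makes the pattern valid.
```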
### Dependency matcher operators
The following operators are supported by the `DependencyMatcher`, most of which
come directly from
[Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html):
| Symbol | Description |
| --------- | ------------------------------------------------------------------------------------------------------------------- |
| `A < B` | `A` is the immediate dependent of `B` |
| `A > B` | `A` is the immediate head of `B` |
| `A << B` | `A` is the dependent in a chain to `B` following dep->head paths |
| `A >> B` | `A` is the head in a chain to `B` following head->dep paths |
| `A . B` | `A` immediately precedes `B`, i.e. `A.i == B.i - 1`, and both are within the same dependency tree |
| `A .* B` | `A` precedes `B`, i.e. `A.i < B.i`, and both are within the same dependency tree _(not in Semgrex)_ |
| `A ; B` | `A` immediately follows `B`, i.e. `A.i == B.i + 1`, and both are within the same dependency tree _(not in Semgrex)_ |
| `A ;* B` | `A` follows `B`, i.e. `A.i > B.i`, and both are within the same dependency tree _(not in Semgrex)_ |
| `A $+ B` | `B` is a right immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i - 1` |
| `A $- B` | `B` is a left immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i + 1` |
| `A $++ B` | `B` is a right sibling of `A`, i.e. `A` and `B` have the same parent and `A.i < B.i` |
| `A $-- B` | `B` is a left sibling of `A`, i.e. `A` and `B` have the same parent and `A.i > B.i` |
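As a quick, hedged illustration of the sibling operators (this sketch is not part of the walkthrough below, and the exact matches depend on the parser's output), the following pattern looks for an `amod` modifier that is immediately followed by a second modifier of the same head, e.g. `quick` and `brown` in "The quick brown fox", assuming the parser attaches both adjectives to `fox`:

```python
import spacy
from spacy.matcher import DependencyMatcher

nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

pattern = [
    # anchor: any adjectival modifier
    {"RIGHT_ID": "first_mod", "RIGHT_ATTRS": {"DEP": "amod"}},
    # a second modifier that shares the same head and immediately follows
    # the first one (right immediate sibling, "$+")
    {
        "LEFT_ID": "first_mod",
        "REL_OP": "$+",
        "RIGHT_ID": "second_mod",
        "RIGHT_ATTRS": {"DEP": "amod"},
    },
]
matcher.add("STACKED_MODIFIERS", [pattern])

doc = nlp("The quick brown fox jumps over the lazy dog.")
for match_id, token_ids in matcher(doc):
    print([doc[t].text for t in token_ids])  # e.g. ['quick', 'brown']
```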
### Designing dependency matcher patterns
Let's say we want to find sentences describing who founded what kind of company:
- `Smith founded a healthcare company in 2005.`
- `Williams initially founded an insurance company in 1987.`
- `Lee, an experienced CEO, has founded two AI startups.`
The dependency parse for `Smith founded a healthcare company` shows the types of
relations and tokens we want to match:
import DisplaCyDepFoundedHtml from 'images/displacy-dep-founded.html'
<Iframe title="displaCy visualization of dependencies" html={DisplaCyDepFoundedHtml} height={450} />
The relations we're interested in are:
- the founder is the subject (`nsubj`) of the token with the text `founded`
- the company is the object (`dobj`) of `founded`
- the kind of company may be an adjective (`amod`, not shown above) or a
compound (`compound`)
The first step is to pick an anchor token for the pattern. Since it's the root
of the dependency parse, `founded` is a good choice here. It is often easier to
construct patterns when all dependency relation operators point from the head to
the children. In this example, we'll only use `>`, which connects a head to an
immediate dependent as `head > child`.
The simplest dependency matcher pattern will identify and name a single token in
the tree:
```python
### {executable="true"}
import spacy
from spacy.matcher import DependencyMatcher
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)
pattern = [
{
"RIGHT_ID": "anchor_founded", # unique name
"RIGHT_ATTRS": {"ORTH": "founded"} # token pattern for "founded"
}
]
matcher.add("FOUNDED", [pattern])
doc = nlp("Smith founded two companies.")
matches = matcher(doc)
print(matches) # [(4851363122962674176, [1])]
```
Now that we have a named anchor token (`anchor_founded`), we can add the founder
as the immediate dependent (`>`) of `founded` with the dependency label `nsubj`:
```python
pattern = [
{
"RIGHT_ID": "anchor_founded",
"RIGHT_ATTRS": {"ORTH": "founded"}
},
{
"LEFT_ID": "anchor_founded",
"REL_OP": ">",
"RIGHT_ID": "subject",
"RIGHT_ATTRS": {"DEP": "nsubj"},
}
]
```
The direct object (`dobj`) is added in the same way:
```python
pattern = [ ...
{
"LEFT_ID": "anchor_founded",
"REL_OP": ">",
"RIGHT_ID": "founded_object",
"RIGHT_ATTRS": {"DEP": "dobj"},
}
]
```
When the subject and object tokens are added, they are required to have names
under the key `RIGHT_ID`, which are allowed to be any unique string, e.g.
`founded_subject`. These names can then be used as `LEFT_ID` to link new tokens
into the pattern. For the final part of our pattern, we'll specify that the
token `founded_object` should have a modifier with the dependency relation
`amod` or `compound`:
```python
pattern = [ ...
{
"LEFT_ID": "founded_object",
"REL_OP": ">",
"RIGHT_ID": "founded_object_modifier",
"RIGHT_ATTRS": {"DEP": {"IN": ["amod", "compound"]}},
}
]
```
You can picture the process of creating a dependency matcher pattern as defining
an anchor token on the left and building up the pattern by linking tokens
one-by-one on the right using relation operators. To create a valid pattern,
each new token needs to be linked to an existing token on its left. As with
`founded` in this example, a token may be linked to more than one token on its
right:
<!-- TODO: adjust for final example, prettify -->
![Dependency matcher pattern](../images/dep-match-diagram.svg)
The full pattern comes together as shown in the example below:
```python
### {executable="true"}
import spacy
from spacy.matcher import DependencyMatcher
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)
pattern = [
{
"RIGHT_ID": "anchor_founded",
"RIGHT_ATTRS": {"ORTH": "founded"}
},
{
"LEFT_ID": "anchor_founded",
"REL_OP": ">",
"RIGHT_ID": "subject",
"RIGHT_ATTRS": {"DEP": "nsubj"},
},
{
"LEFT_ID": "anchor_founded",
"REL_OP": ">",
"RIGHT_ID": "founded_object",
"RIGHT_ATTRS": {"DEP": "dobj"},
},
{
"LEFT_ID": "founded_object",
"REL_OP": ">",
"RIGHT_ID": "founded_object_modifier",
"RIGHT_ATTRS": {"DEP": {"IN": ["amod", "compound"]}},
}
]
matcher.add("FOUNDED", [pattern])
doc = nlp("Lee, an experienced CEO, has founded two AI startups.")
matches = matcher(doc)
print(matches) # [(4851363122962674176, [6, 0, 10, 9])]
# each token_id corresponds to one pattern dict
match_id, token_ids = matches[0]
for i in range(len(token_ids)):
print(pattern[i]["RIGHT_ID"] + ":", doc[token_ids[i]].text)
```
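The token IDs follow the order of the pattern dicts rather than document order. If you want a contiguous `Span` covering everything a match touched, one option, continuing from the snippet above (a sketch, not part of the official example), is to slice the `Doc` between the smallest and largest matched indices:

```python
# Continuing from the example above: build a span from the matched token IDs
match_id, token_ids = matches[0]
start, end = min(token_ids), max(token_ids) + 1
print(doc[start:end].text)
```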
<Infobox title="Important note on speed" variant="warning">
The dependency matcher may be slow when token patterns can potentially match
many tokens in the sentence or when relation operators allow longer paths in the
dependency parse, e.g. `<<`, `>>`, `.*` and `;*`.
To improve the matcher speed, try to make your token patterns and operators as
specific as possible. For example, use `>` instead of `>>` if possible and use
token patterns that include dependency labels and other token attributes instead
of patterns such as `{}` that match any token in the sentence.
</Infobox>
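To make that advice concrete, here is a hedged sketch (the names and attribute choices are illustrative) contrasting a loose pattern with a more specific alternative. The two patterns do not match the same things, but the second one gives the matcher far fewer candidate tokens and paths to check:

```python
# Loose: the second node can be *any* token reachable from the verb via a
# head->dep chain of any length, which can be expensive to evaluate
loose_pattern = [
    {"RIGHT_ID": "verb", "RIGHT_ATTRS": {"POS": "VERB"}},
    {"LEFT_ID": "verb", "REL_OP": ">>", "RIGHT_ID": "arg", "RIGHT_ATTRS": {}},
]

# More specific: restrict both the operator and the token attributes, so only
# immediate nominal subjects are considered
specific_pattern = [
    {"RIGHT_ID": "verb", "RIGHT_ATTRS": {"POS": "VERB"}},
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj", "POS": {"IN": ["NOUN", "PROPN", "PRON"]}},
    },
]
```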
## Rule-based entity recognition {#entityruler new="2.1"}
The [`EntityRuler`](/api/entityruler) is a component that lets you add named
entities based on pattern dictionaries, which makes it easy to combine
rule-based and statistical named entity recognition for even more powerful
models.

View File

@ -153,6 +153,7 @@ add to your pipeline and customize for your use case:
| [`Morphologizer`](/api/morphologizer) | Trainable component to predict morphological features. |
| [`Lemmatizer`](/api/lemmatizer) | Standalone component for rule-based and lookup lemmatization. |
| [`AttributeRuler`](/api/attributeruler) | Component for setting token attributes using match patterns. |
| [`DependencyMatcher`](/api/dependencymatcher) | Component for matching subtrees within a dependency parse. |
| [`Transformer`](/api/transformer) | Component for using [transformer models](/usage/embeddings-transformers) in your pipeline, accessing outputs and aligning tokens. Provided via [`spacy-transformers`](https://github.com/explosion/spacy-transformers). |
<Infobox title="Details & Documentation" emoji="📖" list>
@ -314,7 +315,8 @@ format for documenting argument and return types.
[`Transformer`](/api/transformer), [`Lemmatizer`](/api/lemmatizer),
[`Morphologizer`](/api/morphologizer),
[`AttributeRuler`](/api/attributeruler),
[`SentenceRecognizer`](/api/sentencerecognizer),
[`DependencyMatcher`](/api/dependencymatcher), [`Pipe`](/api/pipe),
[`Corpus`](/api/corpus)
</Infobox>