diff --git a/spacy/matcher/dependencymatcher.pyx b/spacy/matcher/dependencymatcher.pyx index 74c2d002f..adf96702b 100644 --- a/spacy/matcher/dependencymatcher.pyx +++ b/spacy/matcher/dependencymatcher.pyx @@ -82,8 +82,12 @@ cdef class DependencyMatcher: "$-": self._imm_left_sib, "$++": self._right_sib, "$--": self._left_sib, + ">+": self._imm_right_child, + ">-": self._imm_left_child, ">++": self._right_child, ">--": self._left_child, + "<+": self._imm_right_parent, + "<-": self._imm_left_parent, "<++": self._right_parent, "<--": self._left_parent, } @@ -427,12 +431,34 @@ cdef class DependencyMatcher: def _left_sib(self, doc, node): return [doc[child.i] for child in doc[node].head.children if child.i < node] + def _imm_right_child(self, doc, node): + for child in doc[node].children: + if child.i == node + 1: + return [doc[child.i]] + return [] + + def _imm_left_child(self, doc, node): + for child in doc[node].children: + if child.i == node - 1: + return [doc[child.i]] + return [] + def _right_child(self, doc, node): return [doc[child.i] for child in doc[node].children if child.i > node] def _left_child(self, doc, node): return [doc[child.i] for child in doc[node].children if child.i < node] + def _imm_right_parent(self, doc, node): + if doc[node].head.i == node + 1: + return [doc[node].head] + return [] + + def _imm_left_parent(self, doc, node): + if doc[node].head.i == node - 1: + return [doc[node].head] + return [] + def _right_parent(self, doc, node): if doc[node].head.i > node: return [doc[node].head] diff --git a/spacy/tests/matcher/test_dependency_matcher.py b/spacy/tests/matcher/test_dependency_matcher.py index b4e19d69d..200384320 100644 --- a/spacy/tests/matcher/test_dependency_matcher.py +++ b/spacy/tests/matcher/test_dependency_matcher.py @@ -316,16 +316,32 @@ def test_dependency_matcher_precedence_ops(en_vocab, op, num_matches): ("the", "brown", "$--", 0), ("brown", "the", "$--", 1), ("brown", "brown", "$--", 0), + ("over", "jumped", "<+", 0), + ("quick", "fox", "<+", 0), + ("the", "quick", "<+", 0), + ("brown", "fox", "<+", 1), ("quick", "fox", "<++", 1), ("quick", "over", "<++", 0), ("over", "jumped", "<++", 0), ("the", "fox", "<++", 2), + ("brown", "fox", "<-", 0), + ("fox", "over", "<-", 0), + ("the", "over", "<-", 0), + ("over", "jumped", "<-", 1), ("brown", "fox", "<--", 0), ("fox", "jumped", "<--", 0), ("fox", "over", "<--", 1), + ("fox", "brown", ">+", 0), + ("over", "fox", ">+", 0), + ("over", "the", ">+", 0), + ("jumped", "over", ">+", 1), ("jumped", "over", ">++", 1), ("fox", "lazy", ">++", 0), ("over", "the", ">++", 0), + ("jumped", "over", ">-", 0), + ("fox", "quick", ">-", 0), + ("brown", "quick", ">-", 0), + ("fox", "brown", ">-", 1), ("brown", "fox", ">--", 0), ("fox", "brown", ">--", 1), ("jumped", "fox", ">--", 1), diff --git a/website/docs/api/dependencymatcher.mdx b/website/docs/api/dependencymatcher.mdx index 390034a6c..cad5185f7 100644 --- a/website/docs/api/dependencymatcher.mdx +++ b/website/docs/api/dependencymatcher.mdx @@ -82,8 +82,12 @@ come directly from | `A $- B` | `B` is a left immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i + 1`. | | `A $++ B` | `B` is a right sibling of `A`, i.e. `A` and `B` have the same parent and `A.i < B.i`. | | `A $-- B` | `B` is a left sibling of `A`, i.e. `A` and `B` have the same parent and `A.i > B.i`. | +| `A >+ B` | `B` is a right immediate child of `A`, i.e. `A` is a parent of `B` and `A.i == B.i - 1` _(not in Semgrex)_. | +| `A >- B` | `B` is a left immediate child of `A`, i.e. `A` is a parent of `B` and `A.i == B.i + 1` _(not in Semgrex)_. | | `A >++ B` | `B` is a right child of `A`, i.e. `A` is a parent of `B` and `A.i < B.i` _(not in Semgrex)_. | | `A >-- B` | `B` is a left child of `A`, i.e. `A` is a parent of `B` and `A.i > B.i` _(not in Semgrex)_. | +| `A <+ B` | `B` is a right immediate parent of `A`, i.e. `A` is a child of `B` and `A.i == B.i - 1` _(not in Semgrex)_. | +| `A <- B` | `B` is a left immediate parent of `A`, i.e. `A` is a child of `B` and `A.i == B.i + 1` _(not in Semgrex)_. | | `A <++ B` | `B` is a right parent of `A`, i.e. `A` is a child of `B` and `A.i < B.i` _(not in Semgrex)_. | | `A <-- B` | `B` is a left parent of `A`, i.e. `A` is a child of `B` and `A.i > B.i` _(not in Semgrex)_. | diff --git a/website/docs/usage/rule-based-matching.mdx b/website/docs/usage/rule-based-matching.mdx index 628c2953f..6a11ac8bd 100644 --- a/website/docs/usage/rule-based-matching.mdx +++ b/website/docs/usage/rule-based-matching.mdx @@ -1110,6 +1110,14 @@ come directly from | `A $- B` | `B` is a left immediate sibling of `A`, i.e. `A` and `B` have the same parent and `A.i == B.i + 1`. | | `A $++ B` | `B` is a right sibling of `A`, i.e. `A` and `B` have the same parent and `A.i < B.i`. | | `A $-- B` | `B` is a left sibling of `A`, i.e. `A` and `B` have the same parent and `A.i > B.i`. | +| `A >+ B` | `B` is a right immediate child of `A`, i.e. `A` is a parent of `B` and `A.i == B.i - 1` _(not in Semgrex)_. | +| `A >- B` | `B` is a left immediate child of `A`, i.e. `A` is a parent of `B` and `A.i == B.i + 1` _(not in Semgrex)_. | +| `A >++ B` | `B` is a right child of `A`, i.e. `A` is a parent of `B` and `A.i < B.i` _(not in Semgrex)_. | +| `A >-- B` | `B` is a left child of `A`, i.e. `A` is a parent of `B` and `A.i > B.i` _(not in Semgrex)_. | +| `A <+ B` | `B` is a right immediate parent of `A`, i.e. `A` is a child of `B` and `A.i == B.i - 1` _(not in Semgrex)_. | +| `A <- B` | `B` is a left immediate parent of `A`, i.e. `A` is a child of `B` and `A.i == B.i + 1` _(not in Semgrex)_. | +| `A <++ B` | `B` is a right parent of `A`, i.e. `A` is a child of `B` and `A.i < B.i` _(not in Semgrex)_. | +| `A <-- B` | `B` is a left parent of `A`, i.e. `A` is a child of `B` and `A.i > B.i` _(not in Semgrex)_. | ### Designing dependency matcher patterns {id="dependencymatcher-patterns"}