spaCy/spacy/tests/regression/test_issue4002.py

# coding: utf8
from __future__ import unicode_literals

from spacy.matcher import PhraseMatcher
from spacy.tokens import Doc


def test_issue4002(en_vocab):
    """Regression test for issue #4002.

    A PhraseMatcher created with attr="NORM" must find the pattern in the
    text both when the pattern's words already have the wanted norms and
    when the pattern's token norms were overwritten by hand.
    """

    def norms_of(tokens):
        # Collect the norm string of every token, in order.
        return [token.norm_ for token in tokens]

    expected_norms = ["c", "d"]

    # Case 1: the pattern words are "c" and "d", so nothing is overwritten.
    plain_matcher = PhraseMatcher(en_vocab, attr="NORM")
    plain_pattern = Doc(en_vocab, words=["c", "d"])
    assert norms_of(plain_pattern) == expected_norms
    plain_matcher.add("TEST", [plain_pattern])

    text = Doc(en_vocab, words=["a", "b", "c", "d"])
    assert norms_of(text) == ["a", "b", "c", "d"]
    assert len(plain_matcher(text)) == 1

    # Case 2: the pattern words are "1" and "2"; their norms are replaced
    # with "c" and "d", and the matcher is expected to match on those norms.
    override_matcher = PhraseMatcher(en_vocab, attr="NORM")
    override_pattern = Doc(en_vocab, words=["1", "2"])
    for token, norm in zip(override_pattern, expected_norms):
        token.norm_ = norm
    assert norms_of(override_pattern) == expected_norms
    override_matcher.add("TEST", [override_pattern])
    assert len(override_matcher(text)) == 1
Add regression test for #4002 Test that the PhraseMatcher can match on overwritten NORM attributes. 2019-07-22 15:18:24 +03:00			`# coding: utf8`
			`from __future__ import unicode_literals`

			`from spacy.matcher import PhraseMatcher`
			`from spacy.tokens import Doc`


			`def test_issue4002(en_vocab):`
			`"""Test that the PhraseMatcher can match on overwritten NORM attributes.`
			`"""`
			`matcher = PhraseMatcher(en_vocab, attr="NORM")`
			`pattern1 = Doc(en_vocab, words=["c", "d"])`
			`assert [t.norm_ for t in pattern1] == ["c", "d"]`
Implement new API for {Phrase}Matcher.add (backwards-compatible) (#4522) * Implement new API for {Phrase}Matcher.add (backwards-compatible) * Update docs * Also update DependencyMatcher.add * Update internals * Rewrite tests to use new API * Add basic check for common mistake Raise error with suggestion if user likely passed in a pattern instead of a list of patterns * Fix typo [ci skip] 2019-10-25 23:21:08 +03:00			`matcher.add("TEST", [pattern1])`
Add regression test for #4002 Test that the PhraseMatcher can match on overwritten NORM attributes. 2019-07-22 15:18:24 +03:00			`doc = Doc(en_vocab, words=["a", "b", "c", "d"])`
			`assert [t.norm_ for t in doc] == ["a", "b", "c", "d"]`
			`matches = matcher(doc)`
			`assert len(matches) == 1`
			`matcher = PhraseMatcher(en_vocab, attr="NORM")`
			`pattern2 = Doc(en_vocab, words=["1", "2"])`
			`pattern2[0].norm_ = "c"`
			`pattern2[1].norm_ = "d"`
			`assert [t.norm_ for t in pattern2] == ["c", "d"]`
Implement new API for {Phrase}Matcher.add (backwards-compatible) (#4522) * Implement new API for {Phrase}Matcher.add (backwards-compatible) * Update docs * Also update DependencyMatcher.add * Update internals * Rewrite tests to use new API * Add basic check for common mistake Raise error with suggestion if user likely passed in a pattern instead of a list of patterns * Fix typo [ci skip] 2019-10-25 23:21:08 +03:00			`matcher.add("TEST", [pattern2])`
Add regression test for #4002 Test that the PhraseMatcher can match on overwritten NORM attributes. 2019-07-22 15:18:24 +03:00			`matches = matcher(doc)`
			`assert len(matches) == 1`