2017-05-22 14:54:20 +03:00
|
|
|
# coding: utf-8
|
2017-06-05 03:26:13 +03:00
|
|
|
from __future__ import unicode_literals
|
2017-03-07 17:55:28 +03:00
|
|
|
import pytest
|
|
|
|
|
|
|
|
from ...matcher import Matcher
|
|
|
|
from ...vocab import Vocab
|
|
|
|
from ...attrs import LOWER
|
|
|
|
from ...tokens import Doc
|
|
|
|
|
|
|
|
|
2017-03-07 18:08:32 +03:00
|
|
|
def test_basic_case():
    """Match a pattern whose middle token carries the '*' operator.

    The pattern is LOWER "bob", then LOWER "and" with OP '*', then LOWER
    "frank". Against the words bob/and/and/frank the test expects exactly
    one match, starting at token 0 and ending at token 4.
    """
    lowercase_getters = {LOWER: lambda string: string.lower()}
    matcher = Matcher(Vocab(lex_attr_getters=lowercase_getters))
    # A flag that is true for every lexeme is registered on the vocab; the
    # pattern below does not reference it.
    IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)
    pattern = [
        {'LOWER': "bob"},
        {'OP': '*', 'LOWER': 'and'},
        {'LOWER': 'frank'},
    ]
    matcher.add('FarAway', None, pattern)
    doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'frank'])
    matches = matcher(doc)
    assert len(matches) == 1
    # Each match unpacks to (id, start, end); only the positions are checked.
    _, start, end = matches[0]
    assert (start, end) == (0, 4)
|
|
|
|
|
2017-03-07 19:16:26 +03:00
|
|
|
|
2017-03-07 17:55:28 +03:00
|
|
|
def test_issue850():
    """Regression test for issue 850: ambiguous variable-length pattern.

    The '*' token in the middle of the pattern can also match the token
    that the following pattern element ("frank") is meant to match. The
    test expects exactly one match, starting at token 0 and ending at
    token 4.
    """
    lowercase_getters = {LOWER: lambda string: string.lower()}
    matcher = Matcher(Vocab(lex_attr_getters=lowercase_getters))
    # Register a flag that is true for every lexeme.
    IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)
    # NOTE(review): the pattern names the flag through the string key
    # 'IS_ANY_TOKEN', not through the id returned by add_flag above --
    # confirm this is how the Matcher is expected to resolve it.
    pattern = [
        {'LOWER': "bob"},
        {'OP': '*', 'IS_ANY_TOKEN': True},
        {'LOWER': 'frank'},
    ]
    matcher.add('FarAway', None, pattern)
    doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'frank'])
    matches = matcher(doc)
    assert len(matches) == 1
    # Each match unpacks to (id, start, end); only the positions are checked.
    _, start, end = matches[0]
    assert (start, end) == (0, 4)
|