mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-30 20:06:30 +03:00
35 lines
805 B
Python
35 lines
805 B
Python
from __future__ import unicode_literals
|
|
from ...attrs import *
|
|
from ...matcher import Matcher
|
|
from ...tokens import Doc
|
|
from ...en import English
|
|
|
|
def test_overlapping_matches():
    """Run two patterns registered under one entity key over a short doc.

    Both patterns are added to the 'ab' entity and open with the same
    ``{IS_ALPHA: True}`` token spec. The test passes when calling the
    matcher on the doc returns exactly two matches.
    """
    words = ['n', '=', '1', ';', 'a', ':', '5', '%']
    shared_vocab = English.Defaults.create_vocab()
    tokens = Doc(shared_vocab, words=words)

    matcher = Matcher(shared_vocab)
    matcher.add_entity("ab", acceptor=None, on_match=None)

    # (label, token specs) pairs, listed in registration order.
    labelled_patterns = (
        ('a', [{IS_ALPHA: True}, {ORTH: ':'}, {LIKE_NUM: True}, {ORTH: '%'}]),
        ('b', [{IS_ALPHA: True}, {ORTH: '='}, {LIKE_NUM: True}]),
    )
    for label, token_specs in labelled_patterns:
        matcher.add_pattern('ab', token_specs, label=label)

    found = matcher(tokens)
    assert len(found) == 2
|