mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Add test for #1488 (passes on v2.0.0a18?)
This commit is contained in:
parent
711278b667
commit
f0986df94b
26
spacy/tests/regression/test_issue1488.py
Normal file
26
spacy/tests/regression/test_issue1488.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import regex as re
|
||||
from ...lang.en import English
|
||||
from ...tokenizer import Tokenizer
|
||||
|
||||
|
||||
def test_issue1488():
    """Regression test for issue #1488.

    Builds a ``Tokenizer`` with custom prefix, suffix, infix and
    token-match patterns and no special-case rules, installs it on a blank
    ``English`` pipeline, and tokenizes a short sentence.

    The test fails if tokenization raises or if any resulting token has
    empty text.
    NOTE(review): the exact symptom reported in #1488 is not visible from
    this file -- confirm that "no empty tokens" is the intended check.
    """
    prefix_re = re.compile(r'''[\[\("']''')
    suffix_re = re.compile(r'''[\]\)"']''')
    infix_re = re.compile(r'''[-~\.]''')
    simple_url_re = re.compile(r'''^https?://''')

    def my_tokenizer(nlp):
        # An empty rules dict means no tokenizer exceptions are registered;
        # only the regex callbacks below drive the splitting.
        return Tokenizer(nlp.vocab, {},
                         prefix_search=prefix_re.search,
                         suffix_search=suffix_re.search,
                         infix_finditer=infix_re.finditer,
                         token_match=simple_url_re.match)

    nlp = English()
    nlp.tokenizer = my_tokenizer(nlp)
    doc = nlp("This is a test.")
    for token in doc:
        # Assert instead of printing so the test can actually fail on a
        # bad tokenization rather than only on an exception.
        assert token.text, "tokenizer produced a token with empty text"
|
Loading…
Reference in New Issue
Block a user