mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
* Add tokenizer rule to fix numeric range tokenization
This commit is contained in:
parent
3ba66f2dc7
commit
454c1996d0
|
@ -1,3 +1,4 @@
|
|||
\.\.\.
|
||||
(?<=[a-z])\.(?=[A-Z])
|
||||
(?<=[a-zA-Z])-(?=[a-zA-Z])
|
||||
(?<=[0-9])-(?=[0-9])
|
||||
|
|
|
@ -7,6 +7,10 @@ def test_hyphen(en_tokenizer):
|
|||
assert len(tokens) == 3
|
||||
|
||||
|
||||
def test_numeric_range(en_tokenizer):
    """Tokenizing the numeric range '0.1-13.5' must yield exactly three tokens."""
    # The token count is checked directly on the tokenizer's result.
    assert len(en_tokenizer('0.1-13.5')) == 3
|
||||
|
||||
def test_period(en_tokenizer):
    """Tokenizing 'best.Known' must yield exactly three tokens."""
    # The token count is checked directly on the tokenizer's result.
    assert len(en_tokenizer('best.Known')) == 3
|
||||
|
|
Loading…
Reference in New Issue
Block a user