mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Better error handling
This commit is contained in:
parent
b03a46792c
commit
b4df202bfa
|
@ -2,7 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ..language_data.punctuation import ALPHA_LOWER, LIST_ELLIPSES, QUOTES, ALPHA_UPPER, LIST_QUOTES, UNITS, \
|
from ..language_data.punctuation import ALPHA_LOWER, LIST_ELLIPSES, QUOTES, ALPHA_UPPER, LIST_QUOTES, UNITS, \
|
||||||
CURRENCY, LIST_PUNCT, ALPHA
|
CURRENCY, LIST_PUNCT, ALPHA, _QUOTES
|
||||||
|
|
||||||
CURRENCY_SYMBOLS = r"\$ ¢ £ € ¥ ฿"
|
CURRENCY_SYMBOLS = r"\$ ¢ £ € ¥ ฿"
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ TOKENIZER_INFIXES = (
|
||||||
r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA),
|
r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA),
|
||||||
r'(?<=[{a}])--(?=[{a}])'.format(a=ALPHA),
|
r'(?<=[{a}])--(?=[{a}])'.format(a=ALPHA),
|
||||||
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
|
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
|
||||||
r'(?<=[{a}])(({q})|[\)\]\(\[])(?=[\-{a}])'.format(a=ALPHA, q=QUOTES),
|
r'(?<=[{a}])([{q}\)\]\(\[])(?=[\-{a}])'.format(a=ALPHA, q=_QUOTES.replace("'", "").strip().replace(" ", "")),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
__all__ = ["TOKENIZER_PREFIXES", "TOKENIZER_SUFFIXES", "TOKENIZER_INFIXES"]
|
__all__ = ["TOKENIZER_PREFIXES", "TOKENIZER_SUFFIXES", "TOKENIZER_INFIXES"]
|
||||||
|
|
|
@ -63,7 +63,6 @@ EU.
|
||||||
F.
|
F.
|
||||||
Fla.
|
Fla.
|
||||||
Folyt.
|
Folyt.
|
||||||
Ford.
|
|
||||||
Fpk.
|
Fpk.
|
||||||
Főszerk.
|
Főszerk.
|
||||||
G.
|
G.
|
||||||
|
@ -184,7 +183,6 @@ Ty.
|
||||||
Tyr.
|
Tyr.
|
||||||
U.
|
U.
|
||||||
Ui.
|
Ui.
|
||||||
User.
|
|
||||||
Ut.
|
Ut.
|
||||||
V.
|
V.
|
||||||
VB.
|
VB.
|
||||||
|
@ -261,7 +259,6 @@ ea.
|
||||||
ed.
|
ed.
|
||||||
eff.
|
eff.
|
||||||
egyh.
|
egyh.
|
||||||
el.
|
|
||||||
ell.
|
ell.
|
||||||
elv.
|
elv.
|
||||||
elvt.
|
elvt.
|
||||||
|
@ -349,7 +346,7 @@ io.
|
||||||
ip.
|
ip.
|
||||||
ir.
|
ir.
|
||||||
irod.
|
irod.
|
||||||
is.
|
irod.
|
||||||
isk.
|
isk.
|
||||||
ism.
|
ism.
|
||||||
izr.
|
izr.
|
||||||
|
@ -604,7 +601,6 @@ zs.
|
||||||
ált.
|
ált.
|
||||||
ápr.
|
ápr.
|
||||||
ásv.
|
ásv.
|
||||||
át.
|
|
||||||
é.
|
é.
|
||||||
ék.
|
ék.
|
||||||
ény.
|
ény.
|
||||||
|
|
|
@ -36,6 +36,7 @@ HYPHEN_TESTS = [
|
||||||
('Lakik-e... van.', ['Lakik', '-e', '...', 'van', '.']),
|
('Lakik-e... van.', ['Lakik', '-e', '...', 'van', '.']),
|
||||||
('Lakik-e van?', ['Lakik', '-e', 'van', '?']),
|
('Lakik-e van?', ['Lakik', '-e', 'van', '?']),
|
||||||
('Lakik-elem van?', ['Lakik-elem', 'van', '?']),
|
('Lakik-elem van?', ['Lakik-elem', 'van', '?']),
|
||||||
|
('Az életbiztosításáról- egy.', ['Az', 'életbiztosításáról-', 'egy', '.']),
|
||||||
('Van lakik-elem.', ['Van', 'lakik-elem', '.']),
|
('Van lakik-elem.', ['Van', 'lakik-elem', '.']),
|
||||||
('A 7-es busz?', ['A', '7-es', 'busz', '?']),
|
('A 7-es busz?', ['A', '7-es', 'busz', '?']),
|
||||||
('A 7-es?', ['A', '7-es', '?']),
|
('A 7-es?', ['A', '7-es', '?']),
|
||||||
|
@ -218,7 +219,7 @@ QUOTE_TESTS = [
|
||||||
('"Ime, hat"-ban irja.', ['"', 'Ime', ',', 'hat', '"', '-ban', 'irja', '.']),
|
('"Ime, hat"-ban irja.', ['"', 'Ime', ',', 'hat', '"', '-ban', 'irja', '.']),
|
||||||
('Az "Ime, hat".', ['Az', '"', 'Ime', ',', 'hat', '"', '.']),
|
('Az "Ime, hat".', ['Az', '"', 'Ime', ',', 'hat', '"', '.']),
|
||||||
('Egy 24"-os monitor.', ['Egy', '24"-os', 'monitor', '.']),
|
('Egy 24"-os monitor.', ['Egy', '24"-os', 'monitor', '.']),
|
||||||
# ("A don't van.", ['A', "don't", 'van', '.'])
|
("A McDonald's van.", ['A', "McDonald's", 'van', '.'])
|
||||||
]
|
]
|
||||||
|
|
||||||
DOT_TESTS = [
|
DOT_TESTS = [
|
||||||
|
|
Loading…
Reference in New Issue
Block a user