mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
[issue 805] Fix issue
This commit is contained in:
parent
35100c8bdd
commit
d25556bf80
|
@ -5,12 +5,14 @@ from .. import language_data as base
|
||||||
from ..language_data import update_exc, strings_to_exc
|
from ..language_data import update_exc, strings_to_exc
|
||||||
|
|
||||||
from .stop_words import STOP_WORDS
|
from .stop_words import STOP_WORDS
|
||||||
|
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, ORTH_ONLY
|
||||||
|
|
||||||
|
|
||||||
STOP_WORDS = set(STOP_WORDS)
|
STOP_WORDS = set(STOP_WORDS)
|
||||||
|
|
||||||
|
TOKENIZER_EXCEPTIONS = dict(TOKENIZER_EXCEPTIONS)
|
||||||
TOKENIZER_EXCEPTIONS = strings_to_exc(base.EMOTICONS)
|
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))
|
||||||
|
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.EMOTICONS))
|
||||||
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.ABBREVIATIONS))
|
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.ABBREVIATIONS))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -107,7 +107,6 @@ ORTH_ONLY = [
|
||||||
"p.g.a.",
|
"p.g.a.",
|
||||||
"ref.",
|
"ref.",
|
||||||
"resp.",
|
"resp.",
|
||||||
"s.",
|
|
||||||
"s.a.s.",
|
"s.a.s.",
|
||||||
"s.k.",
|
"s.k.",
|
||||||
"st.",
|
"st.",
|
||||||
|
|
Loading…
Reference in New Issue
Block a user