mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-13 13:17:06 +03:00
Remove time from German language data
This commit is contained in:
parent
e8ae588be9
commit
70b51ed7c8
|
@ -10,7 +10,6 @@ from ..util import update_exc
|
||||||
from ..language_data import EMOTICONS
|
from ..language_data import EMOTICONS
|
||||||
from .language_data import ORTH_ONLY
|
from .language_data import ORTH_ONLY
|
||||||
from .language_data import strings_to_exc
|
from .language_data import strings_to_exc
|
||||||
from .language_data import get_time_exc
|
|
||||||
|
|
||||||
|
|
||||||
TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
|
TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
|
||||||
|
@ -23,7 +22,6 @@ STOP_WORDS = set(language_data.STOP_WORDS)
|
||||||
|
|
||||||
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(EMOTICONS))
|
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(EMOTICONS))
|
||||||
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))
|
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))
|
||||||
update_exc(TOKENIZER_EXCEPTIONS, get_time_exc(range(1, 24 + 1)))
|
|
||||||
|
|
||||||
|
|
||||||
class German(Language):
|
class German(Language):
|
||||||
|
|
|
@ -9,17 +9,6 @@ def strings_to_exc(orths):
|
||||||
return {orth: [{ORTH: orth}] for orth in orths}
|
return {orth: [{ORTH: orth}] for orth in orths}
|
||||||
|
|
||||||
|
|
||||||
def get_time_exc(hours):
    """Build tokenizer exceptions for compact hour expressions such as "10h".

    For every value in ``hours`` an entry is created whose key is the hour
    followed by "h" (e.g. ``"10h"``) and whose value is a list of two token
    descriptions: the hour itself, and ``"h"`` with the lemma ``"Uhr"``.

    hours: iterable of integer hours to generate exceptions for.
    Returns: dict mapping each "<hour>h" string to its list of token dicts.
    """
    exc = {}
    for hour in hours:
        # currently only supporting formats like "10h", not "10 Uhr"
        # Format the hour once as text: the key is a string, so the first
        # token's ORTH is stored as the same string rather than the raw int.
        # NOTE(review): presumably the ORTH pieces are expected to
        # concatenate to the key -- confirm against the tokenizer.
        hour_text = "%d" % hour
        exc[hour_text + "h"] = [
            {ORTH: hour_text},
            {ORTH: "h", LEMMA: "Uhr"},
        ]
    return exc
|
|
||||||
|
|
||||||
|
|
||||||
PRON_LEMMA = "-PRON-"
|
PRON_LEMMA = "-PRON-"
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user