mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Remove time from German language data
This commit is contained in:
parent
e8ae588be9
commit
70b51ed7c8
|
@ -10,7 +10,6 @@ from ..util import update_exc
|
|||
from ..language_data import EMOTICONS
|
||||
from .language_data import ORTH_ONLY
|
||||
from .language_data import strings_to_exc
|
||||
from .language_data import get_time_exc
|
||||
|
||||
|
||||
TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
|
||||
|
@ -23,7 +22,6 @@ STOP_WORDS = set(language_data.STOP_WORDS)
|
|||
|
||||
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(EMOTICONS))
|
||||
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))
|
||||
update_exc(TOKENIZER_EXCEPTIONS, get_time_exc(range(1, 24 + 1)))
|
||||
|
||||
|
||||
class German(Language):
|
||||
|
|
|
@ -9,17 +9,6 @@ def strings_to_exc(orths):
|
|||
return {orth: [{ORTH: orth}] for orth in orths}
|
||||
|
||||
|
||||
def get_time_exc(hours):
    """Build tokenizer exceptions for compact hour strings such as "10h".

    Each entry splits "<hour>h" into two sub-tokens: the hour digits and the
    "h" suffix, whose lemma is set to "Uhr".

    hours: iterable of integer hours (each is formatted with "%d").
    Returns a dict mapping every "<hour>h" string to its list of sub-token
    attribute dicts.
    """
    # Currently only supporting formats like "10h", not "10 Uhr".
    # ORTH is given as the digit text rather than the raw integer so that the
    # sub-token texts concatenate back to the exception key.
    return {
        "%dh" % hour: [
            {ORTH: "%d" % hour},
            {ORTH: "h", LEMMA: "Uhr"},
        ]
        for hour in hours
    }
|
||||
|
||||
|
||||
PRON_LEMMA = "-PRON-"
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user