mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 20:28:20 +03:00
a54f0cfc2b
* Norwegian fix: Add support for alternative past tense verb form (vaska). * Norwegian months: Add all Norwegian months to tokenizer exceptions. * More Norwegian abbreviations: Add more Norwegian abbreviations to tokenizer_exceptions. * Contributor agreement khellan: Add signed contributor agreement for khellan (Knut O. Hellan).
33 lines
661 B
Python
33 lines
661 B
Python
# coding: utf8
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
# Suffix-rewrite rules for adjectives. Each entry is a [suffix, replacement]
# pair; the example after each rule shows an inflected form and the form
# obtained by swapping that suffix for the replacement.
ADJECTIVE_RULES = [
    ["e", ""],  # pene -> pen
    ["ere", ""],  # penere -> pen
    ["est", ""],  # penest -> pen
    ["este", ""],  # peneste -> pen
]
|
|
|
|
|
|
# Suffix-rewrite rules for nouns. Each entry is a [suffix, replacement]
# pair; the example after each rule shows an inflected form and the form
# obtained by swapping that suffix for the replacement.
NOUN_RULES = [
    ["en", "e"],  # hansken -> hanske
    ["a", "e"],  # veska -> veske
    ["et", ""],  # dyret -> dyr
    ["er", "e"],  # hansker -> hanske
    ["ene", "e"],  # veskene -> veske
]
|
|
|
|
|
|
# Suffix-rewrite rules for verbs. Each entry is a [suffix, replacement]
# pair; the example after each rule shows an inflected form and the form
# obtained by swapping that suffix for the replacement.
VERB_RULES = [
    ["er", "e"],  # vasker -> vaske
    ["et", "e"],  # vasket -> vaske
    ["a", "e"],  # vaska -> vaske
    ["es", "e"],  # vaskes -> vaske
    ["te", "e"],  # stekte -> steke
    ["år", "å"],  # får -> få
]
|
|
|
|
|
|
# No suffix-rewrite rules are defined for punctuation; the list is left empty.
PUNCT_RULES = []
|