spaCy/spacy/lang/nl/lemmatizer/_determiners_irreg.py
Yves Peirsman 951825532c Improved Dutch language resources and Dutch lemmatization (#3409)
* Improved Dutch language resources and Dutch lemmatization

* Fix conftest

* Update punctuation.py

* Auto-format

* Format and fix tests

* Remove unused test file

* Re-add deleted test

* Removed redundant infix regex pattern for ','; note: brackets + simple hyphen remain

* Cleaner lemmatization files
2019-04-03 14:13:26 +02:00

70 lines
1.5 KiB
Python

# coding: utf8
from __future__ import unicode_literals
# Lookup table for irregular Dutch determiners: each key is a surface form
# (inflected, archaic or contracted/clitic spelling) and each value is a
# one-element tuple holding the lemma string for that form.
#
# NOTE(review): a few entries look inconsistent with their neighbours, e.g.
# "minste" -> "minst" while "minst" -> "weinig", "mee" -> "meer" while
# "meer" -> "veel", and the key "julliejen". They are reproduced unchanged
# here; confirm against the lexicon this table was derived from.
DETERMINERS_IRREG = {
    "'r": ("haar",),
    "'s": ("de",),
    "'t": ("het",),
    "'tgene": ("hetgeen",),
    "alle": ("al",),
    "allen": ("al",),
    "aller": ("al",),
    "beiden": ("beide",),
    "beider": ("beide",),
    "d'": ("het",),
    "d'r": ("haar",),
    "der": ("de",),
    "des": ("de",),
    "dezer": ("deze",),
    "dienen": ("die",),
    "dier": ("die",),
    "elke": ("elk",),
    "ene": ("een",),
    "enen": ("een",),
    "ener": ("een",),
    "enige": ("enig",),
    "enigen": ("enig",),
    "er": ("haar",),
    "gene": ("geen",),
    "genen": ("geen",),
    "hare": ("haar",),
    "haren": ("haar",),
    "harer": ("haar",),
    "hunne": ("hun",),
    "hunnen": ("hun",),
    "jou": ("jouw",),
    "jouwe": ("jouw",),
    "julliejen": ("jullie",),
    "m'n": ("mijn",),
    "mee": ("meer",),
    "meer": ("veel",),
    "meerderen": ("meerdere",),
    "meest": ("veel",),
    "meesten": ("veel",),
    "meet": ("veel",),
    "menige": ("menig",),
    "mij": ("mijn",),
    "mijnen": ("mijn",),
    "minder": ("weinig",),
    "mindere": ("weinig",),
    "minst": ("weinig",),
    "minste": ("minst",),
    "ne": ("een",),
    "onze": ("ons",),
    "onzent": ("ons",),
    "onzer": ("ons",),
    "ouw": ("uw",),
    "sommige": ("sommig",),
    "sommigen": ("sommig",),
    "u": ("uw",),
    "vaker": ("vaak",),
    "vele": ("veel",),
    "velen": ("veel",),
    "welke": ("welk",),
    "zijne": ("zijn",),
    "zijnen": ("zijn",),
    "zijns": ("zijn",),
    "één": ("een",),
}