mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 13:47:13 +03:00
951825532c
* Improved Dutch language resources and Dutch lemmatization * Fix conftest * Update punctuation.py * Auto-format * Format and fix tests * Remove unused test file * Re-add deleted test * removed redundant infix regex pattern for ','; note: brackets + simple hyphen remains * Cleaner lemmatization files
70 lines
1.5 KiB
Python
70 lines
1.5 KiB
Python
# coding: utf8
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
# Lookup table of irregular determiner forms.
#
# Each key is a surface form (inflected, contracted or clitic spelling) and
# each value is a one-element tuple holding the base form it maps to.
# Presumably consumed by the Dutch lemmatizer; the consumer is not visible in
# this file, so confirm the expected value shape (tuple of candidates) there.
#
# NOTE(review): a few entries look unusual (e.g. "julliejen", "mee" -> "meer",
# "meet" -> "veel", "dienen" -> "die"). They are kept verbatim here; verify
# them against the lexicon this table was generated from before changing them.
DETERMINERS_IRREG = {
    # Clitic / apostrophe spellings.
    "'r": ("haar",),
    "'s": ("de",),
    "'t": ("het",),
    "'tgene": ("hetgeen",),
    "alle": ("al",),
    "allen": ("al",),
    "aller": ("al",),
    "beiden": ("beide",),
    "beider": ("beide",),
    "d'": ("het",),
    "d'r": ("haar",),
    "der": ("de",),
    "des": ("de",),
    "dezer": ("deze",),
    "dienen": ("die",),
    "dier": ("die",),
    "elke": ("elk",),
    "ene": ("een",),
    "enen": ("een",),
    "ener": ("een",),
    "enige": ("enig",),
    "enigen": ("enig",),
    "er": ("haar",),
    "gene": ("geen",),
    "genen": ("geen",),
    "hare": ("haar",),
    "haren": ("haar",),
    "harer": ("haar",),
    "hunne": ("hun",),
    "hunnen": ("hun",),
    "jou": ("jouw",),
    "jouwe": ("jouw",),
    "julliejen": ("jullie",),
    "m'n": ("mijn",),
    "mee": ("meer",),
    # Degree forms map to their positive: meer/meest -> veel,
    # minder/minst -> weinig.
    "meer": ("veel",),
    "meerderen": ("meerdere",),
    "meest": ("veel",),
    "meesten": ("veel",),
    "meet": ("veel",),
    "menige": ("menig",),
    "mij": ("mijn",),
    "mijnen": ("mijn",),
    "minder": ("weinig",),
    "mindere": ("weinig",),
    "minst": ("weinig",),
    "minste": ("minst",),
    "ne": ("een",),
    "onze": ("ons",),
    "onzent": ("ons",),
    "onzer": ("ons",),
    "ouw": ("uw",),
    "sommige": ("sommig",),
    "sommigen": ("sommig",),
    "u": ("uw",),
    "vaker": ("vaak",),
    "vele": ("veel",),
    "velen": ("veel",),
    "welke": ("welk",),
    "zijne": ("zijn",),
    "zijnen": ("zijn",),
    "zijns": ("zijn",),
    # Accented spelling maps to the unaccented form.
    "één": ("een",),
}
|