mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 13:47:13 +03:00
951825532c
* Improved Dutch language resources and Dutch lemmatization * Fix conftest * Update punctuation.py * Auto-format * Format and fix tests * Remove unused test file * Re-add deleted test * removed redundant infix regex pattern for ','; note: brackets + simple hyphen remains * Cleaner lemmatization files
36 lines
761 B
Python
36 lines
761 B
Python
# coding: utf8
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
# Exception table for irregular pronouns: each key is a surface form and each
# value is a one-element tuple holding the form it maps to.
# NOTE(review): presumably consulted by the Dutch lemmatizer before any
# rule-based fallback -- confirm against the caller.
# NOTE(review): "gaat" -> "gaan" looks like a verb pair rather than a pronoun;
# kept unchanged here, verify whether it belongs in this table.
PRONOUNS_IRREG = {
    "'r": ("haar",),
    "'rzelf": ("haarzelf",),
    "'t": ("het",),
    "d'r": ("haar",),
    "da": ("dat",),
    "dienen": ("die",),
    "diens": ("die",),
    "dies": ("die",),
    "elkaars": ("elkaar",),
    "elkanders": ("elkander",),
    "ene": ("een",),
    "enen": ("een",),
    "fik": ("ik",),
    "gaat": ("gaan",),
    "gene": ("geen",),
    "harer": ("haar",),
    "ieders": ("ieder",),
    "iemands": ("iemand",),
    "ikke": ("ik",),
    "mijnen": ("mijn",),
    "oe": ("je",),
    "onzer": ("ons",),
    "wa": ("wat",),
    "watte": ("wat",),
    "wier": ("wie",),
    "zijns": ("zijn",),
    "zoietsken": ("zoietske",),
    "zulks": ("zulk",),
    "één": ("een",),
}
|