changed tag_map, morph_rules, lemmatizer for Norwegian

This commit is contained in:
Kaisa Korsak 2018-07-18 13:05:27 +02:00
parent 19a5ef1c58
commit dd07e180ea
15 changed files with 390355 additions and 20887 deletions

View File

@ -4,11 +4,11 @@ from __future__ import unicode_literals
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .stop_words import STOP_WORDS
from .morph_rules import MORPH_RULES
from .lemmatizer import LEMMA_EXC, LEMMA_INDEX, LOOKUP, LEMMA_RULES
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
from .lemmatizer import LOOKUP
from .tag_map import TAG_MAP
from .morph_rules import MORPH_RULES
from ...language import Language
from ...attrs import LANG, NORM
from ...util import update_exc, add_lookups
@ -26,8 +26,12 @@ class NorwegianDefaults(Language.Defaults):
lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
stop_words = STOP_WORDS
morph_rules = MORPH_RULES
tag_map = TAG_MAP
lemma_lookup = LOOKUP
lemma_exc = LEMMA_EXC
lemma_index = LEMMA_INDEX
lemma_rules = LEMMA_RULES
syntax_iterators = SYNTAX_ITERATORS

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,23 @@
# coding: utf8
# Structure copied from the English lemmatizer.
from __future__ import unicode_literals
from .lookup import LOOKUP
from ._adjectives_wordforms import ADJECTIVES_WORDFORMS
from ._adverbs_wordforms import ADVERBS_WORDFORMS
from ._nouns_wordforms import NOUNS_WORDFORMS
from ._verbs_wordforms import VERBS_WORDFORMS
from ._lemma_rules import ADJECTIVE_RULES, NOUN_RULES, VERB_RULES, PUNCT_RULES
from ._verbs import VERBS
from ._nouns import NOUNS
from ._adjectives import ADJECTIVES
from ._adverbs import ADVERBS
# Per-part-of-speech lemma index sets, keyed by 'adj', 'adv', 'noun', 'verb'.
LEMMA_INDEX = {
    'adj': ADJECTIVES,
    'adv': ADVERBS,
    'noun': NOUNS,
    'verb': VERBS,
}

# Per-part-of-speech wordform tables, keyed the same way as LEMMA_INDEX.
LEMMA_EXC = {
    'adj': ADJECTIVES_WORDFORMS,
    'adv': ADVERBS_WORDFORMS,
    'noun': NOUNS_WORDFORMS,
    'verb': VERBS_WORDFORMS,
}

# Per-part-of-speech suffix rules; there is no 'adv' entry, and 'punct' is
# included alongside 'adj', 'noun' and 'verb'.
LEMMA_RULES = {
    'adj': ADJECTIVE_RULES,
    'noun': NOUN_RULES,
    'verb': VERB_RULES,
    'punct': PUNCT_RULES,
}

View File

@ -0,0 +1,7 @@
# coding: utf8
from __future__ import unicode_literals
# Whitespace-separated adjective entries. The literal currently holds none,
# so ADJECTIVES evaluates to an empty set.
_ADJECTIVE_ENTRIES = """
"""
ADJECTIVES = set(_ADJECTIVE_ENTRIES.split())

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,7 @@
# coding: utf8
from __future__ import unicode_literals
# Whitespace-separated adverb entries. The literal currently holds none,
# so ADVERBS evaluates to an empty set.
_ADVERB_ENTRIES = """
"""
ADVERBS = set(_ADVERB_ENTRIES.split())

View File

@ -0,0 +1,805 @@
"""
All wordforms are extracted from Norsk Ordbank in Norwegian Bokmål 2005
(CLARINO NB - Språkbanken), Nasjonalbiblioteket, Norway:
https://www.nb.no/sprakbanken/show?serial=oai%3Anb.no%3Asbr-5&lang=en
License:
Creative_Commons-BY (CC-BY) (https://creativecommons.org/licenses/by/4.0/)
"""
# coding: utf8
from __future__ import unicode_literals
# Adverb wordform table: each key is a wordform and each value is a tuple of
# lemmas for it. Every entry in this table maps the wordform to itself.
# Keys are unique; a repeated key in a dict display silently overwrites the
# earlier entry, so repeated entries have been removed.
ADVERBS_WORDFORMS = {
    'à jour': ('à jour',),
    'à la carte': ('à la carte',),
    'à la grecque': ('à la grecque',),
    'à la mode': ('à la mode',),
    'òg': ('òg',),
    'a': ('a',),
    'a cappella': ('a cappella',),
    'a conto': ('a conto',),
    'a konto': ('a konto',),
    'a posteriori': ('a posteriori',),
    'a prima vista': ('a prima vista',),
    'a priori': ('a priori',),
    'a tempo': ('a tempo',),
    'a verbo': ('a verbo',),
    'a viso': ('a viso',),
    'a vista': ('a vista',),
    'ad absurdum': ('ad absurdum',),
    'ad acta': ('ad acta',),
    'ad hoc': ('ad hoc',),
    'ad infinitum': ('ad infinitum',),
    'ad notam': ('ad notam',),
    'ad undas': ('ad undas',),
    'adagio': ('adagio',),
    'akkurat': ('akkurat',),
    'al fresco': ('al fresco',),
    'al secco': ('al secco',),
    'aldeles': ('aldeles',),
    'alders tid': ('alders tid',),
    'aldri': ('aldri',),
    'aleine': ('aleine',),
    'alene': ('alene',),
    'alias': ('alias',),
    'allegretto': ('allegretto',),
    'allegro': ('allegro',),
    'aller': ('aller',),
    'allerede': ('allerede',),
    'allikevel': ('allikevel',),
    'alltid': ('alltid',),
    'alltids': ('alltids',),
    'alt': ('alt',),
    'altfor': ('altfor',),
    'altså': ('altså',),
    'amok': ('amok',),
    'an': ('an',),
    'ana': ('ana',),
    'andante': ('andante',),
    'andantino': ('andantino',),
    'andelsvis': ('andelsvis',),
    'andfares': ('andfares',),
    'andføttes': ('andføttes',),
    'annetsteds': ('annetsteds',),
    'annetstedsfra': ('annetstedsfra',),
    'annetstedshen': ('annetstedshen',),
    'anno': ('anno',),
    'anslagsvis': ('anslagsvis',),
    'anstendigvis': ('anstendigvis',),
    'anstigende': ('anstigende',),
    'antakeligvis': ('antakeligvis',),
    'antydningsvis': ('antydningsvis',),
    'apropos': ('apropos',),
    'argende': ('argende',),
    'at': ('at',),
    'atter': ('atter',),
    'attpåtil': ('attpåtil',),
    'attåt': ('attåt',),
    'au': ('au',),
    'avdelingsvis': ('avdelingsvis',),
    'avdragsvis': ('avdragsvis',),
    'avhendes': ('avhendes',),
    'avhends': ('avhends',),
    'avsatsvis': ('avsatsvis',),
    'bakk': ('bakk',),
    'baklengs': ('baklengs',),
    'bare': ('bare',),
    'bataljonsvis': ('bataljonsvis',),
    'bekende': ('bekende',),
    'belgende': ('belgende',),
    'betids': ('betids',),
    'bi': ('bi',),
    'bidevind': ('bidevind',),
    'bis': ('bis',),
    'bitevis': ('bitevis',),
    'bitte': ('bitte',),
    'bitterlig': ('bitterlig',),
    'blanko': ('blanko',),
    'blidelig': ('blidelig',),
    'blikk': ('blikk',),
    'blikkende': ('blikkende',),
    'blottende': ('blottende',),
    'bom': ('bom',),
    'bommende': ('bommende',),
    'bona fide': ('bona fide',),
    'brennfort': ('brennfort',),
    'brutto': ('brutto',),
    'bråtevis': ('bråtevis',),
    'bums': ('bums',),
    'buntevis': ('buntevis',),
    'buntvis': ('buntvis',),
    'bus': ('bus',),
    'cantabile': ('cantabile',),
    'cf': ('cf',),
    'cif': ('cif',),
    'cirka': ('cirka',),
    'crescendo': ('crescendo',),
    'da': ('da',),
    'dagevis': ('dagevis',),
    'dagstøtt': ('dagstøtt',),
    'dakapo': ('dakapo',),
    'dam': ('dam',),
    'dammende': ('dammende',),
    'dann': ('dann',),
    'de facto': ('de facto',),
    'de jure': ('de jure',),
    'decrescendo': ('decrescendo',),
    'delkredere': ('delkredere',),
    'dels': ('dels',),
    'delvis': ('delvis',),
    'derav': ('derav',),
    'deretter': ('deretter',),
    'derfor': ('derfor',),
    'derimot': ('derimot',),
    'dermed': ('dermed',),
    'dernest': ('dernest',),
    'dess': ('dess',),
    'dessuten': ('dessuten',),
    'dessverre': ('dessverre',),
    'desto': ('desto',),
    'diminuendo': ('diminuendo',),
    'dis': ('dis',),
    'dog': ('dog',),
    'dolce': ('dolce',),
    'dorgende': ('dorgende',),
    'dryppende': ('dryppende',),
    'drøssevis': ('drøssevis',),
    'dus': ('dus',),
    'dusinvis': ('dusinvis',),
    'dyende': ('dyende',),
    'døgnvis': ('døgnvis',),
    'dønn': ('dønn',),
    'dørg': ('dørg',),
    'dørgende': ('dørgende',),
    'dørimellom': ('dørimellom',),
    'ei': ('ei',),
    'eiende': ('eiende',),
    'einkom': ('einkom',),
    'eitrende': ('eitrende',),
    'eks': ('eks',),
    'eksempelvis': ('eksempelvis',),
    'ekspress': ('ekspress',),
    'ekstempore': ('ekstempore',),
    'eldende': ('eldende',),
    'ellers': ('ellers',),
    'en': ('en',),
    'en bloc': ('en bloc',),
    'en detail': ('en detail',),
    'en face': ('en face',),
    'en gros': ('en gros',),
    'en masse': ('en masse',),
    'en passant': ('en passant',),
    'en profil': ('en profil',),
    'en suite': ('en suite',),
    'enda': ('enda',),
    'endatil': ('endatil',),
    'ende': ('ende',),
    'ender': ('ender',),
    'endog': ('endog',),
    'ene': ('ene',),
    'engang': ('engang',),
    'enkeltvis': ('enkeltvis',),
    'enkom': ('enkom',),
    'enn': ('enn',),
    'ennå': ('ennå',),
    'eo ipso': ('eo ipso',),
    'ergo': ('ergo',),
    'et cetera': ('et cetera',),
    'etappevis': ('etappevis',),
    'etterhånden': ('etterhånden',),
    'etterpå': ('etterpå',),
    'etterskottsvis': ('etterskottsvis',),
    'etterskuddsvis': ('etterskuddsvis',),
    'ex animo': ('ex animo',),
    'ex auditorio': ('ex auditorio',),
    'ex cathedra': ('ex cathedra',),
    'ex officio': ('ex officio',),
    'fas': ('fas',),
    'fatt': ('fatt',),
    'feil': ('feil',),
    'femti-femti': ('femti-femti',),
    'fifty-fifty': ('fifty-fifty',),
    'flekkevis': ('flekkevis',),
    'flokkevis': ('flokkevis',),
    'fluks': ('fluks',),
    'fluksens': ('fluksens',),
    'flunkende': ('flunkende',),
    'flust': ('flust',),
    'fly': ('fly',),
    'fob': ('fob',),
    'for': ('for',),
    'for lengst': ('for lengst',),
    'for resten': ('for resten',),
    'for så vidt': ('for så vidt',),
    'for visst': ('for visst',),
    'for øvrig': ('for øvrig',),
    'fordevind': ('fordevind',),
    'fordum': ('fordum',),
    'fore': ('fore',),
    'forhakkende': ('forhakkende',),
    'forholdsvis': ('forholdsvis',),
    'forhåpentlig': ('forhåpentlig',),
    'forhåpentligvis': ('forhåpentligvis',),
    'forlengs': ('forlengs',),
    'formelig': ('formelig',),
    'forresten': ('forresten',),
    'forsøksvis': ('forsøksvis',),
    'forte': ('forte',),
    'fortfarende': ('fortfarende',),
    'fortissimo': ('fortissimo',),
    'fortrinnsvis': ('fortrinnsvis',),
    'framleis': ('framleis',),
    'framlengs': ('framlengs',),
    'framstupes': ('framstupes',),
    'framstups': ('framstups',),
    'franko': ('franko',),
    'free on board': ('free on board',),
    'free on rail': ('free on rail',),
    'fremdeles': ('fremdeles',),
    'fremlengs': ('fremlengs',),
    'fremstupes': ('fremstupes',),
    'fremstups': ('fremstups',),
    'furioso': ('furioso',),
    'fylkesvis': ('fylkesvis',),
    'følgelig': ('følgelig',),
    'først': ('først',),
    'ganske': ('ganske',),
    'gid': ('gid',),
    'givetvis': ('givetvis',),
    'gjerne': ('gjerne',),
    'gladelig': ('gladelig',),
    'glimtvis': ('glimtvis',),
    'glissando': ('glissando',),
    'glugg': ('glugg',),
    'gorr': ('gorr',),
    'gorrende': ('gorrende',),
    'gradvis': ('gradvis',),
    'grandioso': ('grandioso',),
    'granngivelig': ('granngivelig',),
    'grassat': ('grassat',),
    'grave': ('grave',),
    'gruppevis': ('gruppevis',),
    'gudskjelov': ('gudskjelov',),
    'gullende': ('gullende',),
    'gørr': ('gørr',),
    'gørrende': ('gørrende',),
    'hakk': ('hakk',),
    'hakkende': ('hakkende',),
    'halvveis': ('halvveis',),
    'haugevis': ('haugevis',),
    'heden': ('heden',),
    'heiman': ('heiman',),
    'heldigvis': ('heldigvis',),
    'heller': ('heller',),
    'helst': ('helst',),
    'henholdsvis': ('henholdsvis',),
    'herre': ('herre',),
    'hersens': ('hersens',),
    'himlende': ('himlende',),
    'hodekulls': ('hodekulls',),
    'hodestupes': ('hodestupes',),
    'hodestups': ('hodestups',),
    'hoggende': ('hoggende',),
    'honoris causa': ('honoris causa',),
    'hoppende': ('hoppende',),
    'hulter': ('hulter',),
    'hundretusenvis': ('hundretusenvis',),
    'hundrevis': ('hundrevis',),
    'hurra-meg-rundt': ('hurra-meg-rundt',),
    'hvi': ('hvi',),
    'hvor': ('hvor',),
    'hvorav': ('hvorav',),
    'hvordan': ('hvordan',),
    'hvorfor': ('hvorfor',),
    'hånt': ('hånt',),
    'høylig': ('høylig',),
    'høyst': ('høyst',),
    'i alle fall': ('i alle fall',),
    'i stedet': ('i stedet',),
    'iallfall': ('iallfall',),
    'ibidem': ('ibidem',),
    'id est': ('id est',),
    'igjen': ('igjen',),
    'ikke': ('ikke',),
    'ildende': ('ildende',),
    'imens': ('imens',),
    'imidlertid': ('imidlertid',),
    'in absentia': ('in absentia',),
    'in absurdum': ('in absurdum',),
    'in blanko': ('in blanko',),
    'in casu': ('in casu',),
    'in contumaciam': ('in contumaciam',),
    'in corpore': ('in corpore',),
    'in duplo': ('in duplo',),
    'in extenso': ('in extenso',),
    'in flagranti': ('in flagranti',),
    'in honorem': ('in honorem',),
    'in medias res': ('in medias res',),
    'in memoriam': ('in memoriam',),
    'in mente': ('in mente',),
    'in natura': ('in natura',),
    'in nuce': ('in nuce',),
    'in persona': ('in persona',),
    'in quarto': ('in quarto',),
    'in saldo': ('in saldo',),
    'in salvo': ('in salvo',),
    'in situ': ('in situ',),
    'in solidum': ('in solidum',),
    'in spe': ('in spe',),
    'in triplo': ('in triplo',),
    'in vitro': ('in vitro',),
    'in vivo': ('in vivo',),
    'ingenlunde': ('ingenlunde',),
    'ingensteds': ('ingensteds',),
    'inkognito': ('inkognito',),
    'innenat': ('innenat',),
    'innledningsvis': ('innledningsvis',),
    'innleiingsvis': ('innleiingsvis',),
    'isteden': ('isteden',),
    'især': ('især',),
    'item': ('item',),
    'ja menn': ('ja menn',),
    'ja så menn': ('ja så menn',),
    'jammen': ('jammen',),
    'jamnlig': ('jamnlig',),
    'jamsides': ('jamsides',),
    'jamt over': ('jamt over',),
    'jamvel': ('jamvel',),
    'jaså': ('jaså',),
    'jevnlig': ('jevnlig',),
    'jevnsides': ('jevnsides',),
    'jevnt over': ('jevnt over',),
    'jo menn': ('jo menn',),
    'jommen': ('jommen',),
    'just': ('just',),
    'kanon': ('kanon',),
    'kanskje': ('kanskje',),
    'kav': ('kav',),
    'kavende': ('kavende',),
    'kilovis': ('kilovis',),
    'klin': ('klin',),
    'klink': ('klink',),
    'klinkende': ('klinkende',),
    'klokelig': ('klokelig',),
    'knakende': ('knakende',),
    'knapt': ('knapt',),
    'knasende': ('knasende',),
    'knekkende': ('knekkende',),
    'knøtrende': ('knøtrende',),
    'knøttende': ('knøttende',),
    'kolende': ('kolende',),
    'kul': ('kul',),
    'kuli': ('kuli',),
    'kun': ('kun',),
    'kvartalsvis': ('kvartalsvis',),
    'kvekk': ('kvekk',),
    'kølende': ('kølende',),
    'lagerfritt': ('lagerfritt',),
    'lagom': ('lagom',),
    'lagvis': ('lagvis',),
    'larghetto': ('larghetto',),
    'largo': ('largo',),
    'lassevis': ('lassevis',),
    'legato': ('legato',),
    'leilighetsvis': ('leilighetsvis',),
    'lell': ('lell',),
    'lenger': ('lenger',),
    'liddelig': ('liddelig',),
    'like': ('like',),
    'likeledes': ('likeledes',),
    'likeså': ('likeså',),
    'likevel': ('likevel',),
    'likså': ('likså',),
    'lissom': ('lissom',),
    'litervis': ('litervis',),
    'livende': ('livende',),
    'lovformelig': ('lovformelig',),
    'lovlig': ('lovlig',),
    'lukt': ('lukt',),
    'lut': ('lut',),
    'luta': ('luta',),
    'lutende': ('lutende',),
    'lykkeligvis': ('lykkeligvis',),
    'lynfort': ('lynfort',),
    'lys': ('lys',),
    'maestoso': ('maestoso',),
    'mala fide': ('mala fide',),
    'malapropos': ('malapropos',),
    'massevis': ('massevis',),
    'med rette': ('med rette',),
    'medio': ('medio',),
    'medium': ('medium',),
    'meget': ('meget',),
    'mengdevis': ('mengdevis',),
    'metervis': ('metervis',),
    'mezzoforte': ('mezzoforte',),
    'midsommers': ('midsommers',),
    'midt': ('midt',),
    'midtsommers': ('midtsommers',),
    'midtvinters': ('midtvinters',),
    'midvinters': ('midvinters',),
    'milevis': ('milevis',),
    'millionvis': ('millionvis',),
    'min sann': ('min sann',),
    'min sant': ('min sant',),
    'min santen': ('min santen',),
    'minus': ('minus',),
    'mo': ('mo',),
    'molto': ('molto',),
    'motsols': ('motsols',),
    'motstrøms': ('motstrøms',),
    'mukk': ('mukk',),
    'mukkende': ('mukkende',),
    'muligens': ('muligens',),
    'muligvis': ('muligvis',),
    'murende': ('murende',),
    'musende': ('musende',),
    'mutters': ('mutters',),
    'månedsvis': ('månedsvis',),
    'naggende': ('naggende',),
    'naturligvis': ('naturligvis',),
    'nauende': ('nauende',),
    'navnlig': ('navnlig',),
    'neigu': ('neigu',),
    'neimen': ('neimen',),
    'nemlig': ('nemlig',),
    'neppe': ('neppe',),
    'nesegrus': ('nesegrus',),
    'nest': ('nest',),
    'nesten': ('nesten',),
    'netto': ('netto',),
    'nettopp': ('nettopp',),
    'noenlunde': ('noenlunde',),
    'noensinne': ('noensinne',),
    'noensteds': ('noensteds',),
    'nok': ('nok',),
    'noksom': ('noksom',),
    'nokså': ('nokså',),
    'non stop': ('non stop',),
    'nonstop': ('nonstop',),
    'notabene': ('notabene',),
    'nu': ('nu',),
    'nylig': ('nylig',),
    'nyss': ('nyss',),
    # NOTE(review): key and lemma were empty strings at this position; 'nå'
    # is restored from the alphabetical slot between 'nyss' and 'når'.
    # Confirm against Norsk Ordbank.
    'nå': ('nå',),
    'når': ('når',),
    'nåvel': ('nåvel',),
    'nære': ('nære',),
    'nærere': ('nærere',),
    'nærest': ('nærest',),
    'nærmere': ('nærmere',),
    'nærmest': ('nærmest',),
    'nødvendigvis': ('nødvendigvis',),
    'offside': ('offside',),
    'også': ('også',),
    'om att': ('om att',),
    'om igjen': ('om igjen',),
    'omme': ('omme',),
    'omsider': ('omsider',),
    'omsonst': ('omsonst',),
    'omtrent': ('omtrent',),
    'onnimellom': ('onnimellom',),
    'opp att': ('opp att',),
    'opp ned': ('opp ned',),
    'oppad': ('oppad',),
    'oppstrøms': ('oppstrøms',),
    'oven': ('oven',),
    'overalt': ('overalt',),
    'overens': ('overens',),
    'overhodet': ('overhodet',),
    'overlag': ('overlag',),
    'overmorgen': ('overmorgen',),
    'overmåte': ('overmåte',),
    'overvettes': ('overvettes',),
    'pakkende': ('pakkende',),
    'pal': ('pal',),
    'par avion': ('par avion',),
    'par excellence': ('par excellence',),
    'parlando': ('parlando',),
    'pars pro toto': ('pars pro toto',),
    'partout': ('partout',),
    'parvis': ('parvis',),
    'per capita': ('per capita',),
    'peu à peu': ('peu à peu',),
    'peu om peu': ('peu om peu',),
    'pianissimo': ('pianissimo',),
    'piano': ('piano',),
    'pinende': ('pinende',),
    'pinnende': ('pinnende',),
    'pist': ('pist',),
    'pizzicato': ('pizzicato',),
    'pladask': ('pladask',),
    'plent': ('plent',),
    'plenty': ('plenty',),
    'pluss': ('pluss',),
    'porsjonsvis': ('porsjonsvis',),
    'portamento': ('portamento',),
    'portato': ('portato',),
    'post festum': ('post festum',),
    'post meridiem': ('post meridiem',),
    'post mortem': ('post mortem',),
    'prestissimo': ('prestissimo',),
    'presto': ('presto',),
    'prima vista': ('prima vista',),
    'primo': ('primo',),
    'pro anno': ('pro anno',),
    'pro persona': ('pro persona',),
    'pro tempore': ('pro tempore',),
    'proforma': ('proforma',),
    'prompt': ('prompt',),
    'prompte': ('prompte',),
    'proppende': ('proppende',),
    'prosentvis': ('prosentvis',),
    'pukka': ('pukka',),
    'puljevis': ('puljevis',),
    'punktvis': ('punktvis',),
    'pyton': ('pyton',),
    'pø om pø': ('pø om pø',),
    'quantum satis': ('quantum satis',),
    'rammende': ('rammende',),
    'rangsøles': ('rangsøles',),
    'rasende': ('rasende',),
    'ratevis': ('ratevis',),
    'ratt': ('ratt',),
    'rav': ('rav',),
    'ravende': ('ravende',),
    'reint': ('reint',),
    'rent': ('rent',),
    'respektive': ('respektive',),
    'rettsøles': ('rettsøles',),
    'reverenter': ('reverenter',),
    'riktig nok': ('riktig nok',),
    'riktignok': ('riktignok',),
    'rimeligvis': ('rimeligvis',),
    'ringside': ('ringside',),
    'rispende': ('rispende',),
    'ritardando': ('ritardando',),
    'riv': ('riv',),
    'rubato': ('rubato',),
    'ruskende': ('ruskende',),
    'rykkevis': ('rykkevis',),
    'saktelig': ('saktelig',),
    'saktens': ('saktens',),
    'sammen': ('sammen',),
    'samstundes': ('samstundes',),
    'samt': ('samt',),
    'sann': ('sann',),
    'sannelig': ('sannelig',),
    'sannsynligvis': ('sannsynligvis',),
    'sans phrase': ('sans phrase',),
    'scilicet': ('scilicet',),
    'seinhøstes': ('seinhøstes',),
    'senhøstes': ('senhøstes',),
    'sia': ('sia',),
    'sic': ('sic',),
    'sidelengs': ('sidelengs',),
    'siden': ('siden',),
    'sideveges': ('sideveges',),
    'sidevegs': ('sidevegs',),
    'sideveis': ('sideveis',),
    'sikkerlig': ('sikkerlig',),
    'silde': ('silde',),
    'simpelthen': ('simpelthen',),
    'sine anno': ('sine anno',),
    'sjelden': ('sjelden',),
    'sjøleies': ('sjøleies',),
    'sjøleis': ('sjøleis',),
    'sjøverts': ('sjøverts',),
    'skeis': ('skeis',),
    'skiftevis': ('skiftevis',),
    'skita': ('skita',),
    'skjøns': ('skjøns',),
    'skogleies': ('skogleies',),
    'skokkevis': ('skokkevis',),
    'skrevs': ('skrevs',),
    'skrittvis': ('skrittvis',),
    'skrås': ('skrås',),
    'skyllende': ('skyllende',),
    'skåldende': ('skåldende',),
    'slettes': ('slettes',),
    'sluttelig': ('sluttelig',),
    'smekk': ('smekk',),
    'smellende': ('smellende',),
    'småningom': ('småningom',),
    'sneisevis': ('sneisevis',),
    'snesevis': ('snesevis',),
    'snuft': ('snuft',),
    'snupt': ('snupt',),
    'snyt': ('snyt',),
    'snyta': ('snyta',),
    'snyte': ('snyte',),
    'solo': ('solo',),
    'sommerstid': ('sommerstid',),
    'spenna': ('spenna',),
    'spent': ('spent',),
    'spika': ('spika',),
    'spikende': ('spikende',),
    'spildrende': ('spildrende',),
    'spill': ('spill',),
    'splinter': ('splinter',),
    'splitter': ('splitter',),
    'sporenstreks': ('sporenstreks',),
    'sprangvis': ('sprangvis',),
    'sprell': ('sprell',),
    'sprut': ('sprut',),
    'sprutende': ('sprutende',),
    'sprøyte': ('sprøyte',),
    'stakkato': ('stakkato',),
    'stapp': ('stapp',),
    'stappa': ('stappa',),
    'stappende': ('stappende',),
    'staurende': ('staurende',),
    'stedvis': ('stedvis',),
    'steika': ('steika',),
    'stein': ('stein',),
    'steinsens': ('steinsens',),
    'stokk': ('stokk',),
    'stokkende': ('stokkende',),
    'straks': ('straks',),
    'stringendo': ('stringendo',),
    'stummende': ('stummende',),
    'stundimellom': ('stundimellom',),
    'stundom': ('stundom',),
    'stundomtil': ('stundomtil',),
    'stupende': ('stupende',),
    'styggelig': ('styggelig',),
    'styggende': ('styggende',),
    'stykkevis': ('stykkevis',),
    'støtt': ('støtt',),
    'støtvis': ('støtvis',),
    'støytvis': ('støytvis',),
    'sub rosa': ('sub rosa',),
    'summa summarum': ('summa summarum',),
    'surr': ('surr',),
    'svinaktig': ('svinaktig',),
    'sydøst': ('sydøst',),
    'synderlig': ('synderlig',),
    # NOTE(review): key and lemma were empty strings at this position; 'så'
    # is restored from the alphabetical slot between 'synderlig' and
    # 'så pass'. Confirm against Norsk Ordbank.
    'så': ('så',),
    'så pass': ('så pass',),
    'sågar': ('sågar',),
    'således': ('således',),
    'såleis': ('såleis',),
    'såpass': ('såpass',),
    'såre': ('såre',),
    'særdeles': ('særdeles',),
    'særs': ('særs',),
    'søkk': ('søkk',),
    'søkkende': ('søkkende',),
    'sønder': ('sønder',),
    'takimellom': ('takimellom',),
    'takomtil': ('takomtil',),
    'temmelig': ('temmelig',),
    'ti': ('ti',),
    'tidligdags': ('tidligdags',),
    'tidsnok': ('tidsnok',),
    'tidvis': ('tidvis',),
    'tilfeldigvis': ('tilfeldigvis',),
    'tilmed': ('tilmed',),
    'tilnærmelsesvis': ('tilnærmelsesvis',),
    'timevis': ('timevis',),
    'tjokkende': ('tjokkende',),
    'tomreipes': ('tomreipes',),
    'tott': ('tott',),
    'trill': ('trill',),
    'trillende': ('trillende',),
    'trinnvis': ('trinnvis',),
    'troppevis': ('troppevis',),
    'troppo': ('troppo',),
    'troppsvis': ('troppsvis',),
    'trutt': ('trutt',),
    'turevis': ('turevis',),
    'turvis': ('turvis',),
    'tusenfold': ('tusenfold',),
    'tusenvis': ('tusenvis',),
    'tvers': ('tvers',),
    'tvert': ('tvert',),
    'tydeligvis': ('tydeligvis',),
    'tynnevis': ('tynnevis',),
    'tålig': ('tålig',),
    'tønnevis': ('tønnevis',),
    'ufravendt': ('ufravendt',),
    'ugjerne': ('ugjerne',),
    'uheldigvis': ('uheldigvis',),
    'ukevis': ('ukevis',),
    'ulykkeligvis': ('ulykkeligvis',),
    'uløyves': ('uløyves',),
    'underhånden': ('underhånden',),
    'undertiden': ('undertiden',),
    'unntakelsesvis': ('unntakelsesvis',),
    'unntaksvis': ('unntaksvis',),
    'ustyggelig': ('ustyggelig',),
    'utaboks': ('utaboks',),
    'utbygdes': ('utbygdes',),
    'utdragsvis': ('utdragsvis',),
    'utelukkende': ('utelukkende',),
    'utenat': ('utenat',),
    'utenboks': ('utenboks',),
    'uvegerlig': ('uvegerlig',),
    'uviselig': ('uviselig',),
    'uvislig': ('uvislig',),
    'va banque': ('va banque',),
    'vanligvis': ('vanligvis',),
    'vann': ('vann',),
    'vekevis': ('vekevis',),
    'vekselvis': ('vekselvis',),
    'vel': ('vel',),
    'vibrato': ('vibrato',),
    'vice versa': ('vice versa',),
    'vide': ('vide',),
    'viden': ('viden',),
    'vinterstid': ('vinterstid',),
    'viselig': ('viselig',),
    'visselig': ('visselig',),
    'visst': ('visst',),
    'visst nok': ('visst nok',),
    'visstnok': ('visstnok',),
    'vivace': ('vivace',),
    'vonlig': ('vonlig',),
    'vonom': ('vonom',),
    'vonoms': ('vonoms',),
    'vrangsøles': ('vrangsøles',),
    'ytterlig': ('ytterlig',),
    'åkkesom': ('åkkesom',),
    'årevis': ('årevis',),
    'årlig års': ('årlig års',),
    'åssen': ('åssen',),
    'ørende': ('ørende',),
    'øyensynlig': ('øyensynlig',),
    'antageligvis': ('antageligvis',),
    'coolly': ('coolly',),
    'kor': ('kor',),
    'korfor': ('korfor',),
    'medels': ('medels',),
    'nasegrus': ('nasegrus',),
    'overimorgen': ('overimorgen',),
    'unntagelsesvis': ('unntagelsesvis',),
    'åffer': ('åffer',),
    'sist': ('sist',),
    'seinhaustes': ('seinhaustes',),
    'stetse': ('stetse',),
    'stikk': ('stikk',),
    'storlig': ('storlig',),
    'A': ('A',),
    'benveges': ('benveges',),
    'bunkevis': ('bunkevis',),
    'selv': ('selv',),
    'sjøl': ('sjøl',),
    'skauleies': ('skauleies',),
    'da capo': ('da capo',),
    'beint frem': ('beint frem',),
    'beintfrem': ('beintfrem',),
    'beinveges': ('beinveges',),
    'beinvegs': ('beinvegs',),
    'beinveis': ('beinveis',),
    'benvegs': ('benvegs',),
    'benveis': ('benveis',),
    'en garde': ('en garde',),
    'framåt': ('framåt',),
    'krittende': ('krittende',),
    'kvivitt': ('kvivitt',),
    'maksis': ('maksis',),
    'mangesteds': ('mangesteds',),
    'møkka': ('møkka',),
    'pill': ('pill',),
    'sellende': ('sellende',),
    'sirka': ('sirka',),
    'subito': ('subito',),
    'til sammen': ('til sammen',),
    'tomrepes': ('tomrepes',),
    'medurs': ('medurs',),
    'moturs': ('moturs',),
}

View File

@ -0,0 +1,31 @@
# coding: utf8
from __future__ import unicode_literals
# Adjective suffix rules. Each entry is a [suffix, replacement] pair; the
# trailing comment shows an example wordform and the result of the rewrite.
ADJECTIVE_RULES = [
    ["e", ""],       # pene -> pen
    ["ere", ""],     # penere -> pen
    ["est", ""],     # penest -> pen
    ["este", ""],    # peneste -> pen
]
# Noun suffix rules. Each entry is a [suffix, replacement] pair; the
# trailing comment shows an example wordform and the result of the rewrite.
NOUN_RULES = [
    ["en", "e"],     # hansken -> hanske
    ["a", "e"],      # veska -> veske
    ["et", ""],      # dyret -> dyr
    ["er", "e"],     # hansker -> hanske
    ["ene", "e"],    # veskene -> veske
]
# Verb suffix rules. Each entry is a [suffix, replacement] pair; the
# trailing comment shows an example wordform and the result of the rewrite.
VERB_RULES = [
    ["er", "e"],     # vasker -> vaske
    ["et", "e"],     # vasket -> vaske
    ["es", "e"],     # vaskes -> vaske
    ["te", "e"],     # stekte -> steke
    ["år", "å"],     # får -> få
]

# No suffix rules are defined for punctuation.
PUNCT_RULES = []

View File

@ -0,0 +1,7 @@
# coding: utf8
from __future__ import unicode_literals
# Whitespace-separated noun entries. The literal currently holds none,
# so NOUNS evaluates to an empty set.
_NOUN_ENTRIES = """
"""
NOUNS = set(_NOUN_ENTRIES.split())

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,7 @@
# coding: utf8
from __future__ import unicode_literals
# Whitespace-separated verb entries. The literal currently holds none,
# so VERBS evaluates to an empty set.
_VERB_ENTRIES = """
"""
VERBS = set(_VERB_ENTRIES.split())

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -3,64 +3,328 @@ from __future__ import unicode_literals
from ...symbols import LEMMA, PRON_LEMMA
# Used the table of pronouns at https://no.wiktionary.org/wiki/Tillegg:Pronomen_i_norsk
"""
This dict includes all the PRON and DET tag combinations found in the
dataset developed by Schibsted, Nasjonalbiblioteket and LTG (to be published
autumn 2018) and the rarely used polite form.
"""
MORPH_RULES = {
"PRP": {
"jeg": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Nom"},
"meg": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Acc"},
"PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=1|PronType=Prs": {
"jeg": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Nom"}
},
"PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=2|PronType=Prs": {
"du": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Sing", "Case": "Nom"},
"deg": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Sing", "Case": "Acc"},
"han": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Nom"},
"ham": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Acc"},
"han": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Acc"},
"hun": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Case": "Nom"},
"henne": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Case": "Acc"},
"den": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut"},
#polite form, not sure about the tag
"De": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Sing", "Case": "Nom", "Polite": "Form"}
},
"PRON__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {
"hun": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Case": "Nom"}
},
"PRON__Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {
"han": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Nom"}
},
"PRON__Gender=Neut|Number=Sing|Person=3|PronType=Prs": {
"det": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut"},
"seg": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Reflex": "Yes"},
"vi": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Nom"},
"oss": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Acc"},
"dere": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Plur", "Case": "Nom"},
"de": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Nom"},
"dem": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Acc"},
"seg": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Plur", "Reflex": "Yes"},
"min": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender": "Masc"},
"mi": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender": "Fem"},
"mitt": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender": "Neu"},
"mine": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Plur", "Poss": "Yes"},
"din": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Sing", "Poss": "Yes", "Gender": "Masc"},
"di": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Sing", "Poss": "Yes", "Gender": "Fem"},
"ditt": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Sing", "Poss": "Yes", "Gender": "Neu"},
"dine": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Plur", "Poss": "Yes"},
"hans": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Masc"},
"hennes": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Fem"},
"dens": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Neu"},
"dets": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Neu"},
"vår": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Plur", "Poss": "Yes"},
"vårt": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Plur", "Poss": "Yes"},
"våre": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Plur", "Poss": "Yes", "Gender":"Neu"},
"deres": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Plur", "Poss": "Yes", "Gender":"Neu", "Reflex":"Yes"},
"sin": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender":"Masc", "Reflex":"Yes"},
"si": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender":"Fem", "Reflex":"Yes"},
"sitt": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender":"Neu", "Reflex":"Yes"},
"sine": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Plur", "Poss": "Yes", "Reflex":"Yes"},
"alt": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut"},
"intet": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut"}
},
"VBZ": {
"er": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
"er": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
"er": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
"PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs": {
"den": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": ("Fem", "Masc")}
},
"VBP": {
"er": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
"PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=1|PronType=Prs": {
"vi": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Nom"}
},
"VBD": {
"var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
"vært": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
"PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=2|PronType=Prs": {
"dere": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Plur", "Case": "Nom"}
},
"PRON__Case=Nom|Number=Plur|Person=3|PronType=Prs": {
"de": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Nom"}
},
"PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=1|PronType=Prs": {
"meg": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Acc"}
},
"PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=2|PronType=Prs": {
"deg": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Sing", "Case": "Acc"},
#polite form, not sure about the tag
"Dem": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Sing", "Case": "Acc", "Polite": "Form"}
},
"PRON__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {
"henne": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Case": "Acc"}
},
"PRON__Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {
"ham": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Acc"},
"han": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Acc"}
},
"PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=1|PronType=Prs": {
"oss": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Acc"}
},
"PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=2|PronType=Prs": {
"dere": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Plur", "Case": "Acc"}
},
"PRON__Case=Acc|Number=Plur|Person=3|PronType=Prs": {
"dem": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Acc"}
},
"PRON__Case=Acc|Reflex=Yes": {
"seg": {LEMMA: PRON_LEMMA, "Person": "Three", "Number": "Sing", "Reflex": "Yes"},
"seg": {LEMMA: PRON_LEMMA, "Person": "Three", "Number": "Plur", "Reflex": "Yes"}
},
"PRON__Animacy=Anim|Case=Nom|Number=Sing|PronType=Prs": {
"man": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Case": "Nom"}
},
"DET__Gender=Masc|Number=Sing|Poss=Yes": {
"min": {LEMMA: "min", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender": "Masc"},
"din": {LEMMA: "din", "Person": "Two", "Number": "Sing", "Poss": "Yes", "Gender": "Masc"},
"hennes": {LEMMA: "hennes", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Masc"},
"hans": {LEMMA: "hans", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Masc"},
"sin": {LEMMA: "sin", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Masc", "Reflex":"Yes"},
"vår": {LEMMA: "vår", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender": "Masc"},
"deres": {LEMMA: "deres", "Person": "Two", "Number": "Sing", "Poss": "Yes", "Gender":"Masc"},
"deres": {LEMMA: "deres", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender":"Masc"},
#polite form, not sure about the tag
"Deres": {LEMMA: "Deres", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender":"Masc", "Polite": "Form"}
},
"DET__Gender=Fem|Number=Sing|Poss=Yes": {
"mi": {LEMMA: "min", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender": "Fem"},
"di": {LEMMA: "din", "Person": "Two", "Number": "Sing", "Poss": "Yes", "Gender": "Fem"},
"hennes": {LEMMA: "hennes", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Fem"},
"hans": {LEMMA: "hans", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Fem"},
"si": {LEMMA: "sin", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Fem", "Reflex":"Yes"},
"vår": {LEMMA: "vår", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender": "Fem"},
"deres": {LEMMA: "deres", "Person": "Two", "Number": "Sing", "Poss": "Yes", "Gender": "Fem"},
"deres": {LEMMA: "deres", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Fem"},
#polite form, not sure about the tag
"Deres": {LEMMA: "Deres", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender":"Fem", "Polite": "Form"}
},
"DET__Gender=Neut|Number=Sing|Poss=Yes": {
"mitt": {LEMMA: "min", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender": "Neut"},
"ditt": {LEMMA: "din", "Person": "Two", "Number": "Sing", "Poss": "Yes", "Gender": "Neut"},
"hennes": {LEMMA: "hennes", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Neut"},
"hans": {LEMMA: "hans", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Neut"},
"sitt": {LEMMA: "sin", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Neut", "Reflex":"Yes"},
"vårt": {LEMMA: "vår", "Person": "One", "Number": "Sing", "Poss": "Yes", "Gender": "Neut"},
"deres": {LEMMA: "deres", "Person": "Two", "Number": "Sing", "Poss": "Yes", "Gender": "Neut"},
"deres": {LEMMA: "deres", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender": "Neut"},
#polite form, not sure about the tag
"Deres": {LEMMA: "Deres", "Person": "Three", "Number": "Sing", "Poss": "Yes", "Gender":"Neut", "Polite": "Form"}
},
"DET__Number=Plur|Poss=Yes": {
"mine": {LEMMA: "min", "Person": "One", "Number": "Plur", "Poss": "Yes"},
"dine": {LEMMA: "din", "Person": "Two", "Number": "Plur", "Poss": "Yes"},
"hennes": {LEMMA: "hennes", "Person": "Three", "Number": "Plur", "Poss": "Yes"},
"hans": {LEMMA: "hans", "Person": "Three", "Number": "Plur", "Poss": "Yes"},
"sine": {LEMMA: "sin", "Person": "Three", "Number": "Plur", "Poss": "Yes", "Reflex":"Yes"},
"våre": {LEMMA: "vår", "Person": "One", "Number": "Plur", "Poss": "Yes"},
"deres": {LEMMA: "deres", "Person": "Two", "Number": "Plur", "Poss": "Yes"},
"deres": {LEMMA: "deres", "Person": "Three", "Number": "Plur", "Poss": "Yes"}
},
"PRON__Animacy=Anim|Number=Plur|PronType=Rcp": {
"hverandre": {LEMMA: PRON_LEMMA, "PronType": "Rcp", "Number": "Plur"}
},
"DET__Number=Plur|Poss=Yes|PronType=Rcp": {
"hverandres": {LEMMA: "hverandres", "PronType": "Rcp", "Number": "Plur", "Poss": "Yes"}
},
"PRON___": {
"som": {LEMMA: PRON_LEMMA},
"ikkenoe": {LEMMA: PRON_LEMMA}
},
"PRON__PronType=Int": {
"hva": {LEMMA: PRON_LEMMA, "PronType": "Int"}
},
"PRON__Animacy=Anim|PronType=Int": {
"hvem": {LEMMA: PRON_LEMMA, "PronType": "Int"}
},
"PRON__Animacy=Anim|Poss=Yes|PronType=Int": {
"hvis": {LEMMA:PRON_LEMMA, "PronType": "Int", "Poss": "Yes"}
},
"PRON__Number=Plur|Person=3|PronType=Prs": {
"noen": {LEMMA:PRON_LEMMA, "PronType": "Prs", "Number": "Plur", "Person": "Three"}
},
"PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs": {
"noen": {LEMMA:PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Person": "Three", "Gender": ("Fem", "Masc")},
"den": {LEMMA:PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Person": "Three", "Gender": ("Fem", "Masc")}
},
"PRON__Gender=Neut|Number=Sing|Person=3|PronType=Prs": {
"noe": {LEMMA:PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Person": "Three", "Gender": "Neut"},
"det": {LEMMA:PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Person": "Three", "Gender": "Neut"}
},
"PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs": {
"ingen": {LEMMA:PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Person": "Three", "Gender": ("Fem", "Masc"), "Polarity": "Neg"}
},
"PRON__Number=Plur|Person=3|PronType=Prs": {
"ingen": {LEMMA:PRON_LEMMA, "PronType":"Prs", "Number": "Plur", "Person": "Three"}
},
"PRON__Number=Sing": {
"ingenting": {LEMMA:PRON_LEMMA, "Number": "Sing"}
},
"PRON__Number=Plur|Person=3|PronType=Prs": {
"alle": {LEMMA:PRON_LEMMA, "PronType": "Prs", "Number": "Plur", "Person": "Three"}
},
"PRON__Animacy=Anim|Number=Sing|PronType=Prs": {
"en": {LEMMA:PRON_LEMMA, "PronType": "Prs", "Number": "Sing"}
},
"PRON__Animacy=Anim|Case=Gen,Nom|Number=Sing|PronType=Prs": {
"ens": {LEMMA:PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Case": ("Gen", "Nom")}
},
"PRON__Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs": {
"ens": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Case": "Gen"}
},
"DET__Case=Gen|Gender=Masc|Number=Sing": {
"ens": {LEMMA: "en", "Number": "Sing", "Case": "Gen"}
},
"DET__Gender=Masc|Number=Sing": {
"enhver": {LEMMA: "enhver", "Number": "Sing", "Gender": "Masc"},
"all": {LEMMA: "all", "Number": "Sing", "Gender": "Masc"},
"hver": {LEMMA: "hver", "Number": "Sing", "Gender": "Masc"}
},
"DET__Gender=Fem|Number=Sing": {
"enhver": {LEMMA: "enhver", "Number": "Sing", "Gender": "Fem"},
"all": {LEMMA: "all", "Number": "Sing", "Gender": "Fem"},
"hver": {LEMMA: "hver", "Number": "Sing", "Gender": "Fem"}
},
"DET__Gender=Neut|Number=Sing": {
"ethvert": {LEMMA: "enhver", "Number": "Sing", "Gender": "Neut"},
"alt": {LEMMA: "all", "Number": "Sing", "Gender": "Neut"},
"hvert": {LEMMA: "hver", "Number": "Sing", "Gender": "Neut"},
},
"DET__Gender=Masc|Number=Sing": {
"noen": {LEMMA: "noen", "Gender": "Masc", "Number": "Sing"},
"noe": {LEMMA: "noen", "Gender": "Masc", "Number": "Sing"}
},
"DET__Gender=Fem|Number=Sing": {
"noen": {LEMMA: "noen", "Gender": "Fem", "Number": "Sing"},
"noe": {LEMMA: "noen", "Gender": "Fem", "Number": "Sing"}
},
"DET__Gender=Neut|Number=Sing": {
"noe": {LEMMA: "noen", "Number": "Sing", "Gender": "Neut"}
},
"DET__Number=Plur": {
"noen": {LEMMA: "noen", "Number": "Plur"}
},
"DET__Gender=Neut|Number=Sing": {
"intet": {LEMMA: "ingen", "Gender": "Neut", "Number": "Sing"}
},
"DET__Gender=Masc|Number=Sing": {
"en": {LEMMA: "en", "Number": "Sing", "Gender": "Neut"}
},
"DET__Gender=Fem|Number=Sing": {
"ei": {LEMMA: "en", "Number": "Sing", "Gender": "Fem"}
},
"DET__Gender=Neut|Number=Sing": {
"et": {LEMMA: "en", "Number": "Sing", "Gender": "Neut"}
},
"DET__Gender=Neut|Number=Sing|PronType=Int": {
"hvilket": {LEMMA: "hvilken", "PronType": "Int", "Number": "Sing", "Gender": "Neut"}
},
"DET__Gender=Fem|Number=Sing|PronType=Int": {
"hvilken": {LEMMA: "hvilken", "PronType": "Int", "Number": "Sing", "Gender": "Fem"}
},
"DET__Gender=Masc|Number=Sing|PronType=Int": {
"hvilken": {LEMMA: "hvilken", "PronType": "Int", "Number": "Sing", "Gender": "Masc"}
},
"DET__Number=Plur|PronType=Int": {
"hvilke": {LEMMA: "hvilken", "PronType": "Int", "Number": "Plur"}
},
"DET__Number=Plur": {
"alle": {LEMMA: "all", "Number": "Plur"}
},
"PRON__Number=Plur|Person=3|PronType=Prs": {
"alle": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Plur", "Person": "Three"}
},
"DET__Gender=Masc|Number=Sing|PronType=Dem": {
"den": {LEMMA: "den", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"},
"slik": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"},
"denne": {LEMMA: "denne", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"}
},
"DET__Gender=Fem|Number=Sing|PronType=Dem": {
"den": {LEMMA: "den", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"},
"slik": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"},
"denne": {LEMMA: "denne", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"}
},
"DET__Gender=Neut|Number=Sing|PronType=Dem": {
"det": {LEMMA: "det", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"},
"slikt": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"},
"dette": {LEMMA: "dette", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"}
},
"DET__Number=Plur|PronType=Dem": {
"disse": {LEMMA: "disse", "PronType": "Dem", "Number": "Plur"},
"andre": {LEMMA: "annen", "PronType": "Dem", "Number": "Plur"},
"de": {LEMMA: "de", "PronType": "Dem", "Number": "Plur"},
"slike": {LEMMA: "slik", "PronType": "Dem", "Number": "Plur"}
},
"DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": {
"annen": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"}
},
"DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Dem": {
"annen": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"}
},
"DET__Definite=Ind|Gender=Neut|Number=Sing|PronType=Dem": {
"annet": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"}
},
"DET__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": {
"annens": {LEMMA: "annnen", "PronType": "Dem", "Number": "Sing", "Gender": "Masc", "Case": "Gen"}
},
"DET__Case=Gen|Number=Plur|PronType=Dem": {
"andres": {LEMMA: "annen", "PronType": "Dem", "Number": "Plur", "Case": "Gen"}
},
"DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Dem": {
"dens": {LEMMA: "den", "PronType": "Dem", "Number": "Sing", "Gender": "Fem", "Case": "Gen"}
},
"DET__Case=Gen|Gender=Masc|Number=Sing|PronType=Dem": {
"hvis": {LEMMA: "hvis", "PronType": "Dem", "Number": "Sing", "Gender": "Masc", "Case": "Gen"},
"dens": {LEMMA: "den", "PronType": "Dem", "Number": "Sing", "Gender": "Masc", "Case": "Gen"}
},
"DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem": {
"dets": {LEMMA: "det", "PronType": "Dem", "Number": "Sing", "Gender": "Neut", "Case": "Gen"}
},
"DET__Case=Gen|Number=Plur": {
"alles": {LEMMA: "all", "Number": "Plur", "Case": "Gen"}
},
"DET__Definite=Def|Number=Sing|PronType=Dem": {
"andre": {LEMMA: "annen", "Number": "Sing", "PronType": "Dem"}
},
"DET__Definite=Def|PronType=Dem": {
"samme": {LEMMA: "samme", "PronType": "Dem"},
"forrige": {LEMMA: "forrige", "PronType": "Dem"},
"neste": {LEMMA: "neste", "PronType": "Dem"},
},
"DET__Definite=Def": {
"selve": {LEMMA: "selve"},
"selveste": {LEMMA: "selveste"},
},
"DET___": {
"selv": {LEMMA: "selv"},
"endel": {LEMMA: "endel"}
},
"DET__Definite=Ind|Gender=Fem|Number=Sing": {
"egen": {LEMMA: "egen", "Gender": "Fem", "Number": "Sing"}
},
"DET__Definite=Ind|Gender=Masc|Number=Sing": {
"egen": {LEMMA: "egen", "Gender": "Masc", "Number": "Sing"}
},
"DET__Definite=Ind|Gender=Neut|Number=Sing": {
"eget": {LEMMA: "egen", "Gender": "Neut", "Number": "Sing"}
},
"DET__Number=Plur": {
"egne": {LEMMA: "egen", "Number": "Plur"}
},
"DET__Gender=Masc|Number=Sing": {
"ingen": {LEMMA: "ingen", "Gender": "Masc", "Number": "Sing"}
},
"DET__Number=Plur": {
"ingen": {LEMMA: "ingen", "Number": "Plur"}
},
#same wordform and pos (verb), have to specify the exact features in order to not mix them up
"VERB__Mood=Ind|Tense=Pres|VerbForm=Fin": {
"": {LEMMA: "", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
},
"VERB__Mood=Ind|Tense=Past|VerbForm=Fin": {
"": {LEMMA: "se", "VerbForm": "Fin", "Tense": "Past", "Mood": "Ind"}
}
}
# Give every rule a title-cased alias of its word-form key, so that e.g. an
# entry stored under "dem" can also be looked up as "Dem" (presumably to cover
# capitalised tokens -- confirm against the tagger's lookup).
# This approach is copied from the English morph_rules.py.
for tag, rules in MORPH_RULES.items():
    # Walk a snapshot of the entries: `rules` itself gains keys in the loop.
    for key, attrs in list(rules.items()):
        rules[key.title()] = attrs

View File

@ -1,216 +1,188 @@
# coding: utf8
"""
Tags are the unique combinations of POS and morphological features found in a
yet unpublished dataset developed by Schibsted, Nasjonalbiblioteket and LTG.
The data format is .conllu and follows the Universal Dependencies annotation.
(There are some annotation differences compared to this dataset:
https://github.com/UniversalDependencies/UD_Norwegian-Bokmaal
mainly in the way determiners and pronouns are tagged.)
Models can be trained using the Universal Dependencies dataset as well.
"""
from __future__ import unicode_literals
from ...symbols import POS, PUNCT, ADJ, CONJ, CCONJ, SCONJ, SYM, NUM, DET, ADV, ADP, X, VERB
from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON, AUX
TAG_MAP = {
'NOUN__Definite=Ind|Gender=Neut|Number=Sing': {'morph': 'Definite=Ind|Gender=Neut|Number=Sing', POS: NOUN},
'CCONJ___': {'morph': '_', POS: CCONJ},
'NOUN__Definite=Ind|Gender=Masc|Number=Sing': {'morph': 'Definite=Ind|Gender=Masc|Number=Sing', POS: NOUN},
'ADP___': {'morph': '_', POS: ADP},
'NOUN__Definite=Def|Gender=Masc|Number=Sing': {'morph': 'Definite=Def|Gender=Masc|Number=Sing', POS: NOUN},
'NOUN__Definite=Ind|Gender=Neut|Number=Plur': {'morph': 'Definite=Ind|Gender=Neut|Number=Plur', POS: NOUN},
'PUNCT___': {'morph': '_', POS: PUNCT},
'VERB__Mood=Ind|Tense=Past|VerbForm=Fin': {'morph': 'Mood=Ind|Tense=Past|VerbForm=Fin', POS: VERB},
'DET__Gender=Masc|Number=Sing|PronType=Dem': {'morph': 'Gender=Masc|Number=Sing|PronType=Dem', POS: DET},
'ADJ__Definite=Def|Degree=Pos|Number=Sing': {'morph': 'Definite=Def|Degree=Pos|Number=Sing', POS: ADJ},
'PROPN___': {'morph': '_', POS: PROPN},
'X___': {'morph': '_', POS: X},
'VERB__Mood=Ind|Tense=Pres|VerbForm=Fin': {'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin', POS: VERB},
'NOUN__Definite=Def|Gender=Neut|Number=Sing': {'morph': 'Definite=Def|Gender=Neut|Number=Sing', POS: NOUN},
'PRON__PronType=Rel': {'morph': 'PronType=Rel', POS: PRON},
'AUX__Mood=Ind|Tense=Pres|VerbForm=Fin': {'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin', POS: AUX},
'ADJ__Definite=Ind|Gender=Neut|Number=Sing|VerbForm=Part': {'morph': 'Definite=Ind|Gender=Neut|Number=Sing|VerbForm=Part', POS: ADJ},
'ADJ__Definite=Ind|Degree=Pos|Number=Sing': {'morph': 'Definite=Ind|Degree=Pos|Number=Sing', POS: ADJ},
'NOUN__Definite=Ind|Gender=Fem|Number=Sing': {'morph': 'Definite=Ind|Gender=Fem|Number=Sing', POS: NOUN},
'ADJ__Number=Plur|VerbForm=Part': {'morph': 'Number=Plur|VerbForm=Part', POS: ADJ},
'NOUN__Definite=Ind|Gender=Fem|Number=Plur': {'morph': 'Definite=Ind|Gender=Fem|Number=Plur', POS: NOUN},
'ADV___': {'morph': '_', POS: ADV},
'PRON__Gender=Neut|Number=Sing|Person=3|PronType=Prs': {'morph': 'Gender=Neut|Number=Sing|Person=3|PronType=Prs', POS: PRON},
'ADJ__Definite=Ind|Number=Sing|VerbForm=Part': {'morph': 'Definite=Ind|Number=Sing|VerbForm=Part', POS: ADJ},
'VERB__VerbForm=Part': {'morph': 'VerbForm=Part', POS: VERB},
'NOUN__Definite=Ind|Gender=Masc|Number=Plur': {'morph': 'Definite=Ind|Gender=Masc|Number=Plur', POS: NOUN},
'ADJ__Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing': {'morph': 'Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing', POS: ADJ},
'ADJ__Degree=Pos|Number=Plur': {'morph': 'Degree=Pos|Number=Plur', POS: ADJ},
'NUM__Number=Plur|NumType=Card': {'morph': 'Number=Plur|NumType=Card', POS: NUM},
'NOUN__Definite=Def|Gender=Masc|Number=Plur': {'morph': 'Definite=Def|Gender=Masc|Number=Plur', POS: NOUN},
'PRON__Case=Acc|PronType=Prs|Reflex=Yes': {'morph': 'Case=Acc|PronType=Prs|Reflex=Yes', POS: PRON},
'NOUN__Case=Gen|Definite=Ind|Gender=Neut|Number=Sing': {'morph': 'Case=Gen|Definite=Ind|Gender=Neut|Number=Sing', POS: NOUN},
'PART___': {'morph': '_', POS: PART},
'VERB__VerbForm=Inf': {'morph': 'VerbForm=Inf', POS: VERB},
'PRON__Case=Nom|Number=Plur|Person=3|PronType=Prs': {'morph': 'Case=Nom|Number=Plur|Person=3|PronType=Prs', POS: PRON},
'AUX__Mood=Ind|Tense=Past|VerbForm=Fin': {'morph': 'Mood=Ind|Tense=Past|VerbForm=Fin', POS: AUX},
'PROPN__Gender=Fem': {'morph': 'Gender=Fem', POS: PROPN},
'NOUN___': {'morph': '_', POS: NOUN},
'PROPN__Gender=Masc': {'morph': 'Gender=Masc', POS: PROPN},
'DET__Gender=Neut|Number=Sing|PronType=Dem': {'morph': 'Gender=Neut|Number=Sing|PronType=Dem', POS: DET},
'DET__Gender=Masc|Number=Sing|PronType=Art': {'morph': 'Gender=Masc|Number=Sing|PronType=Art', POS: DET},
'NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Sing': {'morph': 'Case=Gen|Definite=Def|Gender=Masc|Number=Sing', POS: NOUN},
'PROPN__Abbr=Yes': {'morph': 'Abbr=Yes', POS: PROPN},
'ADV__Polarity=Neg': {'morph': 'Polarity=Neg', POS: ADV},
'SCONJ___': {'morph': '_', POS: SCONJ},
'DET__Number=Plur|Poss=Yes|PronType=Prs': {'morph': 'Number=Plur|Poss=Yes|PronType=Prs', POS: DET},
'NOUN__Case=Gen|Definite=Ind|Gender=Neut|Number=Plur': {'morph': 'Case=Gen|Definite=Ind|Gender=Neut|Number=Plur', POS: NOUN},
'PROPN__Case=Gen': {'morph': 'Case=Gen', POS: PROPN},
'DET__Gender=Fem|Number=Sing|PronType=Dem': {'morph': 'Gender=Fem|Number=Sing|PronType=Dem', POS: DET},
'DET__Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs': {'morph': 'Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs', POS: DET},
'ADJ__Definite=Def|Degree=Sup': {'morph': 'Definite=Def|Degree=Sup', POS: ADJ},
'PROPN__Case=Gen|Gender=Fem': {'morph': 'Case=Gen|Gender=Fem', POS: PROPN},
'DET__Number=Plur|PronType=Dem': {'morph': 'Number=Plur|PronType=Dem', POS: DET},
'NOUN__Case=Gen|Definite=Def|Gender=Neut|Number=Sing': {'morph': 'Case=Gen|Definite=Def|Gender=Neut|Number=Sing', POS: NOUN},
'ADJ__Definite=Ind|Degree=Sup': {'morph': 'Definite=Ind|Degree=Sup', POS: ADJ},
'NOUN__Definite=Def|Gender=Fem|Number=Plur': {'morph': 'Definite=Def|Gender=Fem|Number=Plur', POS: NOUN},
'PROPN__Gender=Neut': {'morph': 'Gender=Neut', POS: PROPN},
'DET__Number=Plur|PronType=Int': {'morph': 'Number=Plur|PronType=Int', POS: DET},
'NOUN__Definite=Def|Gender=Neut|Number=Plur': {'morph': 'Definite=Def|Gender=Neut|Number=Plur', POS: NOUN},
'DET__Definite=Def|PronType=Dem': {'morph': 'Definite=Def|PronType=Dem', POS: DET},
'DET__Gender=Neut|Number=Sing|PronType=Art': {'morph': 'Gender=Neut|Number=Sing|PronType=Art', POS: DET},
'VERB__Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass': {'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass', POS: VERB},
'PROPN__Abbr=Yes|Case=Gen': {'morph': 'Abbr=Yes|Case=Gen', POS: PROPN},
'PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs': {'morph': 'Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs', POS: PRON},
'ADJ__Degree=Cmp': {'morph': 'Degree=Cmp', POS: ADJ},
'ADJ__VerbForm=Part': {'morph': 'VerbForm=Part', POS: ADJ},
'DET__Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs': {'morph': 'Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs', POS: DET},
'ADP__Abbr=Yes': {'morph': 'Abbr=Yes', POS: ADP},
'DET__Definite=Ind|Gender=Neut|Number=Sing|PronType=Prs': {'morph': 'Definite=Ind|Gender=Neut|Number=Sing|PronType=Prs', POS: DET},
'NOUN__Case=Gen|Definite=Def|Gender=Neut|Number=Plur': {'morph': 'Case=Gen|Definite=Def|Gender=Neut|Number=Plur', POS: NOUN},
'AUX__VerbForm=Part': {'morph': 'VerbForm=Part', POS: AUX},
'PRON__PronType=Int': {'morph': 'PronType=Int', POS: PRON},
'DET__Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs': {'morph': 'Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs', POS: DET},
'PRON__Number=Plur|Person=3|PronType=Ind,Prs': {'morph': 'Number=Plur|Person=3|PronType=Ind,Prs', POS: PRON},
'DET__Number=Plur|PronType=Ind': {'morph': 'Number=Plur|PronType=Ind', POS: DET},
'ADJ__Degree=Pos': {'morph': 'Degree=Pos', POS: ADJ},
'PRON__Animacy=Hum|Case=Nom|Number=Plur|Person=1|PronType=Prs': {'morph': 'Animacy=Hum|Case=Nom|Number=Plur|Person=1|PronType=Prs', POS: PRON},
'VERB__VerbForm=Inf|Voice=Pass': {'morph': 'VerbForm=Inf|Voice=Pass', POS: VERB},
'DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Dem': {'morph': 'Definite=Ind|Gender=Fem|Number=Sing|PronType=Dem', POS: DET},
'DET__Gender=Neut|Number=Sing|PronType=Ind': {'morph': 'Gender=Neut|Number=Sing|PronType=Ind', POS: DET},
'PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs': {'morph': 'Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs', POS: PRON},
'PRON__Animacy=Hum|Case=Nom|Number=Sing|Person=1|PronType=Prs': {'morph': 'Animacy=Hum|Case=Nom|Number=Sing|Person=1|PronType=Prs', POS: PRON},
'DET__Number=Plur|Polarity=Neg|PronType=Neg': {'morph': 'Number=Plur|Polarity=Neg|PronType=Neg', POS: DET},
'NUM__NumType=Card': {'morph': 'NumType=Card', POS: NUM},
'DET__Gender=Masc|Number=Sing|PronType=Ind': {'morph': 'Gender=Masc|Number=Sing|PronType=Ind', POS: DET},
'DET__PronType=Prs': {'morph': 'PronType=Prs', POS: DET},
'DET__Gender=Fem|Number=Sing|PronType=Ind': {'morph': 'Gender=Fem|Number=Sing|PronType=Ind', POS: DET},
'PROPN__Case=Gen|Gender=Neut': {'morph': 'Case=Gen|Gender=Neut', POS: PROPN},
'DET__Gender=Masc|Number=Sing|Polarity=Neg|PronType=Neg': {'morph': 'Gender=Masc|Number=Sing|Polarity=Neg|PronType=Neg', POS: DET},
'ADJ__Definite=Def|Number=Sing|VerbForm=Part': {'morph': 'Definite=Def|Number=Sing|VerbForm=Part', POS: ADJ},
'PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs': {'morph': 'Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs', POS: PRON},
'AUX__VerbForm=Inf': {'morph': 'VerbForm=Inf', POS: AUX},
'PRON__Case=Acc|Number=Plur|Person=3|PronType=Prs': {'morph': 'Case=Acc|Number=Plur|Person=3|PronType=Prs', POS: PRON},
'ADJ__Case=Gen|Degree=Pos|Number=Plur': {'morph': 'Case=Gen|Degree=Pos|Number=Plur', POS: ADJ},
'DET__Number=Plur|PronType=Tot': {'morph': 'Number=Plur|PronType=Tot', POS: DET},
'DET__Case=Gen|Gender=Masc|Number=Sing|PronType=Dem': {'morph': 'Case=Gen|Gender=Masc|Number=Sing|PronType=Dem', POS: DET},
'DET__Number=Plur|PronType=Prs': {'morph': 'Number=Plur|PronType=Prs', POS: DET},
'SYM___': {'morph': '_', POS: SYM},
'NUM__Gender=Neut|Number=Sing|NumType=Card': {'morph': 'Gender=Neut|Number=Sing|NumType=Card', POS: NUM},
'PRON__Animacy=Hum|Case=Nom|Number=Sing|PronType=Prs': {'morph': 'Animacy=Hum|Case=Nom|Number=Sing|PronType=Prs', POS: PRON},
'DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Prs': {'morph': 'Definite=Ind|Gender=Masc|Number=Sing|PronType=Prs', POS: DET},
'NOUN__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing': {'morph': 'Case=Gen|Definite=Ind|Gender=Masc|Number=Sing', POS: NOUN},
'ADV__Abbr=Yes': {'morph': 'Abbr=Yes', POS: ADV},
'DET__Definite=Ind|Gender=Neut|Number=Sing|PronType=Dem': {'morph': 'Definite=Ind|Gender=Neut|Number=Sing|PronType=Dem', POS: DET},
'DET__Gender=Masc|Number=Sing|PronType=Tot': {'morph': 'Gender=Masc|Number=Sing|PronType=Tot', POS: DET},
'DET__Definite=Def|PronType=Prs': {'morph': 'Definite=Def|PronType=Prs', POS: DET},
'PRON__Animacy=Hum|Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs': {'morph': 'Animacy=Hum|Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs', POS: PRON},
'NOUN__Gender=Neut': {'morph': 'Gender=Neut', POS: NOUN},
'DET__Gender=Neut|Number=Sing|PronType=Int': {'morph': 'Gender=Neut|Number=Sing|PronType=Int', POS: DET},
'NUM__Definite=Def|NumType=Card': {'morph': 'Definite=Def|NumType=Card', POS: NUM},
'VERB__Mood=Imp|VerbForm=Fin': {'morph': 'Mood=Imp|VerbForm=Fin', POS: VERB},
'NOUN__Definite=Ind|Number=Plur': {'morph': 'Definite=Ind|Number=Plur', POS: NOUN},
'DET__Gender=Neut|Number=Sing|PronType=Tot': {'morph': 'Gender=Neut|Number=Sing|PronType=Tot', POS: DET},
'DET__Gender=Fem|Number=Sing|PronType=Tot': {'morph': 'Gender=Fem|Number=Sing|PronType=Tot', POS: DET},
'PRON__Animacy=Hum|Case=Acc|Number=Plur|Person=1|PronType=Prs': {'morph': 'Animacy=Hum|Case=Acc|Number=Plur|Person=1|PronType=Prs', POS: PRON},
'PRON__Gender=Fem,Masc|Number=Sing|Person=3|Polarity=Neg|PronType=Neg,Prs': {'morph': 'Gender=Fem,Masc|Number=Sing|Person=3|Polarity=Neg|PronType=Neg,Prs', POS: PRON},
'PRON__Number=Plur|Person=3|Polarity=Neg|PronType=Neg,Prs': {'morph': 'Number=Plur|Person=3|Polarity=Neg|PronType=Neg,Prs', POS: PRON},
'NUM__Definite=Def|Number=Sing|NumType=Card': {'morph': 'Definite=Def|Number=Sing|NumType=Card', POS: NUM},
'NUM__Gender=Masc|Number=Sing|NumType=Card': {'morph': 'Gender=Masc|Number=Sing|NumType=Card', POS: NUM},
'DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem': {'morph': 'Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem', POS: DET},
'NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Plur': {'morph': 'Case=Gen|Definite=Def|Gender=Fem|Number=Plur', POS: NOUN},
'DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem': {'morph': 'Case=Gen|Gender=Neut|Number=Sing|PronType=Dem', POS: DET},
'PRON__Animacy=Hum|Number=Sing|PronType=Art,Prs': {'morph': 'Animacy=Hum|Number=Sing|PronType=Art,Prs', POS: PRON},
'AUX__Mood=Imp|VerbForm=Fin': {'morph': 'Mood=Imp|VerbForm=Fin', POS: AUX},
'PRON__Number=Plur|Person=3|PronType=Prs,Tot': {'morph': 'Number=Plur|Person=3|PronType=Prs,Tot', POS: PRON},
'ADJ__Number=Plur': {'morph': 'Number=Plur', POS: ADJ},
'NOUN__Gender=Masc': {'morph': 'Gender=Masc', POS: NOUN},
'NOUN__Abbr=Yes': {'morph': 'Abbr=Yes', POS: NOUN},
'NOUN__Case=Gen|Definite=Ind|Gender=Masc|Number=Plur': {'morph': 'Case=Gen|Definite=Ind|Gender=Masc|Number=Plur', POS: NOUN},
'PRON__Gender=Neut|Number=Sing|Person=3|PronType=Ind,Prs': {'morph': 'Gender=Neut|Number=Sing|Person=3|PronType=Ind,Prs', POS: PRON},
'INTJ___': {'morph': '_', POS: INTJ},
'PRON__Animacy=Hum|Case=Nom|Number=Sing|Person=2|PronType=Prs': {'morph': 'Animacy=Hum|Case=Nom|Number=Sing|Person=2|PronType=Prs', POS: PRON},
'PRON__Animacy=Hum|Case=Acc|Number=Sing|Person=1|PronType=Prs': {'morph': 'Animacy=Hum|Case=Acc|Number=Sing|Person=1|PronType=Prs', POS: PRON},
'NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Plur': {'morph': 'Case=Gen|Definite=Def|Gender=Masc|Number=Plur', POS: NOUN},
'ADJ___': {'morph': '_', POS: ADJ},
'PRON__Animacy=Hum|Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs': {'morph': 'Animacy=Hum|Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs', POS: PRON},
'PRON__Animacy=Hum|Case=Acc|Number=Sing|Person=2|PronType=Prs': {'morph': 'Animacy=Hum|Case=Acc|Number=Sing|Person=2|PronType=Prs', POS: PRON},
'NOUN__Definite=Def|Gender=Fem|Number=Sing': {'morph': 'Definite=Def|Gender=Fem|Number=Sing', POS: NOUN},
'PRON__Number=Sing|Polarity=Neg|PronType=Neg': {'morph': 'Number=Sing|Polarity=Neg|PronType=Neg', POS: PRON},
'NOUN__Case=Gen': {'morph': 'Case=Gen', POS: NOUN},
'ADJ__Definite=Ind|Number=Sing': {'morph': 'Definite=Ind|Number=Sing', POS: ADJ},
'PROPN__Case=Gen|Gender=Masc': {'morph': 'Case=Gen|Gender=Masc', POS: PROPN},
'PRON__Animacy=Hum|Number=Plur|PronType=Rcp': {'morph': 'Animacy=Hum|Number=Plur|PronType=Rcp', POS: PRON},
'NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Sing': {'morph': 'Case=Gen|Definite=Ind|Gender=Fem|Number=Sing', POS: NOUN},
'PRON__Number=Plur|Person=3|PronType=Prs': {'morph': 'Number=Plur|Person=3|PronType=Prs', POS: PRON},
'PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Ind,Prs': {'morph': 'Gender=Fem,Masc|Number=Sing|Person=3|PronType=Ind,Prs', POS: PRON},
'DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Prs': {'morph': 'Definite=Ind|Gender=Fem|Number=Sing|PronType=Prs', POS: DET},
'NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing': {'morph': 'Case=Gen|Definite=Def|Gender=Fem|Number=Sing', POS: NOUN},
'DET__Gender=Fem|Number=Sing|PronType=Art': {'morph': 'Gender=Fem|Number=Sing|PronType=Art', POS: DET},
'ADJ__Case=Gen|Definite=Def|Degree=Pos|Number=Sing': {'morph': 'Case=Gen|Definite=Def|Degree=Pos|Number=Sing', POS: ADJ},
'DET__Gender=Masc|Number=Sing|PronType=Int': {'morph': 'Gender=Masc|Number=Sing|PronType=Int', POS: DET},
'NUM__Number=Sing|NumType=Card': {'morph': 'Number=Sing|NumType=Card', POS: NUM},
'PRON__Animacy=Hum|Case=Acc|Number=Plur|Person=2|PronType=Prs': {'morph': 'Animacy=Hum|Case=Acc|Number=Plur|Person=2|PronType=Prs', POS: PRON},
'PRON__Animacy=Hum|Case=Nom|Number=Plur|Person=2|PronType=Prs': {'morph': 'Animacy=Hum|Case=Nom|Number=Plur|Person=2|PronType=Prs', POS: PRON},
'ADJ__Case=Gen|Definite=Def|Number=Sing': {'morph': 'Case=Gen|Definite=Def|Number=Sing', POS: ADJ},
'ADJ__Case=Gen|Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing': {'morph': 'Case=Gen|Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing', POS: ADJ},
'ADJ__Degree=Sup': {'morph': 'Degree=Sup', POS: ADJ},
'PRON__Animacy=Hum|PronType=Int': {'morph': 'Animacy=Hum|PronType=Int', POS: PRON},
'DET__PronType=Ind': {'morph': 'PronType=Ind', POS: DET},
'DET__Definite=Def|Number=Sing|PronType=Dem': {'morph': 'Definite=Def|Number=Sing|PronType=Dem', POS: DET},
'NOUN__Gender=Fem': {'morph': 'Gender=Fem', POS: NOUN},
'DET__Case=Gen|Number=Plur|PronType=Dem': {'morph': 'Case=Gen|Number=Plur|PronType=Dem', POS: DET},
'PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs,Tot': {'morph': 'Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs,Tot', POS: PRON},
'NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Plur': {'morph': 'Case=Gen|Definite=Ind|Gender=Fem|Number=Plur', POS: NOUN},
'DET__Gender=Neut|Number=Sing|Polarity=Neg|PronType=Neg': {'morph': 'Gender=Neut|Number=Sing|Polarity=Neg|PronType=Neg', POS: DET},
'NOUN__Number=Plur': {'morph': 'Number=Plur', POS: NOUN},
'ADJ__Case=Gen|Definite=Ind|Degree=Pos|Number=Sing': {'morph': 'Case=Gen|Definite=Ind|Degree=Pos|Number=Sing', POS: ADJ},
'VERB__Definite=Ind|Number=Sing|VerbForm=Part': {'morph': 'Definite=Ind|Number=Sing|VerbForm=Part', POS: VERB},
'ADJ__Case=Gen|Definite=Def|Number=Sing|VerbForm=Part': {'morph': 'Case=Gen|Definite=Def|Number=Sing|VerbForm=Part', POS: ADJ},
'VERB__Mood=Ind|Tense=Past|VerbForm=Fin|Voice=Pass': {'morph': 'Mood=Ind|Tense=Past|VerbForm=Fin|Voice=Pass', POS: VERB},
'DET__Gender=Neut|Number=Sing|PronType=Dem,Ind': {'morph': 'Gender=Neut|Number=Sing|PronType=Dem,Ind', POS: DET},
'PRON__Animacy=Hum|Poss=Yes|PronType=Int': {'morph': 'Animacy=Hum|Poss=Yes|PronType=Int', POS: PRON},
'ADJ__Abbr=Yes': {'morph': 'Abbr=Yes', POS: ADJ},
'DET__Case=Gen|Gender=Masc|Number=Sing|PronType=Art': {'morph': 'Case=Gen|Gender=Masc|Number=Sing|PronType=Art', POS: DET},
'NOUN__Abbr=Yes|Definite=Def,Ind|Gender=Masc|Number=Sing': {'morph': 'Abbr=Yes|Definite=Def,Ind|Gender=Masc|Number=Sing', POS: NOUN},
'PRON__PronType=Prs': {'morph': 'PronType=Prs', POS: PRON},
'DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Dem': {'morph': 'Case=Gen|Gender=Fem|Number=Sing|PronType=Dem', POS: DET},
'DET__Number=Plur|Poss=Yes|PronType=Rcp': {'morph': 'Number=Plur|Poss=Yes|PronType=Rcp', POS: DET},
'ADJ__Definite=Ind|Degree=Pos': {'morph': 'Definite=Ind|Degree=Pos', POS: ADJ},
'DET__Number=Plur|PronType=Art': {'morph': 'Number=Plur|PronType=Art', POS: DET},
'NUM__Case=Gen|Number=Plur|NumType=Card': {'morph': 'Case=Gen|Number=Plur|NumType=Card', POS: NUM},
'NOUN__Abbr=Yes|Definite=Def,Ind|Gender=Neut|Number=Plur,Sing': {'morph': 'Abbr=Yes|Definite=Def,Ind|Gender=Neut|Number=Plur,Sing', POS: NOUN},
'DET__Case=Gen|Number=Plur|PronType=Tot': {'morph': 'Case=Gen|Number=Plur|PronType=Tot', POS: DET},
'NOUN__Abbr=Yes|Definite=Def,Ind|Gender=Masc|Number=Plur,Sing': {'morph': 'Abbr=Yes|Definite=Def,Ind|Gender=Masc|Number=Plur,Sing', POS: NOUN},
'DET__Gender=Fem|Number=Sing|PronType=Int': {'morph': 'Gender=Fem|Number=Sing|PronType=Int', POS: DET},
'ADJ__Definite=Ind|Gender=Neut|Number=Sing': {'morph': 'Definite=Ind|Gender=Neut|Number=Sing', POS: ADJ},
'DET__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem': {'morph': 'Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem', POS: DET},
'DET__Gender=Fem|Number=Sing|PronType=Prs': {'morph': 'Gender=Fem|Number=Sing|PronType=Prs', POS: DET},
'PRON__Animacy=Hum|Case=Gen,Nom|Number=Sing|PronType=Art,Prs': {'morph': 'Animacy=Hum|Case=Gen,Nom|Number=Sing|PronType=Art,Prs', POS: PRON},
'ADJ__Definite=Def|Degree=Pos|Gender=Masc|Number=Sing': {'morph': 'Definite=Def|Degree=Pos|Gender=Masc|Number=Sing', POS: ADJ},
'PRON__Animacy=Hum|Case=Gen|Number=Sing|PronType=Art,Prs': {'morph': 'Animacy=Hum|Case=Gen|Number=Sing|PronType=Art,Prs', POS: PRON},
'NUM__Gender=Fem|Number=Sing|NumType=Card': {'morph': 'Gender=Fem|Number=Sing|NumType=Card', POS: NUM},
'NOUN__Definite=Ind|Gender=Masc': {'morph': 'Definite=Ind|Gender=Masc', POS: NOUN},
'NOUN__Definite=Def|Number=Plur': {'morph': 'Definite=Def|Number=Plur', POS: NOUN},
'ADJ__Number=Sing|VerbForm=Part': {'morph': 'Number=Sing|VerbForm=Part', POS: ADJ},
'ADJ__Definite=Ind|Gender=Masc|Number=Sing|VerbForm=Part': {'morph': 'Definite=Ind|Gender=Masc|Number=Sing|VerbForm=Part', POS: ADJ},
'NOUN__Abbr=Yes|Gender=Masc': {'morph': 'Abbr=Yes|Gender=Masc', POS: NOUN},
'NOUN__Abbr=Yes|Case=Gen': {'morph': 'Abbr=Yes|Case=Gen', POS: NOUN},
'VERB__Abbr=Yes|Mood=Ind|Tense=Pres|VerbForm=Fin': {'morph': 'Abbr=Yes|Mood=Ind|Tense=Pres|VerbForm=Fin', POS: VERB},
'ADJ__Abbr=Yes|Degree=Pos': {'morph': 'Abbr=Yes|Degree=Pos', POS: ADJ},
'NOUN__Case=Gen|Gender=Fem': {'morph': 'Case=Gen|Gender=Fem', POS: NOUN},
'ADJ__Case=Gen|Degree=Cmp': {'morph': 'Case=Gen|Degree=Cmp', POS: ADJ},
'ADJ__Case=Gen|Degree=Pos|Number=Plur': {'morph': 'Case=Gen|Degree=Pos|Number=Plur', POS: ADJ},
'ADJ__Definite=Def|Degree=Pos|Gender=Masc|Number=Sing': {'morph': 'Definite=Def|Degree=Pos|Gender=Masc|Number=Sing', POS: ADJ},
'ADJ__Definite=Def|Degree=Pos|Number=Sing': {'morph': 'Definite=Def|Degree=Pos|Number=Sing', POS: ADJ},
'ADJ__Definite=Def|Degree=Sup': {'morph': 'Definite=Def|Degree=Sup', POS: ADJ},
'ADJ__Definite=Def|Number=Sing': {'morph': 'Definite=Def|Number=Sing', POS: ADJ},
'ADJ__Definite=Ind|Degree=Pos': {'morph': 'Definite=Ind|Degree=Pos', POS: ADJ},
'ADJ__Definite=Ind|Degree=Pos|Gender=Masc|Number=Sing': {'morph': 'Definite=Ind|Degree=Pos|Gender=Masc|Number=Sing', POS: ADJ},
'ADJ__Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing': {'morph': 'Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing', POS: ADJ},
'ADJ__Definite=Ind|Degree=Pos|Number=Sing': {'morph': 'Definite=Ind|Degree=Pos|Number=Sing', POS: ADJ},
'ADJ__Definite=Ind|Degree=Sup': {'morph': 'Definite=Ind|Degree=Sup', POS: ADJ},
'ADJ__Definite=Ind|Gender=Masc|Number=Sing': {'morph': 'Definite=Ind|Gender=Masc|Number=Sing', POS: ADJ},
'ADJ__Definite=Ind|Gender=Neut|Number=Sing': {'morph': 'Definite=Ind|Gender=Neut|Number=Sing', POS: ADJ},
'ADJ__Definite=Ind|Number=Sing': {'morph': 'Definite=Ind|Number=Sing', POS: ADJ},
'ADJ__Degree=Cmp': {'morph': 'Degree=Cmp', POS: ADJ},
'ADJ__Degree=Pos': {'morph': 'Degree=Pos', POS: ADJ},
'ADJ__Degree=Pos|Number=Plur': {'morph': 'Degree=Pos|Number=Plur', POS: ADJ},
'ADJ__Degree=Sup': {'morph': 'Degree=Sup', POS: ADJ},
'ADJ__Number=Plur': {'morph': 'Number=Plur', POS: ADJ},
'ADJ__Number=Plur|VerbForm=Part': {'morph': 'Number=Plur|VerbForm=Part', POS: ADJ},
'ADJ__Number=Sing': {'morph': 'Number=Sing', POS: ADJ},
'ADJ___': {'morph': '_', POS: ADJ},
'ADP___': {'morph': '_', POS: ADP},
'ADV___': {'morph': '_', POS: ADV},
'AUX__Mood=Imp|VerbForm=Fin': {'morph': 'Mood=Imp|VerbForm=Fin', POS: AUX},
'AUX__Mood=Ind|Tense=Past|VerbForm=Fin': {'morph': 'Mood=Ind|Tense=Past|VerbForm=Fin', POS: AUX},
'AUX__Mood=Ind|Tense=Pres|VerbForm=Fin': {'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin', POS: AUX},
'AUX__Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass': {'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass', POS: AUX},
'AUX__VerbForm=Inf': {'morph': 'VerbForm=Inf', POS: AUX},
'AUX__VerbForm=Part': {'morph': 'VerbForm=Part', POS: AUX},
'CONJ___': {'morph': '_', POS: CONJ},
'DET__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem': {'morph': 'Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem', POS: DET},
'DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Dem': {'morph': 'Case=Gen|Gender=Fem|Number=Sing|PronType=Dem', POS: DET},
'DET__Case=Gen|Gender=Masc|Number=Sing': {'morph': 'Case=Gen|Gender=Masc|Number=Sing', POS: DET},
'DET__Case=Gen|Gender=Masc|Number=Sing|PronType=Dem': {'morph': 'Case=Gen|Gender=Masc|Number=Sing|PronType=Dem', POS: DET},
'DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem': {'morph': 'Case=Gen|Gender=Neut|Number=Sing|PronType=Dem', POS: DET},
'DET__Case=Gen|Number=Plur': {'morph': 'Case=Gen|Number=Plur', POS: DET},
'DET__Case=Gen|Number=Plur|PronType=Dem': {'morph': 'Case=Gen|Number=Plur|PronType=Dem', POS: DET},
'DET__Definite=Def': {'morph': 'Definite=Def', POS: DET},
'DET__Definite=Def|Number=Sing|PronType=Dem': {'morph': 'Definite=Def|Number=Sing|PronType=Dem', POS: DET},
'DET__Definite=Def|PronType=Dem': {'morph': 'Definite=Def|PronType=Dem', POS: DET},
'DET__Definite=Ind|Gender=Fem|Number=Sing': {'morph': 'Definite=Ind|Gender=Fem|Number=Sing', POS: DET},
'DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Dem': {'morph': 'Definite=Ind|Gender=Fem|Number=Sing|PronType=Dem', POS: DET},
'DET__Definite=Ind|Gender=Masc|Number=Sing': {'morph': 'Definite=Ind|Gender=Masc|Number=Sing', POS: DET},
'DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem': {'morph': 'Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem', POS: DET},
'DET__Definite=Ind|Gender=Neut|Number=Sing': {'morph': 'Definite=Ind|Gender=Neut|Number=Sing', POS: DET},
'DET__Definite=Ind|Gender=Neut|Number=Sing|PronType=Dem': {'morph': 'Definite=Ind|Gender=Neut|Number=Sing|PronType=Dem', POS: DET},
'DET__Degree=Pos|Number=Plur': {'morph': 'Degree=Pos|Number=Plur', POS: DET},
'DET__Gender=Fem|Number=Sing': {'morph': 'Gender=Fem|Number=Sing', POS: DET},
'DET__Gender=Fem|Number=Sing|Poss=Yes': {'morph': 'Gender=Fem|Number=Sing|Poss=Yes', POS: DET},
'DET__Gender=Fem|Number=Sing|PronType=Dem': {'morph': 'Gender=Fem|Number=Sing|PronType=Dem', POS: DET},
'DET__Gender=Fem|Number=Sing|PronType=Int': {'morph': 'Gender=Fem|Number=Sing|PronType=Int', POS: DET},
'DET__Gender=Masc|Number=Sing': {'morph': 'Gender=Masc|Number=Sing', POS: DET},
'DET__Gender=Masc|Number=Sing|Poss=Yes': {'morph': 'Gender=Masc|Number=Sing|Poss=Yes', POS: DET},
'DET__Gender=Masc|Number=Sing|PronType=Dem': {'morph': 'Gender=Masc|Number=Sing|PronType=Dem', POS: DET},
'DET__Gender=Masc|Number=Sing|PronType=Int': {'morph': 'Gender=Masc|Number=Sing|PronType=Int', POS: DET},
'DET__Gender=Neut|Number=Sing': {'morph': 'Gender=Neut|Number=Sing', POS: DET},
'DET__Gender=Neut|Number=Sing|Poss=Yes': {'morph': 'Gender=Neut|Number=Sing|Poss=Yes', POS: DET},
'DET__Gender=Neut|Number=Sing|PronType=Dem': {'morph': 'Gender=Neut|Number=Sing|PronType=Dem', POS: DET},
'DET__Gender=Neut|Number=Sing|PronType=Int': {'morph': 'Gender=Neut|Number=Sing|PronType=Int', POS: DET},
'DET__Number=Plur': {'morph': 'Number=Plur', POS: DET},
'DET__Number=Plur|Poss=Yes': {'morph': 'Number=Plur|Poss=Yes', POS: DET},
'DET__Number=Plur|Poss=Yes|PronType=Rcp': {'morph': 'Number=Plur|Poss=Yes|PronType=Rcp', POS: DET},
'DET__Number=Plur|PronType=Dem': {'morph': 'Number=Plur|PronType=Dem', POS: DET},
'DET__Number=Plur|PronType=Int': {'morph': 'Number=Plur|PronType=Int', POS: DET},
'DET___': {'morph': '_', POS: DET},
'INTJ___': {'morph': '_', POS: INTJ},
'NOUN__Case=Gen': {'morph': 'Case=Gen', POS: NOUN},
'NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Plur': {'morph': 'Case=Gen|Definite=Def|Gender=Fem|Number=Plur', POS: NOUN},
'NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing': {'morph': 'Case=Gen|Definite=Def|Gender=Fem|Number=Sing', POS: NOUN},
'NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Plur': {'morph': 'Case=Gen|Definite=Def|Gender=Masc|Number=Plur', POS: NOUN},
'NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Sing': {'morph': 'Case=Gen|Definite=Def|Gender=Masc|Number=Sing', POS: NOUN},
'NOUN__Case=Gen|Definite=Def|Gender=Neut|Number=Plur': {'morph': 'Case=Gen|Definite=Def|Gender=Neut|Number=Plur', POS: NOUN},
'NOUN__Case=Gen|Definite=Def|Gender=Neut|Number=Sing': {'morph': 'Case=Gen|Definite=Def|Gender=Neut|Number=Sing', POS: NOUN},
'NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Plur': {'morph': 'Case=Gen|Definite=Ind|Gender=Fem|Number=Plur', POS: NOUN},
'NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Sing': {'morph': 'Case=Gen|Definite=Ind|Gender=Fem|Number=Sing', POS: NOUN},
'NOUN__Case=Gen|Definite=Ind|Gender=Masc|Number=Plur': {'morph': 'Case=Gen|Definite=Ind|Gender=Masc|Number=Plur', POS: NOUN},
'NOUN__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing': {'morph': 'Case=Gen|Definite=Ind|Gender=Masc|Number=Sing', POS: NOUN},
'NOUN__Case=Gen|Definite=Ind|Gender=Neut|Number=Plur': {'morph': 'Case=Gen|Definite=Ind|Gender=Neut|Number=Plur', POS: NOUN},
'NOUN__Case=Gen|Definite=Ind|Gender=Neut|Number=Sing': {'morph': 'Case=Gen|Definite=Ind|Gender=Neut|Number=Sing', POS: NOUN},
'NOUN__Case=Gen|Gender=Fem': {'morph': 'Case=Gen|Gender=Fem', POS: NOUN},
'NOUN__Definite=Def,Ind|Gender=Masc|Number=Plur,Sing': {'morph': 'Definite=Def', POS: NOUN},
'NOUN__Definite=Def,Ind|Gender=Masc|Number=Sing': {'morph': 'Definite=Def', POS: NOUN},
'NOUN__Definite=Def,Ind|Gender=Neut|Number=Plur,Sing': {'morph': 'Definite=Def', POS: NOUN},
'NOUN__Definite=Def|Gender=Fem|Number=Plur': {'morph': 'Definite=Def|Gender=Fem|Number=Plur', POS: NOUN},
'NOUN__Definite=Def|Gender=Fem|Number=Sing': {'morph': 'Definite=Def|Gender=Fem|Number=Sing', POS: NOUN},
'NOUN__Definite=Def|Gender=Masc|Number=Plur': {'morph': 'Definite=Def|Gender=Masc|Number=Plur', POS: NOUN},
'NOUN__Definite=Def|Gender=Masc|Number=Sing': {'morph': 'Definite=Def|Gender=Masc|Number=Sing', POS: NOUN},
'NOUN__Definite=Def|Gender=Neut|Number=Plur': {'morph': 'Definite=Def|Gender=Neut|Number=Plur', POS: NOUN},
'NOUN__Definite=Def|Gender=Neut|Number=Sing': {'morph': 'Definite=Def|Gender=Neut|Number=Sing', POS: NOUN},
'NOUN__Definite=Def|Number=Plur': {'morph': 'Definite=Def|Number=Plur', POS: NOUN},
'NOUN__Definite=Ind|Gender=Fem|Number=Plur': {'morph': 'Definite=Ind|Gender=Fem|Number=Plur', POS: NOUN},
'NOUN__Definite=Ind|Gender=Fem|Number=Sing': {'morph': 'Definite=Ind|Gender=Fem|Number=Sing', POS: NOUN},
'NOUN__Definite=Ind|Gender=Masc': {'morph': 'Definite=Ind|Gender=Masc', POS: NOUN},
'NOUN__Definite=Ind|Gender=Masc|Number=Plur': {'morph': 'Definite=Ind|Gender=Masc|Number=Plur', POS: NOUN},
'NOUN__Definite=Ind|Gender=Masc|Number=Sing': {'morph': 'Definite=Ind|Gender=Masc|Number=Sing', POS: NOUN},
'NOUN__Definite=Ind|Gender=Neut|Number=Plur': {'morph': 'Definite=Ind|Gender=Neut|Number=Plur', POS: NOUN},
'NOUN__Definite=Ind|Gender=Neut|Number=Sing': {'morph': 'Definite=Ind|Gender=Neut|Number=Sing', POS: NOUN},
'NOUN__Definite=Ind|Number=Plur': {'morph': 'Definite=Ind|Number=Plur', POS: NOUN},
'NOUN__Gender=Fem': {'morph': 'Gender=Fem', POS: NOUN},
'NOUN__Gender=Masc': {'morph': 'Gender=Masc', POS: NOUN},
'NOUN__Gender=Masc|Number=Sing': {'morph': 'Gender=Masc|Number=Sing', POS: NOUN},
'NOUN__Gender=Neut': {'morph': 'Gender=Neut', POS: NOUN},
'NOUN__Number=Plur': {'morph': 'Number=Plur', POS: NOUN},
'NOUN___': {'morph': '_', POS: NOUN},
'NUM__Case=Gen|Number=Plur': {'morph': 'Case=Gen|Number=Plur', POS: NUM},
'NUM__Definite=Def': {'morph': 'Definite=Def', POS: NUM},
'NUM__Definite=Def|Number=Sing': {'morph': 'Definite=Def|Number=Sing', POS: NUM},
'NUM__Gender=Fem|Number=Sing': {'morph': 'Gender=Fem|Number=Sing', POS: NUM},
'NUM__Gender=Masc|Number=Sing': {'morph': 'Gender=Masc|Number=Sing', POS: NUM},
'NUM__Gender=Neut|Number=Sing': {'morph': 'Gender=Neut|Number=Sing', POS: NUM},
'NUM__Number=Plur': {'morph': 'Number=Plur', POS: NUM},
'NUM__Number=Sing': {'morph': 'Number=Sing', POS: NUM},
'NUM___': {'morph': '_', POS: NUM},
'PART___': {'morph': '_', POS: PART},
'PRON__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs': {'morph': 'Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs': {'morph': 'Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=1|PronType=Prs': {'morph': 'Animacy=Anim|Case=Acc|Number=Plur|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=2|PronType=Prs': {'morph': 'Animacy=Anim|Case=Acc|Number=Plur|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=1|PronType=Prs': {'morph': 'Animacy=Anim|Case=Acc|Number=Sing|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=2|PronType=Prs': {'morph': 'Animacy=Anim|Case=Acc|Number=Sing|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Gen,Nom|Number=Sing|PronType=Prs': {'morph': 'Animacy=Anim|Case=Gen', POS: PRON},
'PRON__Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs': {'morph': 'Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs', POS: PRON},
'PRON__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs': {'morph': 'Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs': {'morph': 'Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=1|PronType=Prs': {'morph': 'Animacy=Anim|Case=Nom|Number=Plur|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=2|PronType=Prs': {'morph': 'Animacy=Anim|Case=Nom|Number=Plur|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=1|PronType=Prs': {'morph': 'Animacy=Anim|Case=Nom|Number=Sing|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=2|PronType=Prs': {'morph': 'Animacy=Anim|Case=Nom|Number=Sing|Person=', POS: PRON},
'PRON__Animacy=Anim|Case=Nom|Number=Sing|PronType=Prs': {'morph': 'Animacy=Anim|Case=Nom|Number=Sing|PronType=Prs', POS: PRON},
'PRON__Animacy=Anim|Number=Plur|PronType=Rcp': {'morph': 'Animacy=Anim|Number=Plur|PronType=Rcp', POS: PRON},
'PRON__Animacy=Anim|Number=Sing|PronType=Prs': {'morph': 'Animacy=Anim|Number=Sing|PronType=Prs', POS: PRON},
'PRON__Animacy=Anim|Poss=Yes|PronType=Int': {'morph': 'Animacy=Anim|Poss=Yes|PronType=Int', POS: PRON},
'PRON__Animacy=Anim|PronType=Int': {'morph': 'Animacy=Anim|PronType=Int', POS: PRON},
'PRON__Case=Acc|Number=Plur|Person=3|PronType=Prs': {'morph': 'Case=Acc|Number=Plur|Person=', POS: PRON},
'PRON__Case=Acc|Reflex=Yes': {'morph': 'Case=Acc|Reflex=Yes', POS: PRON},
'PRON__Case=Nom|Number=Plur|Person=3|PronType=Prs': {'morph': 'Case=Nom|Number=Plur|Person=', POS: PRON},
'PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs': {'morph': 'Gender=Fem', POS: PRON},
'PRON__Gender=Neut|Number=Sing|Person=3|PronType=Prs': {'morph': 'Gender=Neut|Number=Sing|Person=', POS: PRON},
'PRON__Number=Plur|Person=3|PronType=Prs': {'morph': 'Number=Plur|Person=', POS: PRON},
'PRON__Number=Sing': {'morph': 'Number=Sing', POS: PRON},
'PRON__PronType=Int': {'morph': 'PronType=Int', POS: PRON},
'PRON___': {'morph': '_', POS: PRON},
'PROPN__Case=Gen': {'morph': 'Case=Gen', POS: PROPN},
'PROPN__Case=Gen|Gender=Fem': {'morph': 'Case=Gen|Gender=Fem', POS: PROPN},
'PROPN__Case=Gen|Gender=Masc': {'morph': 'Case=Gen|Gender=Masc', POS: PROPN},
'PROPN__Case=Gen|Gender=Neut': {'morph': 'Case=Gen|Gender=Neut', POS: PROPN},
'PROPN__Gender=Fem': {'morph': 'Gender=Fem', POS: PROPN},
'PROPN__Gender=Masc': {'morph': 'Gender=Masc', POS: PROPN},
'PROPN__Gender=Neut': {'morph': 'Gender=Neut', POS: PROPN},
'PROPN___': {'morph': '_', POS: PROPN},
'PUNCT___': {'morph': '_', POS: PUNCT},
'SCONJ___': {'morph': '_', POS: SCONJ},
'SYM___': {'morph': '_', POS: SYM},
'VERB__Definite=Ind|Number=Sing': {'morph': 'Definite=Ind|Number=Sing', POS: VERB},
'VERB__Mood=Imp|VerbForm=Fin': {'morph': 'Mood=Imp|VerbForm=Fin', POS: VERB},
'VERB__Mood=Ind|Tense=Past|VerbForm=Fin': {'morph': 'Mood=Ind|Tense=Past|VerbForm=Fin', POS: VERB},
'VERB__Mood=Ind|Tense=Past|VerbForm=Fin|Voice=Pass': {'morph': 'Mood=Ind|Tense=Past|VerbForm=Fin|Voice=Pass', POS: VERB},
'VERB__Mood=Ind|Tense=Pres|VerbForm=Fin': {'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin', POS: VERB},
'VERB__Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass': {'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass', POS: VERB},
'VERB__VerbForm=Inf': {'morph': 'VerbForm=Inf', POS: VERB},
'VERB__VerbForm=Inf|Voice=Pass': {'morph': 'VerbForm=Inf|Voice=Pass', POS: VERB},
'VERB__VerbForm=Part': {'morph': 'VerbForm=Part', POS: VERB},
'VERB___': {'morph': '_', POS: VERB},
'X___': {'morph': '_', POS: X}
}