diff --git a/.appveyor.yml b/.appveyor.yml index 0021776aa..dd1824ead 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -32,7 +32,7 @@ test_script: # Note that you must use the environment variable %PYTHON% to refer to # the interpreter you're using - Appveyor does not do anything special # to put the Python version you want to use on PATH. - - "%PYTHON%\\python.exe -m pytest spacy/ --no-print-logs" + - "%PYTHON%\\python.exe -m pytest spacy/" after_test: # This step builds your wheels. diff --git a/.buildkite/train.yml b/.buildkite/train.yml deleted file mode 100644 index b257db87c..000000000 --- a/.buildkite/train.yml +++ /dev/null @@ -1,11 +0,0 @@ -steps: - - - command: "fab env clean make test wheel" - label: ":dizzy: :python:" - artifact_paths: "dist/*.whl" - - wait - - trigger: "spacy-train-from-wheel" - label: ":dizzy: :train:" - build: - env: - SPACY_VERSION: "{$SPACY_VERSION}" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 396472519..0c0ba3144 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -182,7 +182,7 @@ If you've made a contribution to spaCy, you should fill in the [spaCy contributor agreement](.github/CONTRIBUTOR_AGREEMENT.md) to ensure that your contribution can be used across the project. If you agree to be bound by the terms of the agreement, fill in the [template](.github/CONTRIBUTOR_AGREEMENT.md) -and include it with your pull request, or submit it separately to +and include it with your pull request, or submit it separately to [`.github/contributors/`](/.github/contributors). The name of the file should be your GitHub username, with the extension `.md`. For example, the user example_user would create the file `.github/contributors/example_user.md`. diff --git a/examples/training/conllu.py b/examples/training/conllu.py deleted file mode 100644 index 45c55a1e8..000000000 --- a/examples/training/conllu.py +++ /dev/null @@ -1,392 +0,0 @@ -'''Train for CONLL 2017 UD treebank evaluation. Takes .conllu files, writes -.conllu format for development data, allowing the official scorer to be used.
-''' -from __future__ import unicode_literals -import plac -import tqdm -import attr -from pathlib import Path -import re -import sys -import json - -import spacy -import spacy.util -from spacy.tokens import Token, Doc -from spacy.gold import GoldParse -from spacy.syntax.nonproj import projectivize -from collections import defaultdict, Counter -from timeit import default_timer as timer -from spacy.matcher import Matcher - -import itertools -import random -import numpy.random -import cytoolz - -import conll17_ud_eval - -import spacy.lang.zh -import spacy.lang.ja - -spacy.lang.zh.Chinese.Defaults.use_jieba = False -spacy.lang.ja.Japanese.Defaults.use_janome = False - -random.seed(0) -numpy.random.seed(0) - -def minibatch_by_words(items, size=5000): - random.shuffle(items) - if isinstance(size, int): - size_ = itertools.repeat(size) - else: - size_ = size - items = iter(items) - while True: - batch_size = next(size_) - batch = [] - while batch_size >= 0: - try: - doc, gold = next(items) - except StopIteration: - if batch: - yield batch - return - batch_size -= len(doc) - batch.append((doc, gold)) - if batch: - yield batch - else: - break - -################ -# Data reading # -################ - -space_re = re.compile('\s+') -def split_text(text): - return [space_re.sub(' ', par.strip()) for par in text.split('\n\n')] - - -def read_data(nlp, conllu_file, text_file, raw_text=True, oracle_segments=False, - max_doc_length=None, limit=None): - '''Read the CONLLU format into (Doc, GoldParse) tuples. If raw_text=True, - include Doc objects created using nlp.make_doc and then aligned against - the gold-standard sequences. If oracle_segments=True, include Doc objects - created from the gold-standard segments. At least one must be True.''' - if not raw_text and not oracle_segments: - raise ValueError("At least one of raw_text or oracle_segments must be True") - paragraphs = split_text(text_file.read()) - conllu = read_conllu(conllu_file) - # sd is spacy doc; cd is conllu doc - # cs is conllu sent, ct is conllu token - docs = [] - golds = [] - for doc_id, (text, cd) in enumerate(zip(paragraphs, conllu)): - sent_annots = [] - for cs in cd: - sent = defaultdict(list) - for id_, word, lemma, pos, tag, morph, head, dep, _, space_after in cs: - if '.' 
in id_: - continue - if '-' in id_: - continue - id_ = int(id_)-1 - head = int(head)-1 if head != '0' else id_ - sent['words'].append(word) - sent['tags'].append(tag) - sent['heads'].append(head) - sent['deps'].append('ROOT' if dep == 'root' else dep) - sent['spaces'].append(space_after == '_') - sent['entities'] = ['-'] * len(sent['words']) - sent['heads'], sent['deps'] = projectivize(sent['heads'], - sent['deps']) - if oracle_segments: - docs.append(Doc(nlp.vocab, words=sent['words'], spaces=sent['spaces'])) - golds.append(GoldParse(docs[-1], **sent)) - - sent_annots.append(sent) - if raw_text and max_doc_length and len(sent_annots) >= max_doc_length: - doc, gold = _make_gold(nlp, None, sent_annots) - sent_annots = [] - docs.append(doc) - golds.append(gold) - if limit and len(docs) >= limit: - return docs, golds - - if raw_text and sent_annots: - doc, gold = _make_gold(nlp, None, sent_annots) - docs.append(doc) - golds.append(gold) - if limit and len(docs) >= limit: - return docs, golds - return docs, golds - - -def read_conllu(file_): - docs = [] - sent = [] - doc = [] - for line in file_: - if line.startswith('# newdoc'): - if doc: - docs.append(doc) - doc = [] - elif line.startswith('#'): - continue - elif not line.strip(): - if sent: - doc.append(sent) - sent = [] - else: - sent.append(list(line.strip().split('\t'))) - if len(sent[-1]) != 10: - print(repr(line)) - raise ValueError - if sent: - doc.append(sent) - if doc: - docs.append(doc) - return docs - - -def _make_gold(nlp, text, sent_annots): - # Flatten the conll annotations, and adjust the head indices - flat = defaultdict(list) - for sent in sent_annots: - flat['heads'].extend(len(flat['words'])+head for head in sent['heads']) - for field in ['words', 'tags', 'deps', 'entities', 'spaces']: - flat[field].extend(sent[field]) - # Construct text if necessary - assert len(flat['words']) == len(flat['spaces']) - if text is None: - text = ''.join(word+' '*space for word, space in zip(flat['words'], flat['spaces'])) - doc = nlp.make_doc(text) - flat.pop('spaces') - gold = GoldParse(doc, **flat) - return doc, gold - -############################# -# Data transforms for spaCy # -############################# - -def golds_to_gold_tuples(docs, golds): - '''Get out the annoying 'tuples' format used by begin_training, given the - GoldParse objects.''' - tuples = [] - for doc, gold in zip(docs, golds): - text = doc.text - ids, words, tags, heads, labels, iob = zip(*gold.orig_annot) - sents = [((ids, words, tags, heads, labels, iob), [])] - tuples.append((text, sents)) - return tuples - - -############## -# Evaluation # -############## - -def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None): - with text_loc.open('r', encoding='utf8') as text_file: - texts = split_text(text_file.read()) - docs = list(nlp.pipe(texts)) - with sys_loc.open('w', encoding='utf8') as out_file: - write_conllu(docs, out_file) - with gold_loc.open('r', encoding='utf8') as gold_file: - gold_ud = conll17_ud_eval.load_conllu(gold_file) - with sys_loc.open('r', encoding='utf8') as sys_file: - sys_ud = conll17_ud_eval.load_conllu(sys_file) - scores = conll17_ud_eval.evaluate(gold_ud, sys_ud) - return scores - - -def write_conllu(docs, file_): - merger = Matcher(docs[0].vocab) - merger.add('SUBTOK', None, [{'DEP': 'subtok', 'op': '+'}]) - for i, doc in enumerate(docs): - matches = merger(doc) - spans = [doc[start:end+1] for _, start, end in matches] - offsets = [(span.start_char, span.end_char) for span in spans] - for start_char, end_char in offsets: - 
doc.merge(start_char, end_char) - file_.write("# newdoc id = {i}\n".format(i=i)) - for j, sent in enumerate(doc.sents): - file_.write("# sent_id = {i}.{j}\n".format(i=i, j=j)) - file_.write("# text = {text}\n".format(text=sent.text)) - for k, token in enumerate(sent): - file_.write(token._.get_conllu_lines(k) + '\n') - file_.write('\n') - - -def print_progress(itn, losses, ud_scores): - fields = { - 'dep_loss': losses.get('parser', 0.0), - 'tag_loss': losses.get('tagger', 0.0), - 'words': ud_scores['Words'].f1 * 100, - 'sents': ud_scores['Sentences'].f1 * 100, - 'tags': ud_scores['XPOS'].f1 * 100, - 'uas': ud_scores['UAS'].f1 * 100, - 'las': ud_scores['LAS'].f1 * 100, - } - header = ['Epoch', 'Loss', 'LAS', 'UAS', 'TAG', 'SENT', 'WORD'] - if itn == 0: - print('\t'.join(header)) - tpl = '\t'.join(( - '{:d}', - '{dep_loss:.1f}', - '{las:.1f}', - '{uas:.1f}', - '{tags:.1f}', - '{sents:.1f}', - '{words:.1f}', - )) - print(tpl.format(itn, **fields)) - -#def get_sent_conllu(sent, sent_id): -# lines = ["# sent_id = {sent_id}".format(sent_id=sent_id)] - -def get_token_conllu(token, i): - if token._.begins_fused: - n = 1 - while token.nbor(n)._.inside_fused: - n += 1 - id_ = '%d-%d' % (i, i+n) - lines = [id_, token.text, '_', '_', '_', '_', '_', '_', '_', '_'] - else: - lines = [] - if token.head.i == token.i: - head = 0 - else: - head = i + (token.head.i - token.i) + 1 - fields = [str(i+1), token.text, token.lemma_, token.pos_, token.tag_, '_', - str(head), token.dep_.lower(), '_', '_'] - lines.append('\t'.join(fields)) - return '\n'.join(lines) - -Token.set_extension('get_conllu_lines', method=get_token_conllu) -Token.set_extension('begins_fused', default=False) -Token.set_extension('inside_fused', default=False) - - -################## -# Initialization # -################## - - -def load_nlp(corpus, config): - lang = corpus.split('_')[0] - nlp = spacy.blank(lang) - if config.vectors: - nlp.vocab.from_disk(config.vectors / 'vocab') - return nlp - -def initialize_pipeline(nlp, docs, golds, config): - nlp.add_pipe(nlp.create_pipe('parser')) - if config.multitask_tag: - nlp.parser.add_multitask_objective('tag') - if config.multitask_sent: - nlp.parser.add_multitask_objective('sent_start') - nlp.parser.moves.add_action(2, 'subtok') - nlp.add_pipe(nlp.create_pipe('tagger')) - for gold in golds: - for tag in gold.tags: - if tag is not None: - nlp.tagger.add_label(tag) - # Replace labels that didn't make the frequency cutoff - actions = set(nlp.parser.labels) - label_set = set([act.split('-')[1] for act in actions if '-' in act]) - for gold in golds: - for i, label in enumerate(gold.labels): - if label is not None and label not in label_set: - gold.labels[i] = label.split('||')[0] - return nlp.begin_training(lambda: golds_to_gold_tuples(docs, golds)) - - -######################## -# Command line helpers # -######################## - -@attr.s -class Config(object): - vectors = attr.ib(default=None) - max_doc_length = attr.ib(default=10) - multitask_tag = attr.ib(default=True) - multitask_sent = attr.ib(default=True) - nr_epoch = attr.ib(default=30) - batch_size = attr.ib(default=1000) - dropout = attr.ib(default=0.2) - - @classmethod - def load(cls, loc): - with Path(loc).open('r', encoding='utf8') as file_: - cfg = json.load(file_) - return cls(**cfg) - - -class Dataset(object): - def __init__(self, path, section): - self.path = path - self.section = section - self.conllu = None - self.text = None - for file_path in self.path.iterdir(): - name = file_path.parts[-1] - if section in name and 
name.endswith('conllu'): - self.conllu = file_path - elif section in name and name.endswith('txt'): - self.text = file_path - if self.conllu is None: - msg = "Could not find .txt file in {path} for {section}" - raise IOError(msg.format(section=section, path=path)) - if self.text is None: - msg = "Could not find .txt file in {path} for {section}" - self.lang = self.conllu.parts[-1].split('-')[0].split('_')[0] - - -class TreebankPaths(object): - def __init__(self, ud_path, treebank, **cfg): - self.train = Dataset(ud_path / treebank, 'train') - self.dev = Dataset(ud_path / treebank, 'dev') - self.lang = self.train.lang - - -@plac.annotations( - ud_dir=("Path to Universal Dependencies corpus", "positional", None, Path), - corpus=("UD corpus to train and evaluate on, e.g. en, es_ancora, etc", - "positional", None, str), - parses_dir=("Directory to write the development parses", "positional", None, Path), - config=("Path to json formatted config file", "positional", None, Config.load), - limit=("Size limit", "option", "n", int) -) -def main(ud_dir, parses_dir, config, corpus, limit=0): - paths = TreebankPaths(ud_dir, corpus) - if not (parses_dir / corpus).exists(): - (parses_dir / corpus).mkdir() - print("Train and evaluate", corpus, "using lang", paths.lang) - nlp = load_nlp(paths.lang, config) - - docs, golds = read_data(nlp, paths.train.conllu.open(), paths.train.text.open(), - max_doc_length=config.max_doc_length, limit=limit) - - optimizer = initialize_pipeline(nlp, docs, golds, config) - - for i in range(config.nr_epoch): - docs = [nlp.make_doc(doc.text) for doc in docs] - batches = minibatch_by_words(list(zip(docs, golds)), size=config.batch_size) - losses = {} - n_train_words = sum(len(doc) for doc in docs) - with tqdm.tqdm(total=n_train_words, leave=False) as pbar: - for batch in batches: - batch_docs, batch_gold = zip(*batch) - pbar.update(sum(len(doc) for doc in batch_docs)) - nlp.update(batch_docs, batch_gold, sgd=optimizer, - drop=config.dropout, losses=losses) - - out_path = parses_dir / corpus / 'epoch-{i}.conllu'.format(i=i) - with nlp.use_params(optimizer.averages): - scores = evaluate(nlp, paths.dev.text, paths.dev.conllu, out_path) - print_progress(i, losses, scores) - - -if __name__ == '__main__': - plac.call(main) diff --git a/examples/vectors_tensorboard_standalone.py b/examples/vectors_tensorboard_standalone.py new file mode 100644 index 000000000..7a9abf785 --- /dev/null +++ b/examples/vectors_tensorboard_standalone.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +# coding: utf8 +"""Export spaCy model vectors for use in TensorBoard's standalone embedding projector. 
+https://github.com/tensorflow/embedding-projector-standalone + +Usage: + + python vectors_tensorboard_standalone.py ./myVectorModel ./output [name] + +This outputs two files that have to be copied into the "oss_data" of the standalone projector: + + [name]_labels.tsv - metadata such as human readable labels for vectors + [name]_tensors.bytes - numpy.ndarray of numpy.float32 precision vectors + +""" +from __future__ import unicode_literals + +import json +import math +from os import path + +import numpy +import plac +import spacy +import tqdm + + +@plac.annotations( + vectors_loc=("Path to spaCy model that contains vectors", "positional", None, str), + out_loc=("Path to output folder writing tensors and labels data", "positional", None, str), + name=("Human readable name for tsv file and vectors tensor", "positional", None, str), +) +def main(vectors_loc, out_loc, name="spaCy_vectors"): + # A tab-separated file that contains information about the vectors for visualization + # + # Learn more: https://www.tensorflow.org/programmers_guide/embedding#metadata + meta_file = "{}_labels.tsv".format(name) + out_meta_file = path.join(out_loc, meta_file) + + print('Loading spaCy vectors model: {}'.format(vectors_loc)) + model = spacy.load(vectors_loc) + + print('Finding lexemes with vectors attached: {}'.format(vectors_loc)) + vocab_strings = [ + w for w in tqdm.tqdm(model.vocab.strings, total=len(model.vocab.strings), leave=False) + if model.vocab.has_vector(w) + ] + vector_count = len(vocab_strings) + + print('Building Projector labels for {} vectors: {}'.format(vector_count, out_meta_file)) + vector_dimensions = model.vocab.vectors.shape[1] + tf_vectors_variable = numpy.zeros((vector_count, vector_dimensions), dtype=numpy.float32) + + # Write a tab-separated file that contains information about the vectors for visualization + # + # Reference: https://www.tensorflow.org/programmers_guide/embedding#metadata + with open(out_meta_file, 'wb') as file_metadata: + # Define columns in the first row + file_metadata.write("Text\tFrequency\n".encode('utf-8')) + # Write out a row for each vector that we add to the tensorflow variable we created + vec_index = 0 + + for text in tqdm.tqdm(vocab_strings, total=len(vocab_strings), leave=False): + # https://github.com/tensorflow/tensorflow/issues/9094 + text = '' if text.lstrip() == '' else text + lex = model.vocab[text] + + # Store vector data and metadata + tf_vectors_variable[vec_index] = numpy.float64(model.vocab.get_vector(text)) + file_metadata.write("{}\t{}\n".format(text, math.exp(lex.prob) * len(vocab_strings)).encode('utf-8')) + vec_index += 1 + + # Write out "[name]_tensors.bytes" file for standalone embeddings projector to load + tensor_path = '{}_tensors.bytes'.format(name) + tf_vectors_variable.tofile(path.join(out_loc, tensor_path)) + + print('Done.') + print('Add the following entry to "oss_data/oss_demo_projector_config.json"') + print(json.dumps({ + "tensorName": name, + "tensorShape": [vector_count, vector_dimensions], + "tensorPath": 'oss_data/{}'.format(tensor_path), + "metadataPath": 'oss_data/{}'.format(meta_file) + }, indent=2)) + + +if __name__ == '__main__': + plac.call(main) diff --git a/fabfile.py b/fabfile.py index cca5f183e..2894fe477 100644 --- a/fabfile.py +++ b/fabfile.py @@ -1,92 +1,49 @@ # coding: utf-8 from __future__ import unicode_literals, print_function -import contextlib -from pathlib import Path from fabric.api import local, lcd, env, settings, prefix +from fabtools.python import virtualenv from os import path, environ
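# fabtools' `virtualenv` context manager (imported above) activates VENV_DIR for the
# commands run inside it; it appears to stand in for the hand-rolled contextlib-based
# `virtualenv` helper that the removed lines below defined.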
-import shutil PWD = path.dirname(__file__) ENV = environ['VENV_DIR'] if 'VENV_DIR' in environ else '.env' -VENV_DIR = Path(PWD) / ENV +VENV_DIR = path.join(PWD, ENV) -@contextlib.contextmanager -def virtualenv(name, create=False, python='/usr/bin/python3.6'): - python = Path(python).resolve() - env_path = VENV_DIR - if create: - if env_path.exists(): - shutil.rmtree(str(env_path)) - local('{python} -m venv {env_path}'.format(python=python, env_path=VENV_DIR)) - def wrapped_local(cmd, env_vars=[], capture=False, direct=False): - return local('source {}/bin/activate && {}'.format(env_path, cmd), - shell='/bin/bash', capture=False) - yield wrapped_local - - -def env(lang='python3.6'): - if VENV_DIR.exists(): +def env(lang='python2.7'): + if path.exists(VENV_DIR): local('rm -rf {env}'.format(env=VENV_DIR)) - if lang.startswith('python3'): - local('{lang} -m venv {env}'.format(lang=lang, env=VENV_DIR)) - else: - local('{lang} -m pip install virtualenv --no-cache-dir'.format(lang=lang)) - local('{lang} -m virtualenv {env} --no-cache-dir'.format(lang=lang, env=VENV_DIR)) - with virtualenv(VENV_DIR) as venv_local: - print(venv_local('python --version', capture=True)) - venv_local('pip install --upgrade setuptools --no-cache-dir') - venv_local('pip install pytest --no-cache-dir') - venv_local('pip install wheel --no-cache-dir') - venv_local('pip install -r requirements.txt --no-cache-dir') - venv_local('pip install pex --no-cache-dir') - + local('pip install virtualenv') + local('python -m virtualenv -p {lang} {env}'.format(lang=lang, env=VENV_DIR)) def install(): - with virtualenv(VENV_DIR) as venv_local: - venv_local('pip install dist/*.tar.gz') + with virtualenv(VENV_DIR): + local('pip install --upgrade setuptools') + local('pip install dist/*.tar.gz') + local('pip install pytest') def make(): - with lcd(path.dirname(__file__)): - local('export PYTHONPATH=`pwd` && source .env/bin/activate && python setup.py build_ext --inplace', - shell='/bin/bash') + with virtualenv(VENV_DIR): + with lcd(path.dirname(__file__)): + local('pip install cython') + local('pip install murmurhash') + local('pip install -r requirements.txt') + local('python setup.py build_ext --inplace') def sdist(): - with virtualenv(VENV_DIR) as venv_local: + with virtualenv(VENV_DIR): with lcd(path.dirname(__file__)): local('python setup.py sdist') -def wheel(): - with virtualenv(VENV_DIR) as venv_local: - with lcd(path.dirname(__file__)): - venv_local('python setup.py bdist_wheel') - -def pex(): - with virtualenv(VENV_DIR) as venv_local: - with lcd(path.dirname(__file__)): - sha = local('git rev-parse --short HEAD', capture=True) - venv_local('pex dist/*.whl -e spacy -o dist/spacy-%s.pex' % sha, - direct=True) - - def clean(): with lcd(path.dirname(__file__)): - local('rm -f dist/*.whl') - local('rm -f dist/*.pex') - with virtualenv(VENV_DIR) as venv_local: - venv_local('python setup.py clean --all') + local('python setup.py clean --all') def test(): - with virtualenv(VENV_DIR) as venv_local: + with virtualenv(VENV_DIR): with lcd(path.dirname(__file__)): - venv_local('pytest -x spacy/tests') - -def train(): - args = environ.get('SPACY_TRAIN_ARGS', '') - with virtualenv(VENV_DIR) as venv_local: - venv_local('spacy train {args}'.format(args=args)) + local('py.test -x spacy/tests') diff --git a/requirements.txt b/requirements.txt index a0d05d6d0..154e172eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,8 +5,8 @@ cymem>=1.30,<1.32 preshed>=1.0.0,<2.0.0 thinc>=6.11.1.dev10,<6.12.0 murmurhash>=0.28,<0.29 
-cytoolz>=0.9.0,<0.10.0 plac<1.0.0,>=0.9.6 +six ujson>=1.35 dill>=0.2,<0.3 requests>=2.13.0,<3.0.0 @@ -16,3 +16,4 @@ pytest>=3.0.6,<4.0.0 mock>=2.0.0,<3.0.0 msgpack-python==0.5.4 msgpack-numpy==0.4.1 +html5lib==1.0b8 diff --git a/setup.py b/setup.py index 00232cc85..40140d3f2 100755 --- a/setup.py +++ b/setup.py @@ -18,7 +18,6 @@ PACKAGES = find_packages() MOD_NAMES = [ - 'spacy._align', 'spacy.parts_of_speech', 'spacy.strings', 'spacy.lexeme', @@ -192,6 +191,8 @@ def setup_package(): 'preshed>=1.0.0,<2.0.0', 'thinc>=6.11.1.dev10,<6.12.0', 'plac<1.0.0,>=0.9.6', + 'six', + 'html5lib==1.0b8', 'pathlib', 'ujson>=1.35', 'dill>=0.2,<0.3', @@ -200,7 +201,6 @@ 'ftfy>=4.4.2,<5.0.0', 'msgpack-python==0.5.4', 'msgpack-numpy==0.4.1'], - setup_requires=['wheel'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Environment :: Console', diff --git a/spacy/__main__.py b/spacy/__main__.py index 897d890c2..5a302d77e 100644 --- a/spacy/__main__.py +++ b/spacy/__main__.py @@ -8,7 +8,6 @@ if __name__ == '__main__': import sys from spacy.cli import download, link, info, package, train, convert from spacy.cli import vocab, init_model, profile, evaluate, validate - from spacy.cli import ud_train, ud_evaluate from spacy.util import prints commands = { @@ -16,9 +15,7 @@ 'link': link, 'info': info, 'train': train, - 'ud-train': ud_train, 'evaluate': evaluate, - 'ud-evaluate': ud_evaluate, 'convert': convert, 'package': package, 'vocab': vocab, diff --git a/spacy/_align.pyx b/spacy/_align.pyx deleted file mode 100644 index 07b6efbd4..000000000 --- a/spacy/_align.pyx +++ /dev/null @@ -1,251 +0,0 @@ -# cython: infer_types=True -'''Do Levenshtein alignment, for evaluation of tokenized input. - -Random notes: - - r i n g - 0 1 2 3 4 -r 1 0 1 2 3 -a 2 1 1 2 3 -n 3 2 2 1 2 -g 4 3 3 2 1 - -0,0: (1,1)=min(0+0,1+1,1+1)=0 S -1,0: (2,1)=min(1+1,0+1,2+1)=1 D -2,0: (3,1)=min(2+1,3+1,1+1)=2 D -3,0: (4,1)=min(3+1,4+1,2+1)=3 D -0,1: (1,2)=min(1+1,2+1,0+1)=1 D -1,1: (2,2)=min(0+1,1+1,1+1)=1 S -2,1: (3,2)=min(1+1,1+1,2+1)=2 S or I -3,1: (4,2)=min(2+1,2+1,3+1)=3 S or I -0,2: (1,3)=min(2+1,3+1,1+1)=2 I -1,2: (2,3)=min(1+1,2+1,1+1)=2 S or I -2,2: (3,3) -3,2: (4,3) -At state (i, j) we're asking "How do I transform S[:i+1] to T[:j+1]?" - -We know the costs to transition: - -S[:i] -> T[:j] (at D[i,j]) -S[:i+1] -> T[:j] (at D[i+1,j]) -S[:i] -> T[:j+1] (at D[i,j+1]) - -Further, we know we can transform: -S[:i+1] -> S[:i] (DEL) for 1, -T[:j+1] -> T[:j] (INS) for 1. -S[i+1] -> T[j+1] (SUB) for 0 or 1 - -Therefore we have the costs: -SUB: Cost(S[:i]->T[:j]) + Cost(S[i]->T[j]) -i.e. D[i, j] + S[i+1] != T[j+1] -INS: Cost(S[:i+1]->T[:j]) + Cost(T[:j+1]->T[:j]) -i.e. D[i+1,j] + 1 -DEL: Cost(S[:i]->T[:j+1]) + Cost(S[:i+1]->S[:i]) -i.e.
D[i,j+1] + 1 - - Source string S has length m, with index i - Target string T has length n, with index j - - Output two alignment vectors: i2j (length m) and j2i (length n) - # function LevenshteinDistance(char s[1..m], char t[1..n]): - # for all i and j, d[i,j] will hold the Levenshtein distance between - # the first i characters of s and the first j characters of t - # note that d has (m+1)*(n+1) values - # set each element in d to zero - ring rang - - r i n g - - 0 0 0 0 0 - r 0 0 0 0 0 - a 0 0 0 0 0 - n 0 0 0 0 0 - g 0 0 0 0 0 - - # source prefixes can be transformed into empty string by - # dropping all characters - # d[i, 0] := i - ring rang - - r i n g - - 0 0 0 0 0 - r 1 0 0 0 0 - a 2 0 0 0 0 - n 3 0 0 0 0 - g 4 0 0 0 0 - - # target prefixes can be reached from empty source prefix - # by inserting every character - # d[0, j] := j - - r i n g - - 0 1 2 3 4 - r 1 0 0 0 0 - a 2 0 0 0 0 - n 3 0 0 0 0 - g 4 0 0 0 0 - -''' -from __future__ import unicode_literals -from libc.stdint cimport uint32_t -import numpy -cimport numpy as np -from .compat import unicode_ -from murmurhash.mrmr cimport hash32 - - -def align(S, T): - cdef int m = len(S) - cdef int n = len(T) - cdef np.ndarray matrix = numpy.zeros((m+1, n+1), dtype='int32') - cdef np.ndarray i2j = numpy.zeros((m,), dtype='i') - cdef np.ndarray j2i = numpy.zeros((n,), dtype='i') - - cdef np.ndarray S_arr = _convert_sequence(S) - cdef np.ndarray T_arr = _convert_sequence(T) - - fill_matrix(matrix.data, - S_arr.data, m, T_arr.data, n) - fill_i2j(i2j, matrix) - fill_j2i(j2i, matrix) - for i in range(i2j.shape[0]): - if i2j[i] >= 0 and len(S[i]) != len(T[i2j[i]]): - i2j[i] = -1 - for j in range(j2i.shape[0]): - if j2i[j] >= 0 and len(T[j]) != len(S[j2i[j]]): - j2i[j] = -1 - return matrix[-1,-1], i2j, j2i, matrix - - -def multi_align(np.ndarray i2j, np.ndarray j2i, i_lengths, j_lengths): - '''Let's say we had: - - Guess: [aa bb cc dd] - Truth: [aa bbcc dd] - i2j: [0, None, -2, 2] - j2i: [0, -2, 3] - - We want: - - i2j_multi: {1: 1, 2: 1} - j2i_multi: {} - ''' - i2j_miss = _get_regions(i2j, i_lengths) - j2i_miss = _get_regions(j2i, j_lengths) - - i2j_multi, j2i_multi = _get_mapping(i2j_miss, j2i_miss, i_lengths, j_lengths) - return i2j_multi, j2i_multi - - -def _get_regions(alignment, lengths): - regions = {} - start = None - offset = 0 - for i in range(len(alignment)): - if alignment[i] < 0: - if start is None: - start = offset - regions.setdefault(start, []) - regions[start].append(i) - else: - start = None - offset += lengths[i] - return regions - - -def _get_mapping(miss1, miss2, lengths1, lengths2): - i2j = {} - j2i = {} - for start, region1 in miss1.items(): - if not region1 or start not in miss2: - continue - region2 = miss2[start] - if sum(lengths1[i] for i in region1) == sum(lengths2[i] for i in region2): - j = region2.pop(0) - buff = [] - # Consume tokens from region 1, until we meet the length of the - # first token in region2. If we do, align the tokens. If - # we exceed the length, break. 
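# Worked example, reusing the multi_align docstring above: for Guess [aa, bb, cc, dd]
# vs Truth [aa, bbcc, dd], region1 == [1, 2] (lengths 2 and 2) and region2 == [1]
# (length 4); the loop below buffers tokens 1 and 2 until their summed length reaches
# lengths2[1] == 4, then maps both to j == 1.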
- while region1: - buff.append(region1.pop(0)) - if sum(lengths1[i] for i in buff) == lengths2[j]: - for i in buff: - i2j[i] = j - j2i[j] = buff[-1] - j += 1 - buff = [] - elif sum(lengths1[i] for i in buff) > lengths2[j]: - break - else: - if buff and sum(lengths1[i] for i in buff) == lengths2[j]: - for i in buff: - i2j[i] = j - j2i[j] = buff[-1] - return i2j, j2i - - -def _convert_sequence(seq): - if isinstance(seq, numpy.ndarray): - return numpy.ascontiguousarray(seq, dtype='uint32_t') - cdef np.ndarray output = numpy.zeros((len(seq),), dtype='uint32') - cdef bytes item_bytes - for i, item in enumerate(seq): - if isinstance(item, unicode): - item_bytes = item.encode('utf8') - else: - item_bytes = item - output[i] = hash32(item_bytes, len(item_bytes), 0) - return output - - -cdef void fill_matrix(int* D, - const int* S, int m, const int* T, int n) nogil: - m1 = m+1 - n1 = n+1 - for i in range(m1*n1): - D[i] = 0 - - for i in range(m1): - D[i*n1] = i - - for j in range(n1): - D[j] = j - - cdef int sub_cost, ins_cost, del_cost - for j in range(n): - for i in range(m): - i_j = i*n1 + j - i1_j1 = (i+1)*n1 + j+1 - i1_j = (i+1)*n1 + j - i_j1 = i*n1 + j+1 - if S[i] != T[j]: - sub_cost = D[i_j] + 1 - else: - sub_cost = D[i_j] - del_cost = D[i_j1] + 1 - ins_cost = D[i1_j] + 1 - best = min(min(sub_cost, ins_cost), del_cost) - D[i1_j1] = best - - -cdef void fill_i2j(np.ndarray i2j, np.ndarray D) except *: - j = D.shape[1]-2 - cdef int i = D.shape[0]-2 - while i >= 0: - while D[i+1, j] < D[i+1, j+1]: - j -= 1 - if D[i, j+1] < D[i+1, j+1]: - i2j[i] = -1 - else: - i2j[i] = j - j -= 1 - i -= 1 - -cdef void fill_j2i(np.ndarray j2i, np.ndarray D) except *: - i = D.shape[0]-2 - cdef int j = D.shape[1]-2 - while j >= 0: - while D[i, j+1] < D[i+1, j+1]: - i -= 1 - if D[i+1, j] < D[i+1, j+1]: - j2i[j] = -1 - else: - j2i[j] = i - i -= 1 - j -= 1 diff --git a/spacy/_matcher2_notes.py b/spacy/_matcher2_notes.py deleted file mode 100644 index ece1c9d48..000000000 --- a/spacy/_matcher2_notes.py +++ /dev/null @@ -1,251 +0,0 @@ -import pytest - - -class Vocab(object): - pass - - -class Doc(list): - def __init__(self, vocab, words=None): - list.__init__(self) - self.extend([Token(i, w) for i, w in enumerate(words)]) - - -class Token(object): - def __init__(self, i, word): - self.i = i - self.text = word - - -def find_matches(patterns, doc): - init_states = [(pattern, 0, None) for pattern in patterns] - curr_states = [] - matches = [] - for token in doc: - nexts = [] - for state in (curr_states + init_states): - matches, nexts = transition(state, token, matches, nexts) - curr_states = nexts - return matches - - -def transition(state, token, matches, nexts): - action = get_action(state, token) - is_match, keep_state, advance_state = [bool(int(c)) for c in action] - pattern, i, start = state - if start is None: - start = token.i - if is_match: - matches.append((pattern, start, token.i+1)) - if advance_state: - nexts.append((pattern, i+1, start)) - if keep_state: - # TODO: This needs to be zero-width :(. - nexts.append((pattern, i, start)) - return (matches, nexts) - - -def get_action(state, token): - '''We need to consider: - - a) Does the token match the specification? [Yes, No] - b) What's the quantifier? [1, 0+, ?] - c) Is this the last specification? [final, non-final] - - We can transition in the following ways: - - a) Do we emit a match? - b) Do we add a state with (next state, next token)? - c) Do we add a state with (next state, same token)? - d) Do we add a state with (same state, next token)? 
- - We'll code the actions as boolean strings, so 0000 means no to all 4, - 1000 means match but no states added, etc. - - 1: - Yes, final: - 1000 - Yes, non-final: - 0100 - No, final: - 0000 - No, non-final - 0000 - 0+: - Yes, final: - 1001 - Yes, non-final: - 0111 - No, final: - 1000 (note: Don't include last token!) - No, non-final: - 0010 - ?: - Yes, final: - 1000 - Yes, non-final: - 0100 - No, final: - 1000 (note: Don't include last token!) - No, non-final: - 0010 - - Problem: If a quantifier is matching, we're adding a lot of open partials - ''' - is_match = get_is_match(state, token) - operator = get_operator(state, token) - is_final = get_is_final(state, token) - raise NotImplementedError - - -def get_is_match(state, token): - pattern, i, start = state - is_match = token.text == pattern[i]['spec'] - if pattern[i].get('invert'): - return not is_match - else: - return is_match - -def get_is_final(state, token): - pattern, i, start = state - return i == len(pattern)-1 - -def get_operator(state, token): - pattern, i, start = state - return pattern[i].get('op', '1') - - -######################## -# Tests for get_action # -######################## - - -def test_get_action_simple_match(): - pattern = [{'spec': 'a', 'op': '1'}] - doc = Doc(Vocab(), words=['a']) - state = (pattern, 0, None) - action = get_action(state, doc[0]) - assert action == '100' - - -def test_get_action_simple_reject(): - pattern = [{'spec': 'b', 'op': '1'}] - doc = Doc(Vocab(), words=['a']) - state = (pattern, 0, None) - action = get_action(state, doc[0]) - assert action == '000' - - -def test_get_action_simple_match_match(): - pattern = [{'spec': 'a', 'op': '1'}, {'spec': 'a', 'op': '1'}] - doc = Doc(Vocab(), words=['a', 'a']) - state = (pattern, 0, None) - action = get_action(state, doc[0]) - assert action == '001' - state = (pattern, 1, 0) - action = get_action(state, doc[1]) - assert action == '100' - - -def test_get_action_simple_match_reject(): - pattern = [{'spec': 'a', 'op': '1'}, {'spec': 'b', 'op': '1'}] - doc = Doc(Vocab(), words=['a', 'a']) - state = (pattern, 0, None) - action = get_action(state, doc[0]) - assert action == '001' - state = (pattern, 1, 0) - action = get_action(state, doc[1]) - assert action == '000' - - -def test_get_action_simple_match_reject(): - pattern = [{'spec': 'a', 'op': '1'}, {'spec': 'b', 'op': '1'}] - doc = Doc(Vocab(), words=['a', 'a']) - state = (pattern, 0, None) - action = get_action(state, doc[0]) - assert action == '001' - state = (pattern, 1, 0) - action = get_action(state, doc[1]) - assert action == '000' - - -def test_get_action_plus_match(): - pattern = [{'spec': 'a', 'op': '1+'}] - doc = Doc(Vocab(), words=['a']) - state = (pattern, 0, None) - action = get_action(state, doc[0]) - assert action == '110' - - -def test_get_action_plus_match_match(): - pattern = [{'spec': 'a', 'op': '1+'}] - doc = Doc(Vocab(), words=['a', 'a']) - state = (pattern, 0, None) - action = get_action(state, doc[0]) - assert action == '110' - state = (pattern, 0, 0) - action = get_action(state, doc[1]) - assert action == '110' - - -########################## -# Tests for find_matches # -########################## - -def test_find_matches_simple_accept(): - pattern = [{'spec': 'a', 'op': '1'}] - doc = Doc(Vocab(), words=['a']) - matches = find_matches([pattern], doc) - assert matches == [(pattern, 0, 1)] - - -def test_find_matches_simple_reject(): - pattern = [{'spec': 'a', 'op': '1'}] - doc = Doc(Vocab(), words=['b']) - matches = find_matches([pattern], doc) - assert matches == [] - - -def 
test_find_matches_match_twice(): - pattern = [{'spec': 'a', 'op': '1'}] - doc = Doc(Vocab(), words=['a', 'a']) - matches = find_matches([pattern], doc) - assert matches == [(pattern, 0, 1), (pattern, 1, 2)] - - -def test_find_matches_longer_pattern(): - pattern = [{'spec': 'a', 'op': '1'}, {'spec': 'b', 'op': '1'}] - doc = Doc(Vocab(), words=['a', 'b']) - matches = find_matches([pattern], doc) - assert matches == [(pattern, 0, 2)] - - -def test_find_matches_two_patterns(): - patterns = [[{'spec': 'a', 'op': '1'}], [{'spec': 'b', 'op': '1'}]] - doc = Doc(Vocab(), words=['a', 'b']) - matches = find_matches(patterns, doc) - assert matches == [(patterns[0], 0, 1), (patterns[1], 1, 2)] - - -def test_find_matches_two_patterns_overlap(): - patterns = [[{'spec': 'a'}, {'spec': 'b'}], - [{'spec': 'b'}, {'spec': 'c'}]] - doc = Doc(Vocab(), words=['a', 'b', 'c']) - matches = find_matches(patterns, doc) - assert matches == [(patterns[0], 0, 2), (patterns[1], 1, 3)] - - -def test_find_matches_greedy(): - patterns = [[{'spec': 'a', 'op': '1+'}]] - doc = Doc(Vocab(), words=['a']) - matches = find_matches(patterns, doc) - assert matches == [(patterns[0], 0, 1)] - doc = Doc(Vocab(), words=['a', 'a']) - matches = find_matches(patterns, doc) - assert matches == [(patterns[0], 0, 1), (patterns[0], 0, 2), (patterns[0], 1, 2)] - -def test_find_matches_non_greedy(): - patterns = [[{'spec': 'a', 'op': '0+'}, {'spec': 'b', "op": "1"}]] - doc = Doc(Vocab(), words=['b']) - matches = find_matches(patterns, doc) - assert matches == [(patterns[0], 0, 1)] diff --git a/spacy/_ml.py b/spacy/_ml.py index 0f5cb8ed8..e5d1cfc63 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -64,6 +64,23 @@ def _flatten_add_lengths(seqs, pad=0, drop=0.): return (X, lengths), finish_update +@layerize +def _logistic(X, drop=0.): + xp = get_array_module(X) + if not isinstance(X, xp.ndarray): + X = xp.asarray(X) + # Clip to range (-10, 10) + X = xp.minimum(X, 10., X) + X = xp.maximum(X, -10., X) + Y = 1. / (1. + xp.exp(-X)) + + def logistic_bwd(dY, sgd=None): + dX = dY * (Y * (1-Y)) + return dX + + return Y, logistic_bwd + + def _zero_init(model): def _zero_init_impl(self, X, y): self.W.fill(0) @@ -127,8 +144,8 @@ class PrecomputableAffine(Model): self.nF = nF def begin_update(self, X, drop=0.): - Yf = self.ops.gemm(X, - self.W.reshape((self.nF*self.nO*self.nP, self.nI)), trans2=True) + Yf = self.ops.xp.dot(X, + self.W.reshape((self.nF*self.nO*self.nP, self.nI)).T) Yf = Yf.reshape((Yf.shape[0], self.nF, self.nO, self.nP)) Yf = self._add_padding(Yf) @@ -144,11 +161,11 @@ class PrecomputableAffine(Model): Wopfi = self.W.transpose((1, 2, 0, 3)) Wopfi = self.ops.xp.ascontiguousarray(Wopfi) Wopfi = Wopfi.reshape((self.nO*self.nP, self.nF * self.nI)) - dXf = self.ops.gemm(dY.reshape((dY.shape[0], self.nO*self.nP)), Wopfi) + dXf = self.ops.dot(dY.reshape((dY.shape[0], self.nO*self.nP)), Wopfi) # Reuse the buffer dWopfi = Wopfi; dWopfi.fill(0.) 
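# Note on the swap below: thinc's ops.gemm(dY, Xf, trans1=True) and ops.xp.dot(dY.T, Xf)
# should compute the same dY.T @ Xf product (trans1=True transposes gemm's first
# argument), so the change swaps the backend call rather than the math.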
- self.ops.gemm(dY, Xf, out=dWopfi, trans1=True) + self.ops.xp.dot(dY.T, Xf, out=dWopfi) dWopfi = dWopfi.reshape((self.nO, self.nP, self.nF, self.nI)) # (o, p, f, i) --> (f, o, p, i) self.d_W += dWopfi.transpose((2, 0, 1, 3)) @@ -450,7 +467,6 @@ def SpacyVectors(docs, drop=0.): def build_text_classifier(nr_class, width=64, **cfg): - depth = cfg.get('depth', 2) nr_vector = cfg.get('nr_vector', 5000) pretrained_dims = cfg.get('pretrained_dims', 0) with Model.define_operators({'>>': chain, '+': add, '|': concatenate, @@ -502,7 +518,7 @@ def build_text_classifier(nr_class, width=64, **cfg): LN(Maxout(width, vectors_width)) >> Residual( (ExtractWindow(nW=1) >> LN(Maxout(width, width*3))) - ) ** depth, pad=depth + ) ** 2, pad=2 ) >> flatten_add_lengths >> ParametricAttention(width) @@ -515,6 +531,8 @@ def build_text_classifier(nr_class, width=64, **cfg): _preprocess_doc >> LinearModel(nr_class) ) + #model = linear_model >> logistic + model = ( (linear_model | cnn_model) >> zero_init(Affine(nr_class, nr_class*2, drop_factor=0.0)) diff --git a/spacy/about.py b/spacy/about.py index 736d46cc3..5ccea8681 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -9,7 +9,7 @@ __uri__ = 'https://spacy.io' __author__ = 'Explosion AI' __email__ = 'contact@explosion.ai' __license__ = 'MIT' -__release__ = False +__release__ = True __docs_models__ = 'https://spacy.io/usage/models' __download_url__ = 'https://github.com/explosion/spacy-models/releases/download' diff --git a/spacy/attrs.pyx b/spacy/attrs.pyx index ed1f39a3f..d4e8a38c5 100644 --- a/spacy/attrs.pyx +++ b/spacy/attrs.pyx @@ -131,7 +131,7 @@ def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False): 'NumValue', 'PartType', 'Polite', 'StyleVariant', 'PronType', 'AdjType', 'Person', 'Variant', 'AdpType', 'Reflex', 'Negative', 'Mood', 'Aspect', 'Case', - 'Polarity', 'PrepCase', 'Animacy' # U20 + 'Polarity', 'Animacy' # U20 ] for key in morph_keys: if key in stringy_attrs: diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py index 2788ffc86..cb646c6af 100644 --- a/spacy/cli/__init__.py +++ b/spacy/cli/__init__.py @@ -9,5 +9,3 @@ from .convert import convert from .vocab import make_vocab as vocab from .init_model import init_model from .validate import validate -from .ud_train import main as ud_train -from .conll17_ud_eval import main as ud_evaluate diff --git a/spacy/cli/conll17_ud_eval.py b/spacy/cli/conll17_ud_eval.py deleted file mode 100644 index 3a41f99dc..000000000 --- a/spacy/cli/conll17_ud_eval.py +++ /dev/null @@ -1,571 +0,0 @@ -#!/usr/bin/env python - -# CoNLL 2017 UD Parsing evaluation script. -# -# Compatible with Python 2.7 and 3.2+, can be used either as a module -# or a standalone executable. -# -# Copyright 2017 Institute of Formal and Applied Linguistics (UFAL), -# Faculty of Mathematics and Physics, Charles University, Czech Republic. 
-# -# Changelog: -# - [02 Jan 2017] Version 0.9: Initial release -# - [25 Jan 2017] Version 0.9.1: Fix bug in LCS alignment computation -# - [10 Mar 2017] Version 1.0: Add documentation and test -# Compare HEADs correctly using aligned words -# Allow evaluation with erroneous spaces in forms -# Compare forms in LCS case insensitively -# Detect cycles and multiple root nodes -# Compute AlignedAccuracy - -# Command line usage -# ------------------ -# conll17_ud_eval.py [-v] [-w weights_file] gold_conllu_file system_conllu_file -# -# - if no -v is given, only the CoNLL17 UD Shared Task evaluation LAS metric -# is printed -# - if -v is given, several metrics are printed (as precision, recall, F1 score, -# and in case the metric is computed on aligned words also accuracy on these): -# - Tokens: how well do the gold tokens match system tokens -# - Sentences: how well do the gold sentences match system sentences -# - Words: how well can the gold words be aligned to system words -# - UPOS: using aligned words, how well does UPOS match -# - XPOS: using aligned words, how well does XPOS match -# - Feats: using aligned words, how well does FEATS match -# - AllTags: using aligned words, how well does UPOS+XPOS+FEATS match -# - Lemmas: using aligned words, how well does LEMMA match -# - UAS: using aligned words, how well does HEAD match -# - LAS: using aligned words, how well does HEAD+DEPREL(ignoring subtypes) match -# - if weights_file is given (with lines containing deprel-weight pairs), -# one more metric is shown: -# - WeightedLAS: as LAS, but each deprel (ignoring subtypes) has a different weight - -# API usage -# --------- -# - load_conllu(file) -# - loads CoNLL-U file from given file object to an internal representation -# - the file object should return str on both Python 2 and Python 3 -# - raises UDError exception if the given file cannot be loaded -# - evaluate(gold_ud, system_ud) -# - evaluate the given gold and system CoNLL-U files (loaded with load_conllu) -# - raises UDError if the concatenated tokens of gold and system file do not match -# - returns a dictionary with the metrics described above, each metric having -# three fields: precision, recall and f1 - -# Description of token matching -# ----------------------------- -# In order to match tokens of gold file and system file, we consider the text -# resulting from concatenation of gold tokens and text resulting from -# concatenation of system tokens. These texts should match -- if they do not, -# the evaluation fails. -# -# If the texts do match, every token is represented as a range in this original -# text, and tokens are equal only if their range is the same. - -# Description of word matching -# ---------------------------- -# When matching words of gold file and system file, we first match the tokens. -# The words which are also tokens are matched as tokens, but words in multi-word -# tokens have to be handled differently. -# -# To handle multi-word tokens, we start by finding "multi-word spans". -# Multi-word span is a span in the original text such that -# - it contains at least one multi-word token -# - all multi-word tokens in the span (considering both gold and system ones) -# are completely inside the span (i.e., they do not "stick out") -# - the multi-word span is as small as possible -# -# For every multi-word span, we align the gold and system words completely -# inside this span using LCS on their FORMs. The words not intersecting -# (even partially) any multi-word span are then aligned as tokens.
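A minimal usage sketch of the API described above, assuming two CoNLL-U files on disk ('gold.conllu' and 'system.conllu' are hypothetical paths; load_conllu_file is the convenience wrapper defined further down in this module):

import conll17_ud_eval

gold_ud = conll17_ud_eval.load_conllu_file('gold.conllu')
system_ud = conll17_ud_eval.load_conllu_file('system.conllu')
scores = conll17_ud_eval.evaluate(gold_ud, system_ud)  # dict mapping metric names to Score objects
print('LAS F1 = {:.2f}'.format(100 * scores['LAS'].f1))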
- - -from __future__ import division -from __future__ import print_function - -import argparse -import io -import sys -import unittest - -# CoNLL-U column names -ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10) - -# UD Error is used when raising exceptions in this module -class UDError(Exception): - pass - -# Load given CoNLL-U file into internal representation -def load_conllu(file): - # Internal representation classes - class UDRepresentation: - def __init__(self): - # Characters of all the tokens in the whole file. - # Whitespace between tokens is not included. - self.characters = [] - # List of UDSpan instances with start&end indices into `characters`. - self.tokens = [] - # List of UDWord instances. - self.words = [] - # List of UDSpan instances with start&end indices into `characters`. - self.sentences = [] - class UDSpan: - def __init__(self, start, end, characters): - self.start = start - # Note that self.end marks the first position **after the end** of span, - # so we can use characters[start:end] or range(start, end). - self.end = end - self.characters = characters - - @property - def text(self): - return ''.join(self.characters[self.start:self.end]) - - def __str__(self): - return self.text - - def __repr__(self): - return self.text - class UDWord: - def __init__(self, span, columns, is_multiword): - # Span of this word (or MWT, see below) within ud_representation.characters. - self.span = span - # 10 columns of the CoNLL-U file: ID, FORM, LEMMA,... - self.columns = columns - # is_multiword==True means that this word is part of a multi-word token. - # In that case, self.span marks the span of the whole multi-word token. - self.is_multiword = is_multiword - # Reference to the UDWord instance representing the HEAD (or None if root). - self.parent = None - # Let's ignore language-specific deprel subtypes. - self.columns[DEPREL] = columns[DEPREL].split(':')[0] - - ud = UDRepresentation() - - # Load the CoNLL-U file - index, sentence_start = 0, None - linenum = 0 - while True: - line = file.readline() - linenum += 1 - if not line: - break - line = line.rstrip("\r\n") - - # Handle sentence start boundaries - if sentence_start is None: - # Skip comments - if line.startswith("#"): - continue - # Start a new sentence - ud.sentences.append(UDSpan(index, 0, ud.characters)) - sentence_start = len(ud.words) - if not line: - # Add parent UDWord links and check there are no cycles - def process_word(word): - if word.parent == "remapping": - raise UDError("There is a cycle in a sentence") - if word.parent is None: - head = int(word.columns[HEAD]) - if head > len(ud.words) - sentence_start: - raise UDError("Line {}: HEAD '{}' points outside of the sentence".format( - linenum, word.columns[HEAD])) - if head: - parent = ud.words[sentence_start + head - 1] - word.parent = "remapping" - process_word(parent) - word.parent = parent - - for word in ud.words[sentence_start:]: - process_word(word) - - # Check there is a single root node - if len([word for word in ud.words[sentence_start:] if word.parent is None]) != 1: - raise UDError("There are multiple roots in a sentence") - - # End the sentence - ud.sentences[-1].end = index - sentence_start = None - continue - - # Read next token/word - columns = line.split("\t") - if len(columns) != 10: - raise UDError("The CoNLL-U line {} does not contain 10 tab-separated columns: '{}'".format(linenum, line)) - - # Skip empty nodes - if "." 
in columns[ID]: - continue - - # Delete spaces from FORM so gold.characters == system.characters - # even if one of them tokenizes the space. - columns[FORM] = columns[FORM].replace(" ", "") - if not columns[FORM]: - raise UDError("There is an empty FORM in the CoNLL-U file -- line %d" % linenum) - - # Save token - ud.characters.extend(columns[FORM]) - ud.tokens.append(UDSpan(index, index + len(columns[FORM]), ud.characters)) - index += len(columns[FORM]) - - # Handle multi-word tokens to save word(s) - if "-" in columns[ID]: - try: - start, end = map(int, columns[ID].split("-")) - except: - raise UDError("Cannot parse multi-word token ID '{}'".format(columns[ID])) - - for _ in range(start, end + 1): - word_line = file.readline().rstrip("\r\n") - word_columns = word_line.split("\t") - if len(word_columns) != 10: - print(columns) - raise UDError("The CoNLL-U line {} does not contain 10 tab-separated columns: '{}'".format(linenum, word_line)) - ud.words.append(UDWord(ud.tokens[-1], word_columns, is_multiword=True)) - # Basic tokens/words - else: - try: - word_id = int(columns[ID]) - except: - raise UDError("Cannot parse word ID '{}'".format(columns[ID])) - if word_id != len(ud.words) - sentence_start + 1: - raise UDError("Incorrect word ID '{}' for word '{}', expected '{}'".format(columns[ID], columns[FORM], len(ud.words) - sentence_start + 1)) - - try: - head_id = int(columns[HEAD]) - except: - raise UDError("Cannot parse HEAD '{}'".format(columns[HEAD])) - if head_id < 0: - raise UDError("HEAD cannot be negative") - - ud.words.append(UDWord(ud.tokens[-1], columns, is_multiword=False)) - - if sentence_start is not None: - raise UDError("The CoNLL-U file does not end with empty line") - - return ud - -# Evaluate the gold and system treebanks (loaded using load_conllu). -def evaluate(gold_ud, system_ud, deprel_weights=None): - class Score: - def __init__(self, gold_total, system_total, correct, aligned_total=None): - self.precision = correct / system_total if system_total else 0.0 - self.recall = correct / gold_total if gold_total else 0.0 - self.f1 = 2 * correct / (system_total + gold_total) if system_total + gold_total else 0.0 - self.aligned_accuracy = correct / aligned_total if aligned_total else aligned_total - class AlignmentWord: - def __init__(self, gold_word, system_word): - self.gold_word = gold_word - self.system_word = system_word - self.gold_parent = None - self.system_parent_gold_aligned = None - class Alignment: - def __init__(self, gold_words, system_words): - self.gold_words = gold_words - self.system_words = system_words - self.matched_words = [] - self.matched_words_map = {} - def append_aligned_words(self, gold_word, system_word): - self.matched_words.append(AlignmentWord(gold_word, system_word)) - self.matched_words_map[system_word] = gold_word - def fill_parents(self): - # We represent root parents in both gold and system data by '0'. - # For gold data, we represent non-root parent by the corresponding gold word. - # For system data, we represent non-root parent by either the gold word aligned - # to the parent system node, or by None if no gold word is aligned to the parent.
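# Concretely: a gold root gets gold_parent == 0, while a system word whose head was
# never aligned to any gold word gets system_parent_gold_aligned == None; None can
# never equal a gold parent, so such attachments count against UAS/LAS.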
- for words in self.matched_words: - words.gold_parent = words.gold_word.parent if words.gold_word.parent is not None else 0 - words.system_parent_gold_aligned = self.matched_words_map.get(words.system_word.parent, None) \ - if words.system_word.parent is not None else 0 - - def lower(text): - if sys.version_info < (3, 0) and isinstance(text, str): - return text.decode("utf-8").lower() - return text.lower() - - def spans_score(gold_spans, system_spans): - correct, gi, si = 0, 0, 0 - while gi < len(gold_spans) and si < len(system_spans): - if system_spans[si].start < gold_spans[gi].start: - si += 1 - elif gold_spans[gi].start < system_spans[si].start: - gi += 1 - else: - correct += gold_spans[gi].end == system_spans[si].end - si += 1 - gi += 1 - - return Score(len(gold_spans), len(system_spans), correct) - - def alignment_score(alignment, key_fn, weight_fn=lambda w: 1): - gold, system, aligned, correct = 0, 0, 0, 0 - - for word in alignment.gold_words: - gold += weight_fn(word) - - for word in alignment.system_words: - system += weight_fn(word) - - for words in alignment.matched_words: - aligned += weight_fn(words.gold_word) - - if key_fn is None: - # Return score for whole aligned words - return Score(gold, system, aligned) - - for words in alignment.matched_words: - if key_fn(words.gold_word, words.gold_parent) == key_fn(words.system_word, words.system_parent_gold_aligned): - correct += weight_fn(words.gold_word) - - return Score(gold, system, correct, aligned) - - def beyond_end(words, i, multiword_span_end): - if i >= len(words): - return True - if words[i].is_multiword: - return words[i].span.start >= multiword_span_end - return words[i].span.end > multiword_span_end - - def extend_end(word, multiword_span_end): - if word.is_multiword and word.span.end > multiword_span_end: - return word.span.end - return multiword_span_end - - def find_multiword_span(gold_words, system_words, gi, si): - # We know gold_words[gi].is_multiword or system_words[si].is_multiword. - # Find the start of the multiword span (gs, ss), so the multiword span is minimal. - # Initialize multiword_span_end characters index. - if gold_words[gi].is_multiword: - multiword_span_end = gold_words[gi].span.end - if not system_words[si].is_multiword and system_words[si].span.start < gold_words[gi].span.start: - si += 1 - else: # if system_words[si].is_multiword - multiword_span_end = system_words[si].span.end - if not gold_words[gi].is_multiword and gold_words[gi].span.start < system_words[si].span.start: - gi += 1 - gs, ss = gi, si - - # Find the end of the multiword span - # (so both gi and si are pointing to the word following the multiword span end). 
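# Sketch of the loop below: starting from (gs, ss), whichever side's next word still
# falls inside the current span is consumed, and any multi-word token sticking out past
# multiword_span_end extends the span; the loop exits with gi and si on the first words
# after the span.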
- while not beyond_end(gold_words, gi, multiword_span_end) or \ - not beyond_end(system_words, si, multiword_span_end): - if gi < len(gold_words) and (si >= len(system_words) or - gold_words[gi].span.start <= system_words[si].span.start): - multiword_span_end = extend_end(gold_words[gi], multiword_span_end) - gi += 1 - else: - multiword_span_end = extend_end(system_words[si], multiword_span_end) - si += 1 - return gs, ss, gi, si - - def compute_lcs(gold_words, system_words, gi, si, gs, ss): - lcs = [[0] * (si - ss) for i in range(gi - gs)] - for g in reversed(range(gi - gs)): - for s in reversed(range(si - ss)): - if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]): - lcs[g][s] = 1 + (lcs[g+1][s+1] if g+1 < gi-gs and s+1 < si-ss else 0) - lcs[g][s] = max(lcs[g][s], lcs[g+1][s] if g+1 < gi-gs else 0) - lcs[g][s] = max(lcs[g][s], lcs[g][s+1] if s+1 < si-ss else 0) - return lcs - - def align_words(gold_words, system_words): - alignment = Alignment(gold_words, system_words) - - gi, si = 0, 0 - while gi < len(gold_words) and si < len(system_words): - if gold_words[gi].is_multiword or system_words[si].is_multiword: - # A: Multi-word tokens => align via LCS within the whole "multiword span". - gs, ss, gi, si = find_multiword_span(gold_words, system_words, gi, si) - - if si > ss and gi > gs: - lcs = compute_lcs(gold_words, system_words, gi, si, gs, ss) - - # Store aligned words - s, g = 0, 0 - while g < gi - gs and s < si - ss: - if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]): - alignment.append_aligned_words(gold_words[gs+g], system_words[ss+s]) - g += 1 - s += 1 - elif lcs[g][s] == (lcs[g+1][s] if g+1 < gi-gs else 0): - g += 1 - else: - s += 1 - else: - # B: No multi-word token => align according to spans. 
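# Case B below aligns two single words only when their character spans match exactly;
# otherwise the word that starts earlier (the gold word on ties) is skipped without
# an alignment.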
- if (gold_words[gi].span.start, gold_words[gi].span.end) == (system_words[si].span.start, system_words[si].span.end): - alignment.append_aligned_words(gold_words[gi], system_words[si]) - gi += 1 - si += 1 - elif gold_words[gi].span.start <= system_words[si].span.start: - gi += 1 - else: - si += 1 - - alignment.fill_parents() - - return alignment - - # Check that underlying character sequences do match - if gold_ud.characters != system_ud.characters: - index = 0 - while gold_ud.characters[index] == system_ud.characters[index]: - index += 1 - - raise UDError( - "The concatenation of tokens in gold file and in system file differ!\n" + - "First 20 differing characters in gold file: '{}' and system file: '{}'".format( - "".join(gold_ud.characters[index:index + 20]), - "".join(system_ud.characters[index:index + 20]) - ) - ) - - # Align words - alignment = align_words(gold_ud.words, system_ud.words) - - # Compute the F1-scores - result = { - "Tokens": spans_score(gold_ud.tokens, system_ud.tokens), - "Sentences": spans_score(gold_ud.sentences, system_ud.sentences), - "Words": alignment_score(alignment, None), - "UPOS": alignment_score(alignment, lambda w, parent: w.columns[UPOS]), - "XPOS": alignment_score(alignment, lambda w, parent: w.columns[XPOS]), - "Feats": alignment_score(alignment, lambda w, parent: w.columns[FEATS]), - "AllTags": alignment_score(alignment, lambda w, parent: (w.columns[UPOS], w.columns[XPOS], w.columns[FEATS])), - "Lemmas": alignment_score(alignment, lambda w, parent: w.columns[LEMMA]), - "UAS": alignment_score(alignment, lambda w, parent: parent), - "LAS": alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL])), - } - - # Add WeightedLAS if weights are given - if deprel_weights is not None: - def weighted_las(word): - return deprel_weights.get(word.columns[DEPREL], 1.0) - result["WeightedLAS"] = alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL]), weighted_las) - - return result - -def load_deprel_weights(weights_file): - if weights_file is None: - return None - - deprel_weights = {} - for line in weights_file: - # Ignore comments and empty lines - if line.startswith("#") or not line.strip(): - continue - - columns = line.rstrip("\r\n").split() - if len(columns) != 2: - raise ValueError("Expected two columns in the UD Relations weights file on line '{}'".format(line)) - - deprel_weights[columns[0]] = float(columns[1]) - - return deprel_weights - -def load_conllu_file(path): - _file = open(path, mode="r", **({"encoding": "utf-8"} if sys.version_info >= (3, 0) else {})) - return load_conllu(_file) - -def evaluate_wrapper(args): - # Load CoNLL-U files - gold_ud = load_conllu_file(args.gold_file) - system_ud = load_conllu_file(args.system_file) - - # Load weights if requested - deprel_weights = load_deprel_weights(args.weights) - - return evaluate(gold_ud, system_ud, deprel_weights) - -def main(): - # Parse arguments - parser = argparse.ArgumentParser() - parser.add_argument("gold_file", type=str, - help="Name of the CoNLL-U file with the gold data.") - parser.add_argument("system_file", type=str, - help="Name of the CoNLL-U file with the predicted data.") - parser.add_argument("--weights", "-w", type=argparse.FileType("r"), default=None, - metavar="deprel_weights_file", - help="Compute WeightedLAS using given weights for Universal Dependency Relations.") - parser.add_argument("--verbose", "-v", default=0, action="count", - help="Print all metrics.") - args = parser.parse_args() - - # Use verbose if weights are supplied - if 
args.weights is not None and not args.verbose: - args.verbose = 1 - - # Evaluate - evaluation = evaluate_wrapper(args) - - # Print the evaluation - if not args.verbose: - print("LAS F1 Score: {:.2f}".format(100 * evaluation["LAS"].f1)) - else: - metrics = ["Tokens", "Sentences", "Words", "UPOS", "XPOS", "Feats", "AllTags", "Lemmas", "UAS", "LAS"] - if args.weights is not None: - metrics.append("WeightedLAS") - - print("Metrics | Precision | Recall | F1 Score | AligndAcc") - print("-----------+-----------+-----------+-----------+-----------") - for metric in metrics: - print("{:11}|{:10.2f} |{:10.2f} |{:10.2f} |{}".format( - metric, - 100 * evaluation[metric].precision, - 100 * evaluation[metric].recall, - 100 * evaluation[metric].f1, - "{:10.2f}".format(100 * evaluation[metric].aligned_accuracy) if evaluation[metric].aligned_accuracy is not None else "" - )) - -if __name__ == "__main__": - main() - -# Tests, which can be executed with `python -m unittest conll17_ud_eval`. -class TestAlignment(unittest.TestCase): - @staticmethod - def _load_words(words): - """Prepare fake CoNLL-U files with fake HEAD to prevent multiple roots errors.""" - lines, num_words = [], 0 - for w in words: - parts = w.split(" ") - if len(parts) == 1: - num_words += 1 - lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, parts[0], int(num_words>1))) - else: - lines.append("{}-{}\t{}\t_\t_\t_\t_\t_\t_\t_\t_".format(num_words + 1, num_words + len(parts) - 1, parts[0])) - for part in parts[1:]: - num_words += 1 - lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, part, int(num_words>1))) - return load_conllu((io.StringIO if sys.version_info >= (3, 0) else io.BytesIO)("\n".join(lines+["\n"]))) - - def _test_exception(self, gold, system): - self.assertRaises(UDError, evaluate, self._load_words(gold), self._load_words(system)) - - def _test_ok(self, gold, system, correct): - metrics = evaluate(self._load_words(gold), self._load_words(system)) - gold_words = sum((max(1, len(word.split(" ")) - 1) for word in gold)) - system_words = sum((max(1, len(word.split(" ")) - 1) for word in system)) - self.assertEqual((metrics["Words"].precision, metrics["Words"].recall, metrics["Words"].f1), - (correct / system_words, correct / gold_words, 2 * correct / (gold_words + system_words))) - - def test_exception(self): - self._test_exception(["a"], ["b"]) - - def test_equal(self): - self._test_ok(["a"], ["a"], 1) - self._test_ok(["a", "b", "c"], ["a", "b", "c"], 3) - - def test_equal_with_multiword(self): - self._test_ok(["abc a b c"], ["a", "b", "c"], 3) - self._test_ok(["a", "bc b c", "d"], ["a", "b", "c", "d"], 4) - self._test_ok(["abcd a b c d"], ["ab a b", "cd c d"], 4) - self._test_ok(["abc a b c", "de d e"], ["a", "bcd b c d", "e"], 5) - - def test_alignment(self): - self._test_ok(["abcd"], ["a", "b", "c", "d"], 0) - self._test_ok(["abc", "d"], ["a", "b", "c", "d"], 1) - self._test_ok(["a", "bc", "d"], ["a", "b", "c", "d"], 2) - self._test_ok(["a", "bc b c", "d"], ["a", "b", "cd"], 2) - self._test_ok(["abc a BX c", "def d EX f"], ["ab a b", "cd c d", "ef e f"], 4) - self._test_ok(["ab a b", "cd bc d"], ["a", "bc", "d"], 2) - self._test_ok(["a", "bc b c", "d"], ["ab AX BX", "cd CX a"], 1) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 3c661825c..be5be0f0b 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -8,8 +8,8 @@ from thinc.neural._classes.model import Model from timeit import default_timer as timer from ..attrs import PROB, IS_OOV, CLUSTER, LANG -from ..gold import GoldCorpus -from 
..util import prints, minibatch, minibatch_by_words +from ..gold import GoldCorpus, minibatch +from ..util import prints from .. import util from .. import about from .. import displacy @@ -51,6 +51,8 @@ def train(lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, train_path = util.ensure_path(train_data) dev_path = util.ensure_path(dev_data) meta_path = util.ensure_path(meta_path) + if not output_path.exists(): + output_path.mkdir() if not train_path.exists(): prints(train_path, title="Training data not found", exits=1) if dev_path and not dev_path.exists(): @@ -63,14 +65,7 @@ def train(lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, title="Not a valid meta.json format", exits=1) meta.setdefault('lang', lang) meta.setdefault('name', 'unnamed') - - if not output_path.exists(): - output_path.mkdir() - print("Counting training words (limit=%s" % n_sents) - corpus = GoldCorpus(train_path, dev_path, limit=n_sents) - n_train_words = corpus.count_train() - print(n_train_words) pipeline = ['tagger', 'parser', 'ner'] if no_tagger and 'tagger' in pipeline: pipeline.remove('tagger') @@ -86,9 +81,13 @@ def train(lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, dropout_rates = util.decaying(util.env_opt('dropout_from', 0.2), util.env_opt('dropout_to', 0.2), util.env_opt('dropout_decay', 0.0)) - batch_sizes = util.compounding(util.env_opt('batch_from', 1000), - util.env_opt('batch_to', 1000), + batch_sizes = util.compounding(util.env_opt('batch_from', 1), + util.env_opt('batch_to', 16), util.env_opt('batch_compound', 1.001)) + max_doc_len = util.env_opt('max_doc_len', 5000) + corpus = GoldCorpus(train_path, dev_path, limit=n_sents) + n_train_words = corpus.count_train() + lang_class = util.get_lang_class(lang) nlp = lang_class() meta['pipeline'] = pipeline @@ -106,7 +105,6 @@ def train(lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, lex.is_oov = False for name in pipeline: nlp.add_pipe(nlp.create_pipe(name), name=name) - nlp.add_pipe(nlp.create_pipe('merge_subtokens')) if parser_multitasks: for objective in parser_multitasks.split(','): nlp.parser.add_multitask_objective(objective) @@ -118,20 +116,21 @@ def train(lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, print("Itn.\tP.Loss\tN.Loss\tUAS\tNER P.\tNER R.\tNER F.\tTag %\tToken %") try: + train_docs = corpus.train_docs(nlp, projectivize=True, noise_level=0.0, + gold_preproc=gold_preproc, max_length=0) + train_docs = list(train_docs) for i in range(n_iter): - train_docs = corpus.train_docs(nlp, noise_level=0.0, - gold_preproc=gold_preproc, max_length=0) - words_seen = 0 with tqdm.tqdm(total=n_train_words, leave=False) as pbar: losses = {} - for batch in minibatch_by_words(train_docs, size=batch_sizes): + for batch in minibatch(train_docs, size=batch_sizes): + batch = [(d, g) for (d, g) in batch if len(d) < max_doc_len] if not batch: continue docs, golds = zip(*batch) nlp.update(docs, golds, sgd=optimizer, drop=next(dropout_rates), losses=losses) pbar.update(sum(len(doc) for doc in docs)) - words_seen += sum(len(doc) for doc in docs) + with nlp.use_params(optimizer.averages): util.set_env_log(False) epoch_model_path = output_path / ('model%d' % i) diff --git a/spacy/cli/ud_train.py b/spacy/cli/ud_train.py deleted file mode 100644 index b827d4a4f..000000000 --- a/spacy/cli/ud_train.py +++ /dev/null @@ -1,372 +0,0 @@ -'''Train for CONLL 2017 UD treebank evaluation. Takes .conllu files, writes -.conllu format for development data, allowing the official scorer to be used. 
-''' -from __future__ import unicode_literals -import plac -import tqdm -from pathlib import Path -import re -import sys -import json - -import spacy -import spacy.util -from ..tokens import Token, Doc -from ..gold import GoldParse -from ..util import compounding, minibatch_by_words -from ..syntax.nonproj import projectivize -from ..matcher import Matcher -from .. import displacy -from collections import defaultdict, Counter -from timeit import default_timer as timer - -import itertools -import random -import numpy.random -import cytoolz - -from . import conll17_ud_eval - -from .. import lang -from .. import lang -from ..lang import zh -from ..lang import ja - - -################ -# Data reading # -################ - -space_re = re.compile('\s+') -def split_text(text): - return [space_re.sub(' ', par.strip()) for par in text.split('\n\n')] - - -def read_data(nlp, conllu_file, text_file, raw_text=True, oracle_segments=False, - max_doc_length=None, limit=None): - '''Read the CONLLU format into (Doc, GoldParse) tuples. If raw_text=True, - include Doc objects created using nlp.make_doc and then aligned against - the gold-standard sequences. If oracle_segments=True, include Doc objects - created from the gold-standard segments. At least one must be True.''' - if not raw_text and not oracle_segments: - raise ValueError("At least one of raw_text or oracle_segments must be True") - paragraphs = split_text(text_file.read()) - conllu = read_conllu(conllu_file) - # sd is spacy doc; cd is conllu doc - # cs is conllu sent, ct is conllu token - docs = [] - golds = [] - for doc_id, (text, cd) in enumerate(zip(paragraphs, conllu)): - sent_annots = [] - for cs in cd: - sent = defaultdict(list) - for id_, word, lemma, pos, tag, morph, head, dep, _, space_after in cs: - if '.' 
in id_: - continue - if '-' in id_: - continue - id_ = int(id_)-1 - head = int(head)-1 if head != '0' else id_ - sent['words'].append(word) - sent['tags'].append(tag) - sent['heads'].append(head) - sent['deps'].append('ROOT' if dep == 'root' else dep) - sent['spaces'].append(space_after == '_') - sent['entities'] = ['-'] * len(sent['words']) - sent['heads'], sent['deps'] = projectivize(sent['heads'], - sent['deps']) - if oracle_segments: - docs.append(Doc(nlp.vocab, words=sent['words'], spaces=sent['spaces'])) - golds.append(GoldParse(docs[-1], **sent)) - - sent_annots.append(sent) - if raw_text and max_doc_length and len(sent_annots) >= max_doc_length: - doc, gold = _make_gold(nlp, None, sent_annots) - sent_annots = [] - docs.append(doc) - golds.append(gold) - if limit and len(docs) >= limit: - return docs, golds - - if raw_text and sent_annots: - doc, gold = _make_gold(nlp, None, sent_annots) - docs.append(doc) - golds.append(gold) - if limit and len(docs) >= limit: - return docs, golds - return docs, golds - - -def read_conllu(file_): - docs = [] - sent = [] - doc = [] - for line in file_: - if line.startswith('# newdoc'): - if doc: - docs.append(doc) - doc = [] - elif line.startswith('#'): - continue - elif not line.strip(): - if sent: - doc.append(sent) - sent = [] - else: - sent.append(list(line.strip().split('\t'))) - if len(sent[-1]) != 10: - print(repr(line)) - raise ValueError - if sent: - doc.append(sent) - if doc: - docs.append(doc) - return docs - - -def _make_gold(nlp, text, sent_annots): - # Flatten the conll annotations, and adjust the head indices - flat = defaultdict(list) - for sent in sent_annots: - flat['heads'].extend(len(flat['words'])+head for head in sent['heads']) - for field in ['words', 'tags', 'deps', 'entities', 'spaces']: - flat[field].extend(sent[field]) - # Construct text if necessary - assert len(flat['words']) == len(flat['spaces']) - if text is None: - text = ''.join(word+' '*space for word, space in zip(flat['words'], flat['spaces'])) - doc = nlp.make_doc(text) - flat.pop('spaces') - gold = GoldParse(doc, **flat) - return doc, gold - -############################# -# Data transforms for spaCy # -############################# - -def golds_to_gold_tuples(docs, golds): - '''Get out the annoying 'tuples' format used by begin_training, given the - GoldParse objects.''' - tuples = [] - for doc, gold in zip(docs, golds): - text = doc.text - ids, words, tags, heads, labels, iob = zip(*gold.orig_annot) - sents = [((ids, words, tags, heads, labels, iob), [])] - tuples.append((text, sents)) - return tuples - - -############## -# Evaluation # -############## - -def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None): - with text_loc.open('r', encoding='utf8') as text_file: - texts = split_text(text_file.read()) - docs = list(nlp.pipe(texts)) - with sys_loc.open('w', encoding='utf8') as out_file: - write_conllu(docs, out_file) - with gold_loc.open('r', encoding='utf8') as gold_file: - gold_ud = conll17_ud_eval.load_conllu(gold_file) - with sys_loc.open('r', encoding='utf8') as sys_file: - sys_ud = conll17_ud_eval.load_conllu(sys_file) - scores = conll17_ud_eval.evaluate(gold_ud, sys_ud) - return docs, scores - - -def write_conllu(docs, file_): - merger = Matcher(docs[0].vocab) - merger.add('SUBTOK', None, [{'DEP': 'subtok', 'op': '+'}]) - for i, doc in enumerate(docs): - matches = merger(doc) - spans = [doc[start:end+1] for _, start, end in matches] - offsets = [(span.start_char, span.end_char) for span in spans] - for start_char, end_char in offsets: - 
doc.merge(start_char, end_char) - file_.write("# newdoc id = {i}\n".format(i=i)) - for j, sent in enumerate(doc.sents): - file_.write("# sent_id = {i}.{j}\n".format(i=i, j=j)) - file_.write("# text = {text}\n".format(text=sent.text)) - for k, token in enumerate(sent): - file_.write(token._.get_conllu_lines(k) + '\n') - file_.write('\n') - - -def print_progress(itn, losses, ud_scores): - fields = { - 'dep_loss': losses.get('parser', 0.0), - 'tag_loss': losses.get('tagger', 0.0), - 'words': ud_scores['Words'].f1 * 100, - 'sents': ud_scores['Sentences'].f1 * 100, - 'tags': ud_scores['XPOS'].f1 * 100, - 'uas': ud_scores['UAS'].f1 * 100, - 'las': ud_scores['LAS'].f1 * 100, - } - header = ['Epoch', 'Loss', 'LAS', 'UAS', 'TAG', 'SENT', 'WORD'] - if itn == 0: - print('\t'.join(header)) - tpl = '\t'.join(( - '{:d}', - '{dep_loss:.1f}', - '{las:.1f}', - '{uas:.1f}', - '{tags:.1f}', - '{sents:.1f}', - '{words:.1f}', - )) - print(tpl.format(itn, **fields)) - -#def get_sent_conllu(sent, sent_id): -# lines = ["# sent_id = {sent_id}".format(sent_id=sent_id)] - -def get_token_conllu(token, i): - if token._.begins_fused: - n = 1 - while token.nbor(n)._.inside_fused: - n += 1 - id_ = '%d-%d' % (i, i+n) - lines = [id_, token.text, '_', '_', '_', '_', '_', '_', '_', '_'] - else: - lines = [] - if token.head.i == token.i: - head = 0 - else: - head = i + (token.head.i - token.i) + 1 - fields = [str(i+1), token.text, token.lemma_, token.pos_, token.tag_, '_', - str(head), token.dep_.lower(), '_', '_'] - lines.append('\t'.join(fields)) - return '\n'.join(lines) - -Token.set_extension('get_conllu_lines', method=get_token_conllu) -Token.set_extension('begins_fused', default=False) -Token.set_extension('inside_fused', default=False) - - -################## -# Initialization # -################## - - -def load_nlp(corpus, config): - lang = corpus.split('_')[0] - nlp = spacy.blank(lang) - if config.vectors: - nlp.vocab.from_disk(Path(config.vectors) / 'vocab') - return nlp - -def initialize_pipeline(nlp, docs, golds, config): - nlp.add_pipe(nlp.create_pipe('parser')) - if config.multitask_tag: - nlp.parser.add_multitask_objective('tag') - if config.multitask_sent: - nlp.parser.add_multitask_objective('sent_start') - nlp.add_pipe(nlp.create_pipe('tagger')) - for gold in golds: - for tag in gold.tags: - if tag is not None: - nlp.tagger.add_label(tag) - return nlp.begin_training(lambda: golds_to_gold_tuples(docs, golds)) - - -######################## -# Command line helpers # -######################## - -class Config(object): - def __init__(self, vectors=None, max_doc_length=10, multitask_tag=True, - multitask_sent=True, nr_epoch=30, batch_size=1000, dropout=0.2): - for key, value in locals().items(): - setattr(self, key, value) - - @classmethod - def load(cls, loc): - with Path(loc).open('r', encoding='utf8') as file_: - cfg = json.load(file_) - return cls(**cfg) - - -class Dataset(object): - def __init__(self, path, section): - self.path = path - self.section = section - self.conllu = None - self.text = None - for file_path in self.path.iterdir(): - name = file_path.parts[-1] - if section in name and name.endswith('conllu'): - self.conllu = file_path - elif section in name and name.endswith('txt'): - self.text = file_path - if self.conllu is None: - msg = "Could not find .conllu file in {path} for {section}" - raise IOError(msg.format(section=section, path=path)) - if self.text is None: - raise IOError("Could not find .txt file in {path} for {section}".format(section=section, path=path)) - self.lang = self.conllu.parts[-1].split('-')[0].split('_')[0] - - -class
TreebankPaths(object): - def __init__(self, ud_path, treebank, **cfg): - self.train = Dataset(ud_path / treebank, 'train') - self.dev = Dataset(ud_path / treebank, 'dev') - self.lang = self.train.lang - - -@plac.annotations( - ud_dir=("Path to Universal Dependencies corpus", "positional", None, Path), - corpus=("UD corpus to train and evaluate on, e.g. en, es_ancora, etc", - "positional", None, str), - parses_dir=("Directory to write the development parses", "positional", None, Path), - config=("Path to json formatted config file", "positional"), - limit=("Size limit", "option", "n", int) -) -def main(ud_dir, parses_dir, config, corpus, limit=0): - lang.zh.Chinese.Defaults.use_jieba = False - lang.ja.Japanese.Defaults.use_janome = False - - random.seed(0) - numpy.random.seed(0) - - config = Config.load(config) - paths = TreebankPaths(ud_dir, corpus) - if not (parses_dir / corpus).exists(): - (parses_dir / corpus).mkdir() - print("Train and evaluate", corpus, "using lang", paths.lang) - nlp = load_nlp(paths.lang, config) - - docs, golds = read_data(nlp, paths.train.conllu.open(), paths.train.text.open(), - max_doc_length=config.max_doc_length, limit=limit) - - optimizer = initialize_pipeline(nlp, docs, golds, config) - - batch_sizes = compounding(config.batch_size //10, config.batch_size, 1.001) - for i in range(config.nr_epoch): - docs = [nlp.make_doc(doc.text) for doc in docs] - Xs = list(zip(docs, golds)) - random.shuffle(Xs) - batches = minibatch_by_words(Xs, size=batch_sizes) - losses = {} - n_train_words = sum(len(doc) for doc in docs) - with tqdm.tqdm(total=n_train_words, leave=False) as pbar: - for batch in batches: - batch_docs, batch_gold = zip(*batch) - pbar.update(sum(len(doc) for doc in batch_docs)) - nlp.update(batch_docs, batch_gold, sgd=optimizer, - drop=config.dropout, losses=losses) - - out_path = parses_dir / corpus / 'epoch-{i}.conllu'.format(i=i) - with nlp.use_params(optimizer.averages): - parsed_docs, scores = evaluate(nlp, paths.dev.text, paths.dev.conllu, out_path) - print_progress(i, losses, scores) - _render_parses(i, parsed_docs[:50]) - - -def _render_parses(i, to_render): - to_render[0].user_data['title'] = "Batch %d" % i - with Path('/tmp/parses.html').open('w') as file_: - html = displacy.render(to_render[:5], style='dep', page=True) - file_.write(html) - - -if __name__ == '__main__': - plac.call(main) diff --git a/spacy/compat.py b/spacy/compat.py index de98f54cc..3cc214b28 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -1,6 +1,7 @@ # coding: utf8 from __future__ import unicode_literals +import six import ftfy import sys import ujson @@ -46,10 +47,9 @@ is_windows = sys.platform.startswith('win') is_linux = sys.platform.startswith('linux') is_osx = sys.platform == 'darwin' -# See: https://github.com/benjaminp/six/blob/master/six.py -is_python2 = sys.version_info[0] == 2 -is_python3 = sys.version_info[0] == 3 -is_python_pre_3_5 = is_python2 or (is_python3 and sys.version_info[1] < 5) +is_python2 = six.PY2 +is_python3 = six.PY3 +is_python_pre_3_5 = is_python2 or (is_python3 and sys.version_info[1]<5) if is_python2: bytes_ = str diff --git a/spacy/gold.pyx b/spacy/gold.pyx index 45eaa67cf..dff5fc147 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -3,25 +3,16 @@ from __future__ import unicode_literals, print_function import re +import ujson import random import cytoolz import itertools -import numpy -import tempfile -import shutil -from pathlib import Path -import msgpack -import ujson - -from . 
import _align from .syntax import nonproj from .tokens import Doc from . import util -from .util import minibatch, itershuffle -from .compat import json_dumps +from .util import minibatch -from libc.stdio cimport FILE, fopen, fclose, fread, fwrite, feof, fseek def tags_to_entities(tags): entities = [] @@ -68,138 +59,160 @@ def merge_sents(sents): return [(m_deps, m_brackets)] -punct_re = re.compile(r'\W') def align(cand_words, gold_words): + cost, edit_path = _min_edit_path(cand_words, gold_words) + alignment = [] + i_of_gold = 0 + for move in edit_path: + if move == 'M': + alignment.append(i_of_gold) + i_of_gold += 1 + elif move == 'S': + alignment.append(None) + i_of_gold += 1 + elif move == 'D': + alignment.append(None) + elif move == 'I': + i_of_gold += 1 + else: + raise Exception(move) + return alignment + + +punct_re = re.compile(r'\W') + + +def _min_edit_path(cand_words, gold_words): + cdef: + Pool mem + int i, j, n_cand, n_gold + int* curr_costs + int* prev_costs + + # TODO: Fix this --- just do it properly, make the full edit matrix and + # then walk back over it... + # Preprocess inputs + cand_words = [punct_re.sub('', w).lower() for w in cand_words] + gold_words = [punct_re.sub('', w).lower() for w in gold_words] + if cand_words == gold_words: - alignment = numpy.arange(len(cand_words)) - return 0, alignment, alignment, {}, {} - cand_words = [w.replace(' ', '') for w in cand_words] - gold_words = [w.replace(' ', '') for w in gold_words] - cost, i2j, j2i, matrix = _align.align(cand_words, gold_words) - i2j_multi, j2i_multi = _align.multi_align(i2j, j2i, [len(w) for w in cand_words], - [len(w) for w in gold_words]) - for i, j in list(i2j_multi.items()): - if i2j_multi.get(i+1) != j and i2j_multi.get(i-1) != j: - i2j[i] = j - i2j_multi.pop(i) - for j, i in list(j2i_multi.items()): - if j2i_multi.get(j+1) != i and j2i_multi.get(j-1) != i: - j2i[j] = i - j2i_multi.pop(j) - return cost, i2j, j2i, i2j_multi, j2i_multi + return 0, ''.join(['M' for _ in gold_words]) + mem = Pool() + n_cand = len(cand_words) + n_gold = len(gold_words) + # Levenshtein distance, except we need the history, and we may want + # different costs. Mark operations with a string, and score the history + # using _edit_cost. + previous_row = [] + prev_costs = mem.alloc(n_gold + 1, sizeof(int)) + curr_costs = mem.alloc(n_gold + 1, sizeof(int)) + for i in range(n_gold + 1): + cell = '' + for j in range(i): + cell += 'I' + previous_row.append('I' * i) + prev_costs[i] = i + for i, cand in enumerate(cand_words): + current_row = ['D' * (i + 1)] + curr_costs[0] = i+1 + for j, gold in enumerate(gold_words): + if gold.lower() == cand.lower(): + s_cost = prev_costs[j] + i_cost = curr_costs[j] + 1 + d_cost = prev_costs[j + 1] + 1 + else: + s_cost = prev_costs[j] + 1 + i_cost = curr_costs[j] + 1 + d_cost = prev_costs[j + 1] + (1 if cand else 0) + + if s_cost <= i_cost and s_cost <= d_cost: + best_cost = s_cost + best_hist = previous_row[j] + ('M' if gold == cand else 'S') + elif i_cost <= s_cost and i_cost <= d_cost: + best_cost = i_cost + best_hist = current_row[j] + 'I' + else: + best_cost = d_cost + best_hist = previous_row[j + 1] + 'D' + + current_row.append(best_hist) + curr_costs[j+1] = best_cost + previous_row = current_row + for j in range(len(gold_words) + 1): + prev_costs[j] = curr_costs[j] + curr_costs[j] = 0 + + return prev_costs[n_gold], previous_row[-1] class GoldCorpus(object): """An annotated corpus, using the JSON file format. 
Manages annotations for tagging, dependency parsing and NER.""" - def __init__(self, train, dev, gold_preproc=False, limit=None): + def __init__(self, train_path, dev_path, gold_preproc=True, limit=None): """Create a GoldCorpus. train_path (unicode or Path): File or directory of training data. dev_path (unicode or Path): File or directory of development data. RETURNS (GoldCorpus): The newly created object. """ + self.train_path = util.ensure_path(train_path) + self.dev_path = util.ensure_path(dev_path) self.limit = limit - if isinstance(train, str) or isinstance(train, Path): - train = self.read_tuples(self.walk_corpus(train)) - dev = self.read_tuples(self.walk_corpus(dev)) + self.train_locs = self.walk_corpus(self.train_path) + self.dev_locs = self.walk_corpus(self.dev_path) - # Write temp directory with one doc per file, so we can shuffle - # and stream - self.tmp_dir = Path(tempfile.mkdtemp()) - self.write_msgpack(self.tmp_dir / 'train', train) - self.write_msgpack(self.tmp_dir / 'dev', dev) - - def __del__(self): - shutil.rmtree(self.tmp_dir) - - @staticmethod - def write_msgpack(directory, doc_tuples): - if not directory.exists(): - directory.mkdir() - for i, doc_tuple in enumerate(doc_tuples): - with open(directory / '{}.msg'.format(i), 'wb') as file_: - msgpack.dump([doc_tuple], file_, use_bin_type=True, encoding='utf8') - - @staticmethod - def walk_corpus(path): - path = util.ensure_path(path) - if not path.is_dir(): - return [path] - paths = [path] - locs = [] - seen = set() - for path in paths: - if str(path) in seen: - continue - seen.add(str(path)) - if path.parts[-1].startswith('.'): - continue - elif path.is_dir(): - paths.extend(path.iterdir()) - elif path.parts[-1].endswith('.json'): - locs.append(path) - return locs - - @staticmethod - def read_tuples(locs, limit=0): + @property + def train_tuples(self): i = 0 - for loc in locs: - loc = util.ensure_path(loc) - if loc.parts[-1].endswith('json'): - gold_tuples = read_json_file(loc) - elif loc.parts[-1].endswith('msg'): - with loc.open('rb') as file_: - gold_tuples = msgpack.load(file_, encoding='utf8') - else: - msg = "Cannot read from file: %s. 
Supported formats: .json, .msg" - raise ValueError(msg % loc) + for loc in self.train_locs: + gold_tuples = read_json_file(loc) for item in gold_tuples: yield item i += len(item[1]) - if limit and i >= limit: + if self.limit and i >= self.limit: break @property def dev_tuples(self): - locs = (self.tmp_dir / 'dev').iterdir() - yield from self.read_tuples(locs, limit=self.limit) - - @property - def train_tuples(self): - locs = (self.tmp_dir / 'train').iterdir() - yield from self.read_tuples(locs, limit=self.limit) + i = 0 + for loc in self.dev_locs: + gold_tuples = read_json_file(loc) + for item in gold_tuples: + yield item + i += len(item[1]) + if self.limit and i >= self.limit: + break def count_train(self): n = 0 i = 0 for raw_text, paragraph_tuples in self.train_tuples: - for sent_tuples, brackets in paragraph_tuples: - n += len(sent_tuples[1]) + n += sum([len(s[0][1]) for s in paragraph_tuples]) if self.limit and i >= self.limit: break i += len(paragraph_tuples) return n - def train_docs(self, nlp, gold_preproc=False, max_length=None, - noise_level=0.0): - locs = list((self.tmp_dir / 'train').iterdir()) - random.shuffle(locs) - train_tuples = self.read_tuples(locs, limit=self.limit) + def train_docs(self, nlp, gold_preproc=False, + projectivize=False, max_length=None, + noise_level=0.0): + train_tuples = self.train_tuples + if projectivize: + train_tuples = nonproj.preprocess_training_data( + self.train_tuples, label_freq_cutoff=100) + random.shuffle(train_tuples) gold_docs = self.iter_gold_docs(nlp, train_tuples, gold_preproc, max_length=max_length, - noise_level=noise_level, - make_projective=True) + noise_level=noise_level) yield from gold_docs def dev_docs(self, nlp, gold_preproc=False): - gold_docs = self.iter_gold_docs(nlp, self.dev_tuples, - gold_preproc=gold_preproc) + gold_docs = self.iter_gold_docs(nlp, self.dev_tuples, gold_preproc) yield from gold_docs @classmethod def iter_gold_docs(cls, nlp, tuples, gold_preproc, max_length=None, - noise_level=0.0, make_projective=False): + noise_level=0.0): for raw_text, paragraph_tuples in tuples: if gold_preproc: raw_text = None @@ -207,7 +220,7 @@ class GoldCorpus(object): paragraph_tuples = merge_sents(paragraph_tuples) docs = cls._make_docs(nlp, raw_text, paragraph_tuples, gold_preproc, noise_level=noise_level) - golds = cls._make_golds(docs, paragraph_tuples, make_projective) + golds = cls._make_golds(docs, paragraph_tuples) for doc, gold in zip(docs, golds): if (not max_length) or len(doc) < max_length: yield doc, gold @@ -224,18 +237,35 @@ class GoldCorpus(object): for (sent_tuples, brackets) in paragraph_tuples] @classmethod - def _make_golds(cls, docs, paragraph_tuples, make_projective): + def _make_golds(cls, docs, paragraph_tuples): assert len(docs) == len(paragraph_tuples) if len(docs) == 1: return [GoldParse.from_annot_tuples(docs[0], - paragraph_tuples[0][0], - make_projective=make_projective)] + paragraph_tuples[0][0])] else: - return [GoldParse.from_annot_tuples(doc, sent_tuples, - make_projective=make_projective) + return [GoldParse.from_annot_tuples(doc, sent_tuples) for doc, (sent_tuples, brackets) in zip(docs, paragraph_tuples)] + @staticmethod + def walk_corpus(path): + if not path.is_dir(): + return [path] + paths = [path] + locs = [] + seen = set() + for path in paths: + if str(path) in seen: + continue + seen.add(str(path)) + if path.parts[-1].startswith('.'): + continue + elif path.is_dir(): + paths.extend(path.iterdir()) + elif path.parts[-1].endswith('.json'): + locs.append(path) + return locs + def 
add_noise(orig, noise_level): if random.random() >= noise_level: @@ -267,7 +297,11 @@ def read_json_file(loc, docs_filter=None, limit=None): for filename in loc.iterdir(): yield from read_json_file(loc / filename, limit=limit) else: - for doc in _json_iterate(loc): + with loc.open('r', encoding='utf8') as file_: + docs = ujson.load(file_) + if limit is not None: + docs = docs[:limit] + for doc in docs: if docs_filter is not None and not docs_filter(doc): continue paragraphs = [] @@ -297,56 +331,6 @@ def read_json_file(loc, docs_filter=None, limit=None): yield [paragraph.get('raw', None), sents] -def _json_iterate(loc): - # We should've made these files jsonl...But since we didn't, parse out - # the docs one-by-one to reduce memory usage. - # It's okay to read in the whole file -- just don't parse it into JSON. - cdef bytes py_raw - loc = util.ensure_path(loc) - with loc.open('rb') as file_: - py_raw = file_.read() - raw = py_raw - cdef int square_depth = 0 - cdef int curly_depth = 0 - cdef int inside_string = 0 - cdef int escape = 0 - cdef int start = -1 - cdef char c - cdef char quote = ord('"') - cdef char backslash = ord('\\') - cdef char open_square = ord('[') - cdef char close_square = ord(']') - cdef char open_curly = ord('{') - cdef char close_curly = ord('}') - for i in range(len(py_raw)): - c = raw[i] - if c == backslash: - escape = True - continue - if escape: - escape = False - continue - if c == quote: - inside_string = not inside_string - continue - if inside_string: - continue - if c == open_square: - square_depth += 1 - elif c == close_square: - square_depth -= 1 - elif c == open_curly: - if square_depth == 1 and curly_depth == 0: - start = i - curly_depth += 1 - elif c == close_curly: - curly_depth -= 1 - if square_depth == 1 and curly_depth == 0: - py_str = py_raw[start : i+1].decode('utf8') - yield ujson.loads(py_str) - start = -1 - - def iob_to_biluo(tags): out = [] curr_label = None @@ -450,21 +434,8 @@ cdef class GoldParse: self.labels = [None] * len(doc) self.ner = [None] * len(doc) - # This needs to be done before we align the words - if make_projective and heads is not None and deps is not None: - heads, deps = nonproj.projectivize(heads, deps) - - # Do many-to-one alignment for misaligned tokens. - # If we over-segment, we'll have one gold word that covers a sequence - # of predicted words - # If we under-segment, we'll have one predicted word that covers a - # sequence of gold words. - # If we "mis-segment", we'll have a sequence of predicted words covering - # a sequence of gold words. That's many-to-many -- we don't do that. 
- cost, i2j, j2i, i2j_multi, j2i_multi = align([t.orth_ for t in doc], words) - - self.cand_to_gold = [(j if j >= 0 else None) for j in i2j] - self.gold_to_cand = [(i if i >= 0 else None) for i in j2i] + self.cand_to_gold = align([t.orth_ for t in doc], words) + self.gold_to_cand = align(words, [t.orth_ for t in doc]) annot_tuples = (range(len(words)), words, tags, heads, deps, entities) self.orig_annot = list(zip(*annot_tuples)) @@ -472,47 +443,12 @@ cdef class GoldParse: for i, gold_i in enumerate(self.cand_to_gold): if doc[i].text.isspace(): self.words[i] = doc[i].text - self.tags[i] = '_SP' + self.tags[i] = 'SP' self.heads[i] = None self.labels[i] = None self.ner[i] = 'O' if gold_i is None: - if i in i2j_multi: - self.words[i] = words[i2j_multi[i]] - self.tags[i] = tags[i2j_multi[i]] - is_last = i2j_multi[i] != i2j_multi.get(i+1) - is_first = i2j_multi[i] != i2j_multi.get(i-1) - # Set next word in multi-token span as head, until last - if not is_last: - self.heads[i] = i+1 - self.labels[i] = 'subtok' - else: - self.heads[i] = self.gold_to_cand[heads[i2j_multi[i]]] - self.labels[i] = deps[i2j_multi[i]] - # Now set NER...This is annoying because if we've - # got an entity word split into two, we need to adjust the - # BILUO tags. We can't have BB or LL etc. - # Case 1: O -- easy. - ner_tag = entities[i2j_multi[i]] - if ner_tag == 'O': - self.ner[i] = 'O' - # Case 2: U. This has to become a B I* L sequence. - elif ner_tag.startswith('U-'): - if is_first: - self.ner[i] = ner_tag.replace('U-', 'B-', 1) - elif is_last: - self.ner[i] = ner_tag.replace('U-', 'L-', 1) - else: - self.ner[i] = ner_tag.replace('U-', 'I-', 1) - # Case 3: L. If not last, change to I. - elif ner_tag.startswith('L-'): - if is_last: - self.ner[i] = ner_tag - else: - self.ner[i] = ner_tag.replace('L-', 'I-', 1) - # Case 4: I. Stays correct - elif ner_tag.startswith('I-'): - self.ner[i] = ner_tag + pass else: self.words[i] = words[gold_i] self.tags[i] = tags[gold_i] @@ -527,6 +463,10 @@ cdef class GoldParse: if cycle is not None: raise Exception("Cycle found: %s" % cycle) + if make_projective: + proj_heads, _ = nonproj.projectivize(self.heads, self.labels) + self.heads = proj_heads + def __len__(self): """Get the number of gold-standard tokens.
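The gold.pyx hunks above replace the `_align`-based many-to-one alignment with the simpler edit-path scheme: align(cand_words, gold_words) walks a Levenshtein edit path over the normalized token strings and returns one entry per candidate token, either the index of the matching gold token or None where the tokenizations disagree. As a rough, self-contained sketch of that behaviour (the names edit_path and toy_align are illustrative, not spaCy API, and this builds the full DP matrix rather than the two-row variant _min_edit_path uses):

import re

_punct = re.compile(r'\W')

def edit_path(cand, gold):
    # Normalize the way _min_edit_path does: drop non-word chars, lowercase.
    cand = [_punct.sub('', w).lower() for w in cand]
    gold = [_punct.sub('', w).lower() for w in gold]
    n, m = len(cand), len(gold)
    # cost[i][j] / path[i][j]: cheapest alignment of cand[:i] with gold[:j]
    cost = [[0] * (m + 1) for _ in range(n + 1)]
    path = [[''] * (m + 1) for _ in range(n + 1)]
    for i in range(1, n + 1):
        cost[i][0], path[i][0] = i, 'D' * i
    for j in range(1, m + 1):
        cost[0][j], path[0][j] = j, 'I' * j
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            match = cand[i - 1] == gold[j - 1]
            s = cost[i - 1][j - 1] + (0 if match else 1)  # match / substitute
            d = cost[i - 1][j] + 1                        # candidate-only token
            ins = cost[i][j - 1] + 1                      # gold-only token
            cost[i][j] = min(s, d, ins)
            if cost[i][j] == s:
                path[i][j] = path[i - 1][j - 1] + ('M' if match else 'S')
            elif cost[i][j] == d:
                path[i][j] = path[i - 1][j] + 'D'
            else:
                path[i][j] = path[i][j - 1] + 'I'
    return cost[n][m], path[n][m]

def toy_align(cand, gold):
    # One entry per candidate token, mirroring align(): 'M' maps to the gold
    # index, 'S' and 'D' map to None, 'I' only advances the gold pointer.
    _, moves = edit_path(cand, gold)
    alignment, i_of_gold = [], 0
    for move in moves:
        if move == 'M':
            alignment.append(i_of_gold)
            i_of_gold += 1
        elif move == 'S':
            alignment.append(None)
            i_of_gold += 1
        elif move == 'D':
            alignment.append(None)
        else:  # 'I'
            i_of_gold += 1
    return alignment

print(toy_align(['New', 'York', 'City'], ['New York', 'City']))  # [None, None, 1]

One consequence visible in the hunk above: a candidate token with no one-to-one gold match now simply maps to None, so the removed i2j_multi bookkeeping that rewrote BILUO tags (a gold U- entity split across candidate tokens becoming a B- ... L- sequence) is no longer needed.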
diff --git a/spacy/lang/en/stop_words.py b/spacy/lang/en/stop_words.py index 0aa9ebb55..394731ff1 100644 --- a/spacy/lang/en/stop_words.py +++ b/spacy/lang/en/stop_words.py @@ -39,7 +39,7 @@ made make many may me meanwhile might mine more moreover most mostly move much must my myself name namely neither never nevertheless next nine no nobody none noone nor not -nothing now nowhere n't +nothing now nowhere of off often on once one only onto or other others otherwise our ours ourselves out over own @@ -66,6 +66,4 @@ whereafter whereas whereby wherein whereupon wherever whether which while whither who whoever whole whom whose why will with within without would yet you your yours yourself yourselves - -'d 'll 'm 're 's 've """.split()) diff --git a/spacy/lang/es/syntax_iterators.py b/spacy/lang/es/syntax_iterators.py index b81d1fab0..c414897a0 100644 --- a/spacy/lang/es/syntax_iterators.py +++ b/spacy/lang/es/syntax_iterators.py @@ -6,19 +6,17 @@ from ...symbols import NOUN, PROPN, PRON, VERB, AUX def noun_chunks(obj): doc = obj.doc - if not len(doc): - return - np_label = doc.vocab.strings.add('NP') + np_label = doc.vocab.strings['NP'] left_labels = ['det', 'fixed', 'neg'] #['nunmod', 'det', 'appos', 'fixed'] right_labels = ['flat', 'fixed', 'compound', 'neg'] stop_labels = ['punct'] - np_left_deps = [doc.vocab.strings.add(label) for label in left_labels] - np_right_deps = [doc.vocab.strings.add(label) for label in right_labels] - stop_deps = [doc.vocab.strings.add(label) for label in stop_labels] + np_left_deps = [doc.vocab.strings[label] for label in left_labels] + np_right_deps = [doc.vocab.strings[label] for label in right_labels] + stop_deps = [doc.vocab.strings[label] for label in stop_labels] token = doc[0] while token and token.i < len(doc): if token.pos in [PROPN, NOUN, PRON]: - left, right = noun_bounds(doc, token, np_left_deps, np_right_deps, stop_deps) + left, right = noun_bounds(token) yield left.i, right.i+1, np_label token = right token = next_token(token) @@ -35,7 +33,7 @@ def next_token(token): return None -def noun_bounds(doc, root, np_left_deps, np_right_deps, stop_deps): +def noun_bounds(root): left_bound = root for token in reversed(list(root.lefts)): if token.dep in np_left_deps: @@ -43,7 +41,7 @@ def noun_bounds(doc, root, np_left_deps, np_right_deps, stop_deps): right_bound = root for token in root.rights: if (token.dep in np_right_deps): - left, right = noun_bounds(doc, token, np_left_deps, np_right_deps, stop_deps) + left, right = noun_bounds(token) if list(filter(lambda t: is_verb_token(t) or t.dep in stop_deps, doc[left_bound.i: right.i])): break diff --git a/spacy/lang/fi/examples.py b/spacy/lang/fi/examples.py deleted file mode 100644 index 2b1445619..000000000 --- a/spacy/lang/fi/examples.py +++ /dev/null @@ -1,15 +0,0 @@ -# coding: utf8 -from __future__ import unicode_literals - -""" -Example sentences to test spaCy and its language models. ->>> from spacy.lang.fi.examples import sentences ->>> docs = nlp.pipe(sentences) -""" - -sentences = [ - "Apple harkitsee ostavansa startup-yrityksen UK:sta 1 miljardilla dollarilla.", - "Itseajavat autot siirtävät vakuutusriskin valmistajille.", - "San Francisco harkitsee jakelurobottien kieltämistä jalkakäytävillä.", - "Lontoo on iso kaupunki Iso-Britanniassa." 
-] diff --git a/spacy/lang/fi/lex_attrs.py b/spacy/lang/fi/lex_attrs.py deleted file mode 100644 index ce63c47b8..000000000 --- a/spacy/lang/fi/lex_attrs.py +++ /dev/null @@ -1,26 +0,0 @@ -# coding: utf8 -from __future__ import unicode_literals - -# import the symbols for the attrs you want to overwrite -from ...attrs import LIKE_NUM - -# check if token resembles a number - -_num_words = ['nolla', 'yksi', 'kaksi', 'kolme', 'neljä', 'viisi', 'kuusi', 'seitsemän', 'kahdeksan', 'yhdeksän', 'kymmenen', 'yksitoista', 'kaksitoista', 'kolmetoista', 'neljätoista', 'viisitoista', 'kuusitoista', 'seitsemäntoista', 'kahdeksantoista', 'yhdeksäntoista', 'kaksikymmentä', 'kolmekymmentä', 'neljäkymmentä', 'viisikymmentä', 'kuusikymmentä', 'seitsemänkymmentä', 'kahdeksankymmentä', 'yhdeksänkymmentä', 'sata', 'tuhat', 'miljoona', 'miljardi', 'triljoona'] - - -def like_num(text): - text = text.replace('.', '').replace(',', '') - if text.isdigit(): - return True - if text.count('/') == 1: - num, denom = text.split('/') - if num.isdigit() and denom.isdigit(): - return True - if text in _num_words: - return True - return False - -LEX_ATTRS = { - LIKE_NUM: like_num -} diff --git a/spacy/lang/fi/stop_words.py b/spacy/lang/fi/stop_words.py index 302320596..c43dc4404 100644 --- a/spacy/lang/fi/stop_words.py +++ b/spacy/lang/fi/stop_words.py @@ -79,7 +79,7 @@ pienestä pieni pienin poikki puolesta puolestaan päälle runsaasti -saakka sama samaa samaan samalla saman samat samoin satojen se +saakka sama samaa samaan samalla saman samat samoin sata sataa satojen se seitsemän sekä sen seuraavat siellä sieltä siihen siinä siis siitä sijaan siksi sille silloin sillä silti siltä sinne sinua sinulla sinulle sinulta sinun sinussa sinusta sinut sinuun sinä sisäkkäin sisällä siten sitten sitä ssa sta @@ -89,7 +89,7 @@ taa taas taemmas tahansa tai takaa takaisin takana takia tallä tapauksessa tarpeeksi tavalla tavoitteena te teidän teidät teihin teille teillä teiltä teissä teistä teitä tietysti todella toinen toisaalla toisaalle toisaalta toiseen toiseksi toisella toiselle toiselta toisemme toisen toisensa toisessa -toisesta toista toistaiseksi toki tosin tule tulee tulemme tulen +toisesta toista toistaiseksi toki tosin tuhannen tuhat tule tulee tulemme tulen tulet tulette tulevat tulimme tulin tulisi tulisimme tulisin tulisit tulisitte tulisivat tulit tulitte tulivat tulla tulleet tullut tuntuu tuo tuohon tuoksi tuolla tuolle tuolloin tuolta tuon tuona tuonne tuossa tuosta tuota tuskin tykö diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py index 8231b0be3..3b67c5489 100644 --- a/spacy/lang/ja/__init__.py +++ b/spacy/lang/ja/__init__.py @@ -35,32 +35,14 @@ class JapaneseTokenizer(object): def from_disk(self, path, **exclude): return self -class JapaneseCharacterSegmenter(object): - def __init__(self, vocab): - self.vocab = vocab - - def __call__(self, text): - words = [] - spaces = [] - doc = self.tokenizer(text) - for token in self.tokenizer(text): - words.extend(list(token.text)) - spaces.extend([False]*len(token.text)) - spaces[-1] = bool(token.whitespace_) - return Doc(self.vocab, words=words, spaces=spaces) - class JapaneseDefaults(Language.Defaults): lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters[LANG] = lambda text: 'ja' - use_janome = True @classmethod def create_tokenizer(cls, nlp=None): - if cls.use_janome: - return JapaneseTokenizer(cls, nlp) - else: - return JapaneseCharacterSegmenter(cls, nlp.vocab) + return JapaneseTokenizer(cls, nlp) class Japanese(Language): diff
--git a/spacy/lang/lex_attrs.py b/spacy/lang/lex_attrs.py index 009b6c3c3..f1279f035 100644 --- a/spacy/lang/lex_attrs.py +++ b/spacy/lang/lex_attrs.py @@ -144,7 +144,7 @@ def is_lower(string): return string.islower() def is_space(string): return string.isspace() def is_title(string): return string.istitle() def is_upper(string): return string.isupper() -def is_stop(string, stops=set()): return string.lower() in stops +def is_stop(string, stops=set()): return string in stops def is_oov(string): return True def get_prob(string): return -20. diff --git a/spacy/lang/pl/__init__.py b/spacy/lang/pl/__init__.py index c678d25e5..80011f9d8 100644 --- a/spacy/lang/pl/__init__.py +++ b/spacy/lang/pl/__init__.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS -from .tag_map import TAG_MAP from .stop_words import STOP_WORDS from ..tokenizer_exceptions import BASE_EXCEPTIONS @@ -18,7 +17,6 @@ class PolishDefaults(Language.Defaults): lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) stop_words = STOP_WORDS - tag_map = TAG_MAP class Polish(Language): diff --git a/spacy/lang/pl/tag_map.py b/spacy/lang/pl/tag_map.py deleted file mode 100644 index 80b818f47..000000000 --- a/spacy/lang/pl/tag_map.py +++ /dev/null @@ -1,1628 +0,0 @@ -# coding: utf8 -from __future__ import unicode_literals -from ...symbols import POS, ADJ, ADP, ADV, AUX, CCONJ, DET, INTJ, NOUN, NUM, PART, PRON, PROPN, PUNCT, SCONJ, VERB, X - -TAG_MAP = { - "adja": {POS: ADJ}, - "adjc": {POS: ADJ}, - "adjp": {POS: ADJ, "PrepCase": "pre"}, - "adj:pl:acc:m1.p1:com": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc", "Degree": "cmp"}, - "adj:pl:acc:m1.p1:pos": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc", "Degree": "pos"}, - "adj:pl:acc:m1.p1:sup": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc", "Degree": "sup"}, - "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Degree": "cmp"}, - "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Degree": "pos"}, - "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Degree": "sup"}, - "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Degree": "cmp"}, - "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Degree": "pos"}, - "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Degree": "sup"}, - "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "gen", "Gender": "masc|fem|neut", "Degree": "cmp"}, - "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "gen", "Gender": "masc|fem|neut", "Degree": "pos"}, - "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "gen", "Gender": "masc|fem|neut", "Degree": "sup"}, - "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Degree": "cmp"}, - "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Degree": "pos"}, - "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:sup": {POS: ADJ, "Number": "plur", 
"Case": "ins", "Gender": "masc|fem|neut", "Degree": "sup"}, - "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "loc", "Gender": "masc|fem|neut", "Degree": "cmp"}, - "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "loc", "Gender": "masc|fem|neut", "Degree": "pos"}, - "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "loc", "Gender": "masc|fem|neut", "Degree": "sup"}, - "adj:pl:nom:m1.p1:pos": {POS: ADJ, "Number": "plur", "Case": "nom", "Gender": "masc", "Degree": "pos"}, - "adj:pl:nom:m2.m3.f.n1.n2.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "nom", "Gender": "masc|fem|neut", "Degree": "pos"}, - "adj:pl:nom.voc:m1.p1:com": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Degree": "cmp"}, - "adj:pl:nom.voc:m1.p1:pos": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Degree": "pos"}, - "adj:pl:nom.voc:m1.p1:sup": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Degree": "sup"}, - "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc|fem|neut", "Degree": "cmp"}, - "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc|fem|neut", "Degree": "pos"}, - "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc|fem|neut", "Degree": "sup"}, - "adj:sg:acc:f:com": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "fem", "Degree": "cmp"}, - "adj:sg:acc:f:pos": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "fem", "Degree": "pos"}, - "adj:sg:acc:f:sup": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "fem", "Degree": "sup"}, - "adj:sg:acc:m1.m2:com": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Degree": "cmp"}, - "adj:sg:acc:m1.m2:pos": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Degree": "pos"}, - "adj:sg:acc:m1.m2:sup": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Degree": "sup"}, - "adj:sg:acc:m3:com": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Degree": "cmp"}, - "adj:sg:acc:m3:pos": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Degree": "pos"}, - "adj:sg:acc:m3:sup": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Degree": "sup"}, - "adj:sg:acc:n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "neut", "Degree": "cmp"}, - "adj:sg:acc:n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "neut", "Degree": "pos"}, - "adj:sg:acc:n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "neut", "Degree": "sup"}, - "adj:sg:dat:f:com": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "fem", "Degree": "cmp"}, - "adj:sg:dat:f:pos": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "fem", "Degree": "pos"}, - "adj:sg:dat:f:sup": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "fem", "Degree": "sup"}, - "adj:sg:dat:m1.m2.m3.n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Degree": "cmp"}, - "adj:sg:dat:m1.m2.m3.n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Degree": "pos"}, - "adj:sg:dat:m1.m2.m3.n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Degree": "sup"}, - "adj:sg:gen:f:com": {POS: ADJ, "Number": "sing", "Case": 
"gen", "Gender": "fem", "Degree": "cmp"}, - "adj:sg:gen:f:pos": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "fem", "Degree": "pos"}, - "adj:sg:gen:f:sup": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "fem", "Degree": "sup"}, - "adj:sg:gen:m1.m2.m3.n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Degree": "cmp"}, - "adj:sg:gen:m1.m2.m3.n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Degree": "pos"}, - "adj:sg:gen:m1.m2.m3.n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Degree": "sup"}, - "adj:sg:inst:f:com": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "fem", "Degree": "cmp"}, - "adj:sg:inst:f:pos": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "fem", "Degree": "pos"}, - "adj:sg:inst:f:sup": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "fem", "Degree": "sup"}, - "adj:sg:inst:m1.m2.m3.n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "masc|neut", "Degree": "cmp"}, - "adj:sg:inst:m1.m2.m3.n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "masc|neut", "Degree": "pos"}, - "adj:sg:inst:m1.m2.m3.n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "masc|neut", "Degree": "sup"}, - "adj:sg:loc:f:com": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "fem", "Degree": "cmp"}, - "adj:sg:loc:f:pos": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "fem", "Degree": "pos"}, - "adj:sg:loc:f:sup": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "fem", "Degree": "sup"}, - "adj:sg:loc:m1.m2.m3.n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "masc|neut", "Degree": "cmp"}, - "adj:sg:loc:m1.m2.m3.n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "masc|neut", "Degree": "pos"}, - "adj:sg:loc:m1.m2.m3.n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "masc|neut", "Degree": "sup"}, - "adj:sg:nom:f:pos": {POS: ADJ, "Number": "sing", "Case": "nom", "Gender": "fem", "Degree": "pos"}, - "adj:sg:nom:m1.m2.m3:pos": {POS: ADJ, "Number": "sing", "Case": "nom", "Gender": "Masc", "Degree": "pos"}, - "adj:sg:nom:n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "nom", "Gender": "neut", "Degree": "pos"}, - "adj:sg:nom.voc:f:com": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Degree": "cmp"}, - "adj:sg:nom.voc:f:pos": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Degree": "pos"}, - "adj:sg:nom.voc:f:sup": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Degree": "sup"}, - "adj:sg:nom.voc:m1.m2.m3:com": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Degree": "cmp"}, - "adj:sg:nom.voc:m1.m2.m3:pos": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Degree": "pos"}, - "adj:sg:nom.voc:m1.m2.m3:sup": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Degree": "sup"}, - "adj:sg:nom.voc:n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "neut", "Degree": "cmp"}, - "adj:sg:nom.voc:n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "neut", "Degree": "pos"}, - "adj:sg:nom.voc:n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "neut", "Degree": "sup"}, - "adv": {POS: ADV}, - "adv:com": {POS: ADV, "Degree": "cmp"}, - "adv:pos": {POS: ADV, "Degree": "pos"}, - "adv:sup": {POS: ADV, "Degree": "sup"}, - "aglt:pl:pri:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": 
1, "Aspect": "imp", "Variant": "short"}, - "aglt:pl:pri:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": 1, "Aspect": "imp", "Variant": "long"}, - "aglt:pl:sec:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": 2, "Aspect": "imp", "Variant": "short"}, - "aglt:pl:sec:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": 2, "Aspect": "imp", "Variant": "long"}, - "aglt:sg:pri:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 1, "Aspect": "imp", "Variant": "short"}, - "aglt:sg:pri:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 1, "Aspect": "imp", "Variant": "long"}, - "aglt:sg:sec:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 2, "Aspect": "imp", "Variant": "short"}, - "aglt:sg:sec:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 2, "Aspect": "imp", "Variant": "long"}, - "bedzie:pl:pri:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": 1, "Aspect": "imp"}, - "bedzie:pl:sec:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": 2, "Aspect": "imp"}, - "bedzie:pl:ter:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": 3, "Aspect": "imp"}, - "bedzie:sg:pri:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": 1, "Aspect": "imp"}, - "bedzie:sg:sec:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": 2, "Aspect": "imp"}, - "bedzie:sg:ter:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": 3, "Aspect": "imp"}, - "burk": {POS: X}, - "comp": {POS: SCONJ}, - "conj": {POS: CCONJ}, - "depr:pl:nom:m2": {POS: NOUN, "Animacy": "anim", "Number": "plur", "Case": "nom", "Gender": "masc", "Animacy": "anim"}, - "depr:pl:voc:m2": {POS: NOUN, "Animacy": "anim", "Number": "plur", "Case": "voc", "Gender": "masc", "Animacy": "anim"}, - "fin:pl:pri:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 1, "Aspect": "imp"}, - "fin:pl:pri:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 1, "Aspect": "imp|perf"}, - "fin:pl:pri:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 1, "Aspect": "perf"}, - "fin:pl:sec:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 2, "Aspect": "imp"}, - "fin:pl:sec:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 2, "Aspect": "imp|perf"}, - "fin:pl:sec:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 2, "Aspect": "perf"}, - "fin:pl:ter:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 3, "Aspect": "imp"}, - "fin:pl:ter:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": 
"ind", "Number": "plur", "Person": 3, "Aspect": "imp|perf"}, - "fin:pl:ter:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": 3, "Aspect": "perf"}, - "fin:sg:pri:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 1, "Aspect": "imp"}, - "fin:sg:pri:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 1, "Aspect": "imp|perf"}, - "fin:sg:pri:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 1, "Aspect": "perf"}, - "fin:sg:sec:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 2, "Aspect": "imp"}, - "fin:sg:sec:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 2, "Aspect": "imp|perf"}, - "fin:sg:sec:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 2, "Aspect": "perf"}, - "fin:sg:ter:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 3, "Aspect": "imp"}, - "fin:sg:ter:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 3, "Aspect": "imp|perf"}, - "fin:sg:ter:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": 3, "Aspect": "perf"}, - "ger:sg:dat.loc:n2:imperf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"}, - "ger:sg:dat.loc:n2:imperf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"}, - "ger:sg:dat.loc:n2:imperf.perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ger:sg:dat.loc:n2:imperf.perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ger:sg:dat.loc:n2:perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"}, - "ger:sg:dat.loc:n2:perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"}, - "ger:sg:gen:n2:imperf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"}, - "ger:sg:gen:n2:imperf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"}, - "ger:sg:gen:n2:imperf.perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ger:sg:gen:n2:imperf.perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ger:sg:gen:n2:perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"}, - "ger:sg:gen:n2:perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"}, - "ger:sg:inst:n2:imperf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"}, - "ger:sg:inst:n2:imperf:neg": {POS: VERB, "VerbForm": "vnoun", 
"Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"}, - "ger:sg:inst:n2:imperf.perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ger:sg:inst:n2:imperf.perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ger:sg:inst:n2:perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"}, - "ger:sg:inst:n2:perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"}, - "ger:sg:nom.acc:n2:imperf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"}, - "ger:sg:nom.acc:n2:imperf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"}, - "ger:sg:nom.acc:n2:imperf.perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ger:sg:nom.acc:n2:imperf.perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ger:sg:nom.acc:n2:perf:aff": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"}, - "ger:sg:nom.acc:n2:perf:neg": {POS: VERB, "VerbForm": "vnoun", "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"}, - "imps:imperf": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Aspect": "imp"}, - "imps:imperf.perf": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Aspect": "imp|perf"}, - "imps:perf": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Aspect": "perf"}, - "impt:pl:pri:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 1, "Aspect": "imp"}, - "impt:pl:pri:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 1, "Aspect": "imp|perf"}, - "impt:pl:pri:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 1, "Aspect": "perf"}, - "impt:pl:sec:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 2, "Aspect": "imp"}, - "impt:pl:sec:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 2, "Aspect": "imp|perf"}, - "impt:pl:sec:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": 2, "Aspect": "perf"}, - "impt:sg:sec:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": 2, "Aspect": "imp"}, - "impt:sg:sec:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": 2, "Aspect": "imp|perf"}, - "impt:sg:sec:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": 2, "Aspect": "perf"}, - "inf:imperf": {POS: VERB, "VerbForm": "inf", "Aspect": "imp"}, - "inf:imperf.perf": {POS: VERB, "VerbForm": "inf", "Aspect": "imp|perf"}, - "inf:perf": {POS: VERB, "VerbForm": "inf", "Aspect": "perf"}, - "interj": {POS: INTJ}, - "num:comp": {POS: NUM}, - "num:pl:acc:m1:rec": {POS: NUM, "Number": "plur", "Case": "acc", "Gender": "Masc", "Animacy": "hum"}, - "num:pl:dat.loc:n1.p1.p2:congr.rec": {POS: NUM, "Number": "plur", "Case": "dat|loc", "Gender": "neut"}, - "num:pl:dat:m1.m2.m3.n2.f:congr": {POS: 
NUM, "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut"}, - "num:pl:gen.dat.inst.loc:m1.m2.m3.f.n1.n2.p1.p2:congr": {POS: NUM, "Number": "plur", "Case": "gen|dat|ins|loc", "Gender": "masc|fem|neut"}, - "num:pl:gen.dat.inst.loc:m1.m2.m3.f.n2:congr": {POS: NUM, "Number": "plur", "Case": "gen|dat|ins|loc", "Gender": "masc|fem|neut"}, - "num:pl:gen.dat.loc:m1.m2.m3.n2.f:congr": {POS: NUM, "Number": "plur", "Case": "gen|dat|loc", "Gender": "masc|fem|neut"}, - "num:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2:congr": {POS: NUM, "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut"}, - "num:pl:gen.loc:m1.m2.m3.n2.f:congr": {POS: NUM, "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut"}, - "num:pl:gen:n1.p1.p2:rec": {POS: NUM, "Number": "plur", "Case": "gen", "Gender": "neut"}, - "num:pl:inst:f:congr": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "fem"}, - "num:pl:inst:m1.m2.m3.f.n1.n2.p1.p2:congr": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut"}, - "num:pl:inst:m1.m2.m3.f.n2:congr": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut"}, - "num:pl:inst:m1.m2.m3.n2:congr": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "masc|neut"}, - "num:pl:inst:m1.m2.m3.n2.f:congr": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut"}, - "num:pl:inst:n1.p1.p2:rec": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "neut"}, - "num:pl:nom.acc:m1.m2.m3.f.n1.n2.p1.p2:rec": {POS: NUM, "Number": "plur", "Case": "nom|acc", "Gender": "masc|fem|neut"}, - "num:pl:nom.acc.voc:f:congr": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "fem"}, - "num:pl:nom.acc.voc:m1:rec": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "Masc", "Animacy": "hum"}, - "num:pl:nom.acc.voc:m2.m3.f.n1.n2.p1.p2:rec": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut"}, - "num:pl:nom.acc.voc:m2.m3.f.n2:rec": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut"}, - "num:pl:nom.acc.voc:m2.m3.n2:congr": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|neut"}, - "num:pl:nom.acc.voc:m2.m3.n2.f:congr": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut"}, - "num:pl:nom.acc.voc:n1.p1.p2:rec": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "neut"}, - "num:pl:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.f.n1.n2.p1.p2:rec": {POS: NUM, "Number": "plur", "Gender": "masc|fem|neut"}, - "num:pl:nom.voc:m1:congr": {POS: NUM, "Number": "plur", "Case": "nom|voc", "Gender": "Masc", "Animacy": "hum"}, - "num:pl:nom.voc:m1:rec": {POS: NUM, "Number": "plur", "Case": "nom|voc", "Gender": "Masc", "Animacy": "hum"}, - "num:sg:nom.gen.dat.inst.acc.loc.voc:f:rec": {POS: NUM, "Number": "sing", "Gender": "fem"}, - "num:sg:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.n1.n2:rec": {POS: NUM, "Number": "sing", "Gender": "masc|neut"}, - "pact:pl:acc:m1.p1:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp", "Polarity": "pos"}, - "pact:pl:acc:m1.p1:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp", "Polarity": "neg"}, - "pact:pl:acc:m1.p1:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:pl:acc:m1.p1:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "acc", 
"Gender": "masc", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"}, - "pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"}, - "pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"}, - "pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"}, - "pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"}, - "pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"}, - "pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"}, - "pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"}, - "pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:pl:nom.voc:m1.p1:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": 
"imp", "Polarity": "pos"}, - "pact:pl:nom.voc:m1.p1:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp", "Polarity": "neg"}, - "pact:pl:nom.voc:m1.p1:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:pl:nom.voc:m1.p1:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:sg:acc.inst:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"}, - "pact:sg:acc.inst:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"}, - "pact:sg:acc.inst:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:sg:acc.inst:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:sg:acc:m1.m2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp", "Polarity": "pos"}, - "pact:sg:acc:m1.m2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp", "Polarity": "neg"}, - "pact:sg:acc:m1.m2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:sg:acc:m1.m2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:sg:acc:m3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp", "Polarity": "pos"}, - "pact:sg:acc:m3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp", "Polarity": "neg"}, - "pact:sg:acc:m3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:sg:acc:m3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:sg:dat:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"}, - "pact:sg:dat:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"}, - "pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "dat", "Gender": 
"masc|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:sg:gen.dat.loc:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"}, - "pact:sg:gen.dat.loc:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"}, - "pact:sg:gen.dat.loc:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:sg:gen.dat.loc:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:sg:gen:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"}, - "pact:sg:gen:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"}, - "pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"}, - "pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"}, - "pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:sg:nom.acc.voc:n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"}, - "pact:sg:nom.acc.voc:n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"}, - "pact:sg:nom.acc.voc:n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:sg:nom.acc.voc:n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:sg:nom.voc:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"}, - "pact:sg:nom.voc:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"}, - "pact:sg:nom.voc:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": 
"sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:sg:nom.voc:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"}, - "pact:sg:nom.voc:m1.m2.m3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp", "Polarity": "pos"}, - "pact:sg:nom.voc:m1.m2.m3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp", "Polarity": "neg"}, - "pact:sg:nom.voc:m1.m2.m3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp|perf", "Polarity": "pos"}, - "pact:sg:nom.voc:m1.m2.m3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp|perf", "Polarity": "neg"}, - "pant:perf": {POS: VERB, "Tense": "past", "VerbForm": "conv", "Aspect": "perf"}, - "pcon:imperf": {POS: VERB, "Tense": "pres", "VerbForm": "conv", "Aspect": "imp"}, - "ppas:pl:acc:m1.p1:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp", "Polarity": "pos"}, - "ppas:pl:acc:m1.p1:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp", "Polarity": "neg"}, - "ppas:pl:acc:m1.p1:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:pl:acc:m1.p1:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:pl:acc:m1.p1:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "perf", "Polarity": "pos"}, - "ppas:pl:acc:m1.p1:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "perf", "Polarity": "neg"}, - "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"}, - "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"}, - "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "pos"}, - "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "neg"}, - "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", 
"Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"}, - "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"}, - "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "pos"}, - "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "neg"}, - "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"}, - "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"}, - "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "pos"}, - "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "neg"}, - "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"}, - "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"}, - "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "pos"}, - "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", 
"Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "neg"}, - "ppas:pl:nom.voc:m1.p1:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp", "Polarity": "pos"}, - "ppas:pl:nom.voc:m1.p1:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp", "Polarity": "neg"}, - "ppas:pl:nom.voc:m1.p1:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:pl:nom.voc:m1.p1:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:pl:nom.voc:m1.p1:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "perf", "Polarity": "pos"}, - "ppas:pl:nom.voc:m1.p1:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "perf", "Polarity": "neg"}, - "ppas:sg:acc.inst:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"}, - "ppas:sg:acc.inst:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"}, - "ppas:sg:acc.inst:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:sg:acc.inst:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:sg:acc.inst:f:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "perf", "Polarity": "pos"}, - "ppas:sg:acc.inst:f:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "perf", "Polarity": "neg"}, - "ppas:sg:acc:m1.m2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp", "Polarity": "pos"}, - "ppas:sg:acc:m1.m2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp", "Polarity": "neg"}, - "ppas:sg:acc:m1.m2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:sg:acc:m1.m2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:sg:acc:m1.m2:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "perf", "Polarity": "pos"}, - "ppas:sg:acc:m1.m2:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "perf", "Polarity": "neg"}, - "ppas:sg:acc:m3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": 
"acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp", "Polarity": "pos"}, - "ppas:sg:acc:m3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp", "Polarity": "neg"}, - "ppas:sg:acc:m3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:sg:acc:m3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:sg:acc:m3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "perf", "Polarity": "pos"}, - "ppas:sg:acc:m3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "perf", "Polarity": "neg"}, - "ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"}, - "ppas:sg:dat:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"}, - "ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:sg:dat:m1.m2.m3.n1.n2:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "pos"}, - "ppas:sg:dat:m1.m2.m3.n1.n2:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "neg"}, - "ppas:sg:gen.dat.loc:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"}, - "ppas:sg:gen.dat.loc:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"}, - "ppas:sg:gen.dat.loc:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:sg:gen.dat.loc:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:sg:gen.dat.loc:f:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "perf", "Polarity": "pos"}, - "ppas:sg:gen.dat.loc:f:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "perf", "Polarity": "neg"}, - "ppas:sg:gen:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"}, - "ppas:sg:gen:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", 
"Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"}, - "ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:sg:gen:m1.m2.m3.n1.n2:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "pos"}, - "ppas:sg:gen:m1.m2.m3.n1.n2:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "neg"}, - "ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"}, - "ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"}, - "ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "pos"}, - "ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "neg"}, - "ppas:sg:nom.acc.voc:n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"}, - "ppas:sg:nom.acc.voc:n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"}, - "ppas:sg:nom.acc.voc:n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:sg:nom.acc.voc:n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:sg:nom.acc.voc:n1.n2:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"}, - "ppas:sg:nom.acc.voc:n1.n2:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"}, - "ppas:sg:nom.voc:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"}, - "ppas:sg:nom.voc:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"}, - 
"ppas:sg:nom.voc:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:sg:nom.voc:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:sg:nom.voc:f:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "perf", "Polarity": "pos"}, - "ppas:sg:nom.voc:f:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "perf", "Polarity": "neg"}, - "ppas:sg:nom.voc:m1.m2.m3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp", "Polarity": "pos"}, - "ppas:sg:nom.voc:m1.m2.m3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp", "Polarity": "neg"}, - "ppas:sg:nom.voc:m1.m2.m3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp|perf", "Polarity": "pos"}, - "ppas:sg:nom.voc:m1.m2.m3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp|perf", "Polarity": "neg"}, - "ppas:sg:nom.voc:m1.m2.m3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "perf", "Polarity": "pos"}, - "ppas:sg:nom.voc:m1.m2.m3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "perf", "Polarity": "neg"}, - "ppron12:pl:acc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "acc", "Person": 1}, - "ppron12:pl:acc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "acc", "Person": 2}, - "ppron12:pl:dat:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "dat", "Person": 1}, - "ppron12:pl:dat:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "dat", "Person": 2}, - "ppron12:pl:gen:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "gen", "Person": 1}, - "ppron12:pl:gen:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "gen", "Person": 2}, - "ppron12:pl:inst:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "ins", "Person": 1}, - "ppron12:pl:inst:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "ins", "Person": 2}, - "ppron12:pl:loc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "loc", "Person": 1}, - "ppron12:pl:loc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "loc", "Person": 2}, - "ppron12:pl:nom:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "nom", "Person": 1}, - "ppron12:pl:nom:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "nom", "Person": 2}, - "ppron12:pl:voc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "voc", "Person": 1}, - "ppron12:pl:voc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "voc", "Person": 2}, - "ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": 1, "Variant": "long"}, - "ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": 
"acc", "Gender": "masc|fem|neut", "Person": 1, "Variant": "short"}, - "ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": 2, "Variant": "long"}, - "ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": 2, "Variant": "short"}, - "ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": 1, "Variant": "long"}, - "ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": 1, "Variant": "short"}, - "ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": 2, "Variant": "long"}, - "ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": 2, "Variant": "short"}, - "ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": 1, "Variant": "long"}, - "ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": 1, "Variant": "short"}, - "ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": 2, "Variant": "long"}, - "ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": 2, "Variant": "short"}, - "ppron12:sg:inst:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "ins", "Gender": "masc|fem|neut", "Person": 1}, - "ppron12:sg:inst:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "ins", "Gender": "masc|fem|neut", "Person": 2}, - "ppron12:sg:loc:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "loc", "Gender": "masc|fem|neut", "Person": 1}, - "ppron12:sg:loc:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "loc", "Gender": "masc|fem|neut", "Person": 2}, - "ppron12:sg:nom:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "nom", "Gender": "masc|fem|neut", "Person": 1}, - "ppron12:sg:nom:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "nom", "Gender": "masc|fem|neut", "Person": 2}, - "ppron12:sg:voc:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "voc", "Gender": "masc|fem|neut", "Person": 1}, - "ppron12:sg:voc:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "voc", "Gender": "masc|fem|neut", "Person": 2}, - "ppron3:pl:acc:m1.p1:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "acc", "Gender": "masc", "Person": 3, "PrepCase": "npr"}, - "ppron3:pl:acc:m1.p1:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "acc", "Gender": "masc", "Person": 3, "PrepCase": "pre"}, - "ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Person": 3, "PrepCase": "npr"}, - "ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, 
"Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Person": 3, "PrepCase": "pre"}, - "ppron3:pl:dat:_:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "dat", "Person": 3, "PrepCase": "npr"}, - "ppron3:pl:dat:_:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "dat", "Person": 3, "PrepCase": "pre"}, - "ppron3:pl:gen:_:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "gen", "Person": 3, "PrepCase": "npr"}, - "ppron3:pl:gen:_:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "gen", "Person": 3, "PrepCase": "pre"}, - "ppron3:pl:inst:_:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "ins", "Person": 3}, - "ppron3:pl:loc:_:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "loc", "Person": 3}, - "ppron3:pl:nom:m1.p1:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "nom", "Gender": "masc", "Person": 3}, - "ppron3:pl:nom:m2.m3.f.n1.n2.p2.p3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "plur", "Case": "nom", "Gender": "masc|fem|neut", "Person": 3}, - "ppron3:sg:acc:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "fem", "Person": 3, "PrepCase": "npr"}, - "ppron3:sg:acc:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "fem", "Person": 3, "PrepCase": "pre"}, - "ppron3:sg:acc:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": 3, "Variant": "long", "PrepCase": "npr"}, - "ppron3:sg:acc:m1.m2.m3:ter:akc:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": 3, "Variant": "long", "PrepCase": "pre"}, - "ppron3:sg:acc:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": 3, "Variant": "short", "PrepCase": "npr"}, - "ppron3:sg:acc:m1.m2.m3:ter:nakc:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": 3, "Variant": "short", "PrepCase": "pre"}, - "ppron3:sg:acc:n1.n2:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "neut", "Person": 3, "PrepCase": "npr"}, - "ppron3:sg:acc:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "acc", "Gender": "neut", "Person": 3, "PrepCase": "pre"}, - "ppron3:sg:dat:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "fem", "Person": 3, "PrepCase": "npr"}, - "ppron3:sg:dat:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "fem", "Person": 3, "PrepCase": "pre"}, - "ppron3:sg:dat:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": 3, "Variant": "long", "PrepCase": "npr"}, - "ppron3:sg:dat:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": 3, "Variant": "short", "PrepCase": "npr"}, - "ppron3:sg:dat:m1.m2.m3:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": 3, "PrepCase": "pre"}, - "ppron3:sg:dat:n1.n2:ter:akc:npraep": {POS: PRON, 
"PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "neut", "Person": 3, "Variant": "long", "PrepCase": "npr"}, - "ppron3:sg:dat:n1.n2:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "neut", "Person": 3, "Variant": "short", "PrepCase": "npr"}, - "ppron3:sg:dat:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "dat", "Gender": "neut", "Person": 3, "PrepCase": "pre"}, - "ppron3:sg:gen:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "fem", "Person": 3, "PrepCase": "npr"}, - "ppron3:sg:gen:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "fem", "Person": 3, "PrepCase": "pre"}, - "ppron3:sg:gen:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": 3, "Variant": "long", "PrepCase": "npr"}, - "ppron3:sg:gen:m1.m2.m3:ter:akc:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": 3, "Variant": "long", "PrepCase": "pre"}, - "ppron3:sg:gen:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": 3, "Variant": "short", "PrepCase": "npr"}, - "ppron3:sg:gen:m1.m2.m3:ter:nakc:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": 3, "Variant": "short", "PrepCase": "pre"}, - "ppron3:sg:gen:n1.n2:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "neut", "Person": 3, "Variant": "long", "PrepCase": "npr"}, - "ppron3:sg:gen:n1.n2:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "neut", "Person": 3, "Variant": "short", "PrepCase": "npr"}, - "ppron3:sg:gen:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "gen", "Gender": "neut", "Person": 3, "PrepCase": "pre"}, - "ppron3:sg:inst:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "ins", "Gender": "fem", "Person": 3, "PrepCase": "pre"}, - "ppron3:sg:inst:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "ins", "Gender": "Masc", "Person": 3}, - "ppron3:sg:inst:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "ins", "Gender": "neut", "Person": 3}, - "ppron3:sg:loc:f:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "loc", "Gender": "fem", "Person": 3}, - "ppron3:sg:loc:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "loc", "Gender": "Masc", "Person": 3}, - "ppron3:sg:loc:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "loc", "Gender": "neut", "Person": 3}, - "ppron3:sg:nom:f:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "nom", "Gender": "fem", "Person": 3}, - "ppron3:sg:nom:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "nom", "Gender": "Masc", "Person": 3}, - "ppron3:sg:nom:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": 3, "Number": "sing", "Case": "nom", "Gender": "neut", "Person": 3}, - "praet:pl:m1.p1:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc", "Aspect": "imp"}, - 
"praet:pl:m1.p1:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc", "Aspect": "imp|perf"}, - "praet:pl:m1.p1:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc", "Aspect": "perf"}, - "praet:pl:m2.m3.f.n1.n2.p2.p3:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc|fem|neut", "Aspect": "imp"}, - "praet:pl:m2.m3.f.n1.n2.p2.p3:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc|fem|neut", "Aspect": "imp|perf"}, - "praet:pl:m2.m3.f.n1.n2.p2.p3:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc|fem|neut", "Aspect": "perf"}, - "praet:sg:f:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "fem", "Aspect": "imp"}, - "praet:sg:f:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "fem", "Aspect": "imp|perf"}, - "praet:sg:f:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "fem", "Aspect": "perf"}, - "praet:sg:m1.m2.m3:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "imp"}, - "praet:sg:m1.m2.m3:imperf:agl": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "imp"}, - "praet:sg:m1.m2.m3:imperf:nagl": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "imp"}, - "praet:sg:m1.m2.m3:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "imp|perf"}, - "praet:sg:m1.m2.m3:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "perf"}, - "praet:sg:m1.m2.m3:perf:agl": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "perf"}, - "praet:sg:m1.m2.m3:perf:nagl": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "perf"}, - "praet:sg:n1.n2:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "neut", "Aspect": "imp"}, - "praet:sg:n1.n2:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "neut", "Aspect": "imp|perf"}, - "praet:sg:n1.n2:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "neut", "Aspect": "perf"}, - "pred": {POS: VERB}, - "prep:acc": {POS: ADP, "AdpType": "prep", "Case": "acc"}, - "prep:acc:nwok": {POS: ADP, "AdpType": "prep", "Case": "acc", "Variant": "short"}, - "prep:acc:wok": {POS: ADP, "AdpType": "prep", "Case": "acc", "Variant": "long"}, - "prep:dat": {POS: ADP, "AdpType": "prep", "Case": "dat"}, - "prep:gen": {POS: ADP, "AdpType": "prep", "Case": "gen"}, - "prep:gen:nwok": {POS: ADP, "AdpType": "prep", "Case": "gen", "Variant": "short"}, - "prep:gen:wok": {POS: ADP, "AdpType": "prep", "Case": "gen", "Variant": "long"}, - "prep:inst": {POS: ADP, "AdpType": "prep", "Case": "ins"}, - "prep:inst:nwok": {POS: ADP, "AdpType": "prep", "Case": "ins", "Variant": "short"}, - "prep:inst:wok": {POS: ADP, "AdpType": "prep", "Case": "ins", "Variant": "long"}, - "prep:loc": {POS: ADP, "AdpType": "prep", "Case": "loc"}, - "prep:loc:nwok": {POS: ADP, "AdpType": "prep", "Case": "loc", "Variant": "short"}, - "prep:loc:wok": {POS: ADP, "AdpType": "prep", "Case": "loc", "Variant": "long"}, - "prep:nom": {POS: ADP, "AdpType": "prep", "Case": "nom"}, - "qub": {POS: PART}, - 
"subst:pl:acc:f": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "fem"}, - "subst:pl:acc:m1": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "Masc", "Animacy": "hum"}, - "subst:pl:acc:m2": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "masc", "Animacy": "anim"}, - "subst:pl:acc:m3": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "masc", "Animacy": "inan"}, - "subst:pl:acc:n1": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "neut"}, - "subst:pl:acc:n2": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "neut"}, - "subst:pl:acc:p1": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": 1}, - "subst:pl:acc:p2": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": 2}, - "subst:pl:acc:p3": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": 3}, - "subst:pl:dat:f": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "fem"}, - "subst:pl:dat:m1": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "Masc", "Animacy": "hum"}, - "subst:pl:dat:m2": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "masc", "Animacy": "anim"}, - "subst:pl:dat:m3": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "masc", "Animacy": "inan"}, - "subst:pl:dat:n1": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "neut"}, - "subst:pl:dat:n2": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "neut"}, - "subst:pl:dat:p1": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": 1}, - "subst:pl:dat:p2": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": 2}, - "subst:pl:dat:p3": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": 3}, - "subst:pl:gen:f": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "fem"}, - "subst:pl:gen:m1": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "Masc", "Animacy": "hum"}, - "subst:pl:gen:m2": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "masc", "Animacy": "anim"}, - "subst:pl:gen:m3": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "masc", "Animacy": "inan"}, - "subst:pl:gen:n1": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "neut"}, - "subst:pl:gen:n2": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "neut"}, - "subst:pl:gen:p1": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": 1}, - "subst:pl:gen:p2": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": 2}, - "subst:pl:gen:p3": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": 3}, - "subst:pl:inst:f": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "fem"}, - "subst:pl:inst:m1": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "Masc", "Animacy": "hum"}, - "subst:pl:inst:m2": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "masc", "Animacy": "anim"}, - "subst:pl:inst:m3": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "masc", "Animacy": "inan"}, - "subst:pl:inst:n1": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "neut"}, - "subst:pl:inst:n2": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "neut"}, - "subst:pl:inst:p1": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": 1}, - "subst:pl:inst:p2": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": 2}, - "subst:pl:inst:p3": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": 3}, - "subst:pl:loc:f": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "fem"}, - "subst:pl:loc:m1": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "Masc", "Animacy": "hum"}, - "subst:pl:loc:m2": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "masc", "Animacy": "anim"}, - "subst:pl:loc:m3": {POS: NOUN, 
"Number": "plur", "Case": "loc", "Gender": "masc", "Animacy": "inan"}, - "subst:pl:loc:n1": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "neut"}, - "subst:pl:loc:n2": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "neut"}, - "subst:pl:loc:p1": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": 1}, - "subst:pl:loc:p2": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": 2}, - "subst:pl:loc:p3": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": 3}, - "subst:pl:nom:f": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "fem"}, - "subst:pl:nom:m1": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "Masc", "Animacy": "hum"}, - "subst:pl:nom:m2": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "masc", "Animacy": "anim"}, - "subst:pl:nom:m3": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "masc", "Animacy": "inan"}, - "subst:pl:nom:n1": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "neut"}, - "subst:pl:nom:n2": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "neut"}, - "subst:pl:nom:p1": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": 1}, - "subst:pl:nom:p2": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": 2}, - "subst:pl:nom:p3": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": 3}, - "subst:pl:voc:f": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "fem"}, - "subst:pl:voc:m1": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "Masc", "Animacy": "hum"}, - "subst:pl:voc:m2": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "masc", "Animacy": "anim"}, - "subst:pl:voc:m3": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "masc", "Animacy": "inan"}, - "subst:pl:voc:n1": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "neut"}, - "subst:pl:voc:n2": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "neut"}, - "subst:pl:voc:p1": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": 1}, - "subst:pl:voc:p2": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": 2}, - "subst:pl:voc:p3": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": 3}, - "subst:sg:acc:f": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "fem"}, - "subst:sg:acc:m1": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum"}, - "subst:sg:acc:m2": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "anim"}, - "subst:sg:acc:m3": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan"}, - "subst:sg:acc:n1": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "neut"}, - "subst:sg:acc:n2": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "neut"}, - "subst:sg:dat:f": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "fem"}, - "subst:sg:dat:m1": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "Masc", "Animacy": "hum"}, - "subst:sg:dat:m2": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "masc", "Animacy": "anim"}, - "subst:sg:dat:m3": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "masc", "Animacy": "inan"}, - "subst:sg:dat:n1": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "neut"}, - "subst:sg:dat:n2": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "neut"}, - "subst:sg:gen:f": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "fem"}, - "subst:sg:gen:m1": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "Masc", "Animacy": "hum"}, - "subst:sg:gen:m2": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "masc", "Animacy": "anim"}, - "subst:sg:gen:m3": {POS: NOUN, "Number": 
"sing", "Case": "gen", "Gender": "masc", "Animacy": "inan"}, - "subst:sg:gen:n1": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "neut"}, - "subst:sg:gen:n2": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "neut"}, - "subst:sg:inst:f": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "fem"}, - "subst:sg:inst:m1": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "Masc", "Animacy": "hum"}, - "subst:sg:inst:m2": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "masc", "Animacy": "anim"}, - "subst:sg:inst:m3": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "masc", "Animacy": "inan"}, - "subst:sg:inst:n1": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "neut"}, - "subst:sg:inst:n2": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "neut"}, - "subst:sg:loc:f": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "fem"}, - "subst:sg:loc:m1": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "Masc", "Animacy": "hum"}, - "subst:sg:loc:m2": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "masc", "Animacy": "anim"}, - "subst:sg:loc:m3": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "masc", "Animacy": "inan"}, - "subst:sg:loc:n1": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "neut"}, - "subst:sg:loc:n2": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "neut"}, - "subst:sg:nom:f": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "fem"}, - "subst:sg:nom:m1": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "Masc", "Animacy": "hum"}, - "subst:sg:nom:m2": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "masc", "Animacy": "anim"}, - "subst:sg:nom:m3": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "masc", "Animacy": "inan"}, - "subst:sg:nom:n1": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "neut"}, - "subst:sg:nom:n2": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "neut"}, - "subst:sg:voc:f": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "fem"}, - "subst:sg:voc:m1": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "Masc", "Animacy": "hum"}, - "subst:sg:voc:m2": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "masc", "Animacy": "anim"}, - "subst:sg:voc:m3": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "masc", "Animacy": "inan"}, - "subst:sg:voc:n1": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "neut"}, - "subst:sg:voc:n2": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "neut"}, - "winien:pl:m1.p1:imperf": {POS: ADJ, "Number": "plur", "Gender": "masc", "Aspect": "imp"}, - "winien:pl:m2.m3.f.n1.n2.p2.p3:imperf": {POS: ADJ, "Number": "plur", "Gender": "masc|fem|neut", "Aspect": "imp"}, - "winien:sg:f:imperf": {POS: ADJ, "Number": "sing", "Gender": "fem", "Aspect": "imp"}, - "winien:sg:m1.m2.m3:imperf": {POS: ADJ, "Number": "sing", "Gender": "Masc", "Aspect": "imp"}, - "winien:sg:n1.n2:imperf": {POS: ADJ, "Number": "sing", "Gender": "neut", "Aspect": "imp"}, - # UD - "ADJ__Animacy=Hum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - 
"ADJ__Animacy=Hum|Aspect=Imp|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing"}, - 
"ADJ__Animacy=Hum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Perf|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Perf|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Hum|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Dat|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Dat|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur"}, - 
"ADJ__Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - 
"ADJ__Animacy=Inan|Aspect=Imp|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - 
"ADJ__Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, 
"morph": "Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur"}, - 
"ADJ__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Nhum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Nhum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Nhum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Nhum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Nhum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Nhum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Nhum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Nhum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Nhum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Nhum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Animacy=Nhum|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": 
"Animacy=Nhum|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Nhum|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Nhum|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Nhum|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Nhum|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Nhum|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Animacy=Nhum|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Nhum|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur"}, - "ADJ__Animacy=Nhum|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing"}, - "ADJ__Aspect=Imp|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Acc|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, 
"morph": "Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Ins|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Ins|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Loc|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: 
ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"}, - "ADJ__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Imp|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Aspect=Imp|Gender=Fem|Number=Plur"}, - "ADJ__Aspect=Imp|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Aspect=Imp|Gender=Fem|Number=Sing"}, - "ADJ__Aspect=Imp|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Aspect=Imp|Gender=Neut|Number=Plur"}, - "ADJ__Aspect=Imp|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Aspect=Imp|Gender=Neut|Number=Sing"}, - "ADJ__Aspect=Perf|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Acc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Acc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": 
"Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Dat|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Dat|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Dat|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Dat|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Gen|Gender=Fem|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Gen|Gender=Fem|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Ins|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Ins|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Ins|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Ins|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Loc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Loc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Loc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Loc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - 
"ADJ__Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Nom|Gender=Fem|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Fem|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos"}, - "ADJ__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"}, - "ADJ__Case=Acc|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Plur"}, - "ADJ__Case=Acc|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Sing"}, - "ADJ__Case=Acc|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Neut|Number=Plur"}, - "ADJ__Case=Acc|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Neut|Number=Sing"}, - "ADJ__Case=Acc|Degree=Sup|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Fem|Number=Plur"}, - "ADJ__Case=Acc|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Fem|Number=Sing"}, - "ADJ__Case=Acc|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Neut|Number=Plur"}, - "ADJ__Case=Acc|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Neut|Number=Sing"}, - "ADJ__Case=Acc|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Gender=Fem|Number=Plur"}, - "ADJ__Case=Acc|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Gender=Fem|Number=Sing"}, - "ADJ__Case=Acc|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Gender=Neut|Number=Plur"}, - "ADJ__Case=Acc|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Gender=Neut|Number=Sing"}, - "ADJ__Case=Dat|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Fem|Number=Plur"}, - "ADJ__Case=Dat|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Fem|Number=Sing"}, - "ADJ__Case=Dat|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Neut|Number=Plur"}, - "ADJ__Case=Dat|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Neut|Number=Sing"}, - "ADJ__Case=Dat|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Dat|Degree=Sup|Gender=Neut|Number=Plur"}, - "ADJ__Case=Gen|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Fem|Number=Plur"}, - "ADJ__Case=Gen|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Fem|Number=Sing"}, - 
"ADJ__Case=Gen|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Neut|Number=Plur"}, - "ADJ__Case=Gen|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Neut|Number=Sing"}, - "ADJ__Case=Gen|Degree=Sup|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Fem|Number=Plur"}, - "ADJ__Case=Gen|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Fem|Number=Sing"}, - "ADJ__Case=Gen|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Neut|Number=Plur"}, - "ADJ__Case=Gen|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Neut|Number=Sing"}, - "ADJ__Case=Gen|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Gender=Fem|Number=Plur"}, - "ADJ__Case=Gen|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Gender=Fem|Number=Sing"}, - "ADJ__Case=Gen|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Gender=Neut|Number=Plur"}, - "ADJ__Case=Gen|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Gender=Neut|Number=Sing"}, - "ADJ__Case=Ins|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Fem|Number=Plur"}, - "ADJ__Case=Ins|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Fem|Number=Sing"}, - "ADJ__Case=Ins|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Neut|Number=Plur"}, - "ADJ__Case=Ins|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Neut|Number=Sing"}, - "ADJ__Case=Ins|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Degree=Sup|Gender=Fem|Number=Sing"}, - "ADJ__Case=Ins|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Ins|Degree=Sup|Gender=Neut|Number=Plur"}, - "ADJ__Case=Ins|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Degree=Sup|Gender=Neut|Number=Sing"}, - "ADJ__Case=Ins|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Ins|Gender=Fem|Number=Plur"}, - "ADJ__Case=Ins|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Gender=Fem|Number=Sing"}, - "ADJ__Case=Ins|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Gender=Neut|Number=Sing"}, - "ADJ__Case=Loc|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Fem|Number=Plur"}, - "ADJ__Case=Loc|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Fem|Number=Sing"}, - "ADJ__Case=Loc|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Neut|Number=Plur"}, - "ADJ__Case=Loc|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Neut|Number=Sing"}, - "ADJ__Case=Loc|Degree=Sup|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Degree=Sup|Gender=Fem|Number=Plur"}, - "ADJ__Case=Loc|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Degree=Sup|Gender=Neut|Number=Plur"}, - "ADJ__Case=Loc|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Loc|Degree=Sup|Gender=Neut|Number=Sing"}, - "ADJ__Case=Loc|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Gender=Fem|Number=Plur"}, - "ADJ__Case=Loc|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Loc|Gender=Fem|Number=Sing"}, - "ADJ__Case=Loc|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Gender=Neut|Number=Plur"}, - "ADJ__Case=Loc|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Loc|Gender=Neut|Number=Sing"}, - 
"ADJ__Case=Nom|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Fem|Number=Plur"}, - "ADJ__Case=Nom|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Fem|Number=Sing"}, - "ADJ__Case=Nom|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Neut|Number=Plur"}, - "ADJ__Case=Nom|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Neut|Number=Sing"}, - "ADJ__Case=Nom|Degree=Sup|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Fem|Number=Plur"}, - "ADJ__Case=Nom|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Fem|Number=Sing"}, - "ADJ__Case=Nom|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Neut|Number=Plur"}, - "ADJ__Case=Nom|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Neut|Number=Sing"}, - "ADJ__Case=Nom|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Gender=Fem|Number=Plur"}, - "ADJ__Case=Nom|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Gender=Fem|Number=Sing"}, - "ADJ__Case=Nom|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Gender=Neut|Number=Plur"}, - "ADJ__Case=Nom|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Gender=Neut|Number=Sing"}, - "ADJ__Hyph=Yes": {POS: ADJ, "morph": "Hyph=Yes"}, - "ADJ__PrepCase=Pre": {POS: ADJ, "morph": "PrepCase=Pre"}, - "ADP__AdpType=Prep|Case=Acc": {POS: ADP, "morph": "AdpType=Prep|Case=Acc"}, - "ADP__AdpType=Prep|Case=Acc|Variant=Long": {POS: ADP, "morph": "AdpType=Prep|Case=Acc|Variant=Long"}, - "ADP__AdpType=Prep|Case=Acc|Variant=Short": {POS: ADP, "morph": "AdpType=Prep|Case=Acc|Variant=Short"}, - "ADP__AdpType=Prep|Case=Dat": {POS: ADP, "morph": "AdpType=Prep|Case=Dat"}, - "ADP__AdpType=Prep|Case=Gen": {POS: ADP, "morph": "AdpType=Prep|Case=Gen"}, - "ADP__AdpType=Prep|Case=Gen|Variant=Long": {POS: ADP, "morph": "AdpType=Prep|Case=Gen|Variant=Long"}, - "ADP__AdpType=Prep|Case=Gen|Variant=Short": {POS: ADP, "morph": "AdpType=Prep|Case=Gen|Variant=Short"}, - "ADP__AdpType=Prep|Case=Ins": {POS: ADP, "morph": "AdpType=Prep|Case=Ins"}, - "ADP__AdpType=Prep|Case=Ins|Variant=Long": {POS: ADP, "morph": "AdpType=Prep|Case=Ins|Variant=Long"}, - "ADP__AdpType=Prep|Case=Ins|Variant=Short": {POS: ADP, "morph": "AdpType=Prep|Case=Ins|Variant=Short"}, - "ADP__AdpType=Prep|Case=Loc": {POS: ADP, "morph": "AdpType=Prep|Case=Loc"}, - "ADP__AdpType=Prep|Case=Loc|Variant=Long": {POS: ADP, "morph": "AdpType=Prep|Case=Loc|Variant=Long"}, - "ADP__AdpType=Prep|Case=Loc|Variant=Short": {POS: ADP, "morph": "AdpType=Prep|Case=Loc|Variant=Short"}, - "ADP__AdpType=Prep|Case=Nom": {POS: ADP, "morph": "AdpType=Prep|Case=Nom"}, - "ADV___": {POS: ADV}, - "ADV__Degree=Pos": {POS: ADV, "morph": "Degree=Pos"}, - "ADV__Degree=Sup": {POS: ADV, "morph": "Degree=Sup"}, - "AUX___": {POS: AUX}, - "AUX__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - 
"AUX__Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Aspect=Imp|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Imp|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Aspect=Imp|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Imp|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Aspect=Imp|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Imp|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Aspect=Imp|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Imp|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Aspect=Imp|Mood=Cnd|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Cnd|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|Variant=Short|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|Variant=Short|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|Variant=Short|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|Variant=Short|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin"}, - 
"AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|Variant=Long|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|Variant=Long|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|Variant=Short|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|Variant=Short|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|Variant=Long|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|Variant=Long|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|Variant=Short|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|Variant=Short|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin"}, - "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin"}, - "AUX__Aspect=Imp|VerbForm=Inf": {POS: AUX, "morph": "Aspect=Imp|VerbForm=Inf"}, - "AUX__Aspect=Perf|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Perf|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Aspect=Perf|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Perf|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Aspect=Perf|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Perf|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Aspect=Perf|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Perf|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "AUX__Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin"}, - "AUX__Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin"}, - "AUX__Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin"}, - "AUX__Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin"}, - "AUX__Aspect=Perf|VerbForm=Inf": {POS: AUX, "morph": "Aspect=Perf|VerbForm=Inf"}, - "CCONJ___": {POS: CCONJ}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - 
"DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, 
"morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg"}, - "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Dem": 
{POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Ind"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": 
"Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg"}, - "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Ind"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Neg"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - 
"DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Neg"}, - "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Ind"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Neg"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": 
"Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg"}, - "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Neg"}, - "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Dem"}, - 
"DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Ind"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Ind"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Ind"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": 
"Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg"}, - "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Nhum|Case=Dat|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Nhum|Case=Dat|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|PronType=Tot"}, - "DET__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem"}, - "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel"}, - "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot"}, - "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - 
"DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem"}, - "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind"}, - "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Acc|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Acc|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Acc|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Case=Acc|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Acc|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|PronType=Dem"}, - "DET__Case=Acc|Gender=Fem|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|PronType=Ind"}, - "DET__Case=Acc|Gender=Fem|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|PronType=Int,Rel"}, - "DET__Case=Acc|Gender=Fem|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|PronType=Tot"}, - "DET__Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Acc|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Acc|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|PronType=Dem"}, - "DET__Case=Acc|Gender=Fem|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|PronType=Ind"}, - "DET__Case=Acc|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Acc|Gender=Fem|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|PronType=Tot"}, - "DET__Case=Acc|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Acc|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Case=Acc|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": 
{POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Acc|Gender=Neut|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|PronType=Dem"}, - "DET__Case=Acc|Gender=Neut|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|PronType=Ind"}, - "DET__Case=Acc|Gender=Neut|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|PronType=Int,Rel"}, - "DET__Case=Acc|Gender=Neut|Number=Plur|PronType=Neg": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|PronType=Neg"}, - "DET__Case=Acc|Gender=Neut|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|PronType=Tot"}, - "DET__Case=Acc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Acc|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Acc|Gender=Neut|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Dem"}, - "DET__Case=Acc|Gender=Neut|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Ind"}, - "DET__Case=Acc|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Acc|Gender=Neut|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Tot"}, - "DET__Case=Dat|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Case=Dat|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Dat|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Plur|PronType=Dem"}, - "DET__Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Dat|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Dat|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|PronType=Dem"}, - "DET__Case=Dat|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Dat|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Dat|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Dat|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Dat|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Dat|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Dat|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - 
"DET__Case=Gen|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Gen|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Case=Gen|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Gen|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|PronType=Dem"}, - "DET__Case=Gen|Gender=Fem|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|PronType=Ind"}, - "DET__Case=Gen|Gender=Fem|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|PronType=Int,Rel"}, - "DET__Case=Gen|Gender=Fem|Number=Plur|PronType=Neg": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|PronType=Neg"}, - "DET__Case=Gen|Gender=Fem|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|PronType=Tot"}, - "DET__Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Gen|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Dem"}, - "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Ind"}, - "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Neg": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Neg"}, - "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Tot"}, - "DET__Case=Gen|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Gen|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Case=Gen|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Gen|Gender=Neut|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|PronType=Dem"}, - "DET__Case=Gen|Gender=Neut|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|PronType=Ind"}, - "DET__Case=Gen|Gender=Neut|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|PronType=Int,Rel"}, - "DET__Case=Gen|Gender=Neut|Number=Plur|PronType=Neg": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|PronType=Neg"}, - 
"DET__Case=Gen|Gender=Neut|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|PronType=Tot"}, - "DET__Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Gen|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Dem"}, - "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Ind"}, - "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Neg": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Neg"}, - "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Tot"}, - "DET__Case=Ins|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Case=Ins|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Plur|PronType=Dem"}, - "DET__Case=Ins|Gender=Fem|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Plur|PronType=Tot"}, - "DET__Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Ins|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Ins|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|PronType=Dem"}, - "DET__Case=Ins|Gender=Fem|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|PronType=Ind"}, - "DET__Case=Ins|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Ins|Gender=Fem|Number=Sing|PronType=Neg": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|PronType=Neg"}, - "DET__Case=Ins|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Ins|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind"}, - 
"DET__Case=Ins|Gender=Neut|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Plur|PronType=Dem"}, - "DET__Case=Ins|Gender=Neut|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Plur|PronType=Int,Rel"}, - "DET__Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Ins|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Ins|Gender=Neut|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Ind"}, - "DET__Case=Ins|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Loc|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Loc|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Case=Loc|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Loc|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|PronType=Dem"}, - "DET__Case=Loc|Gender=Fem|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|PronType=Int,Rel"}, - "DET__Case=Loc|Gender=Fem|Number=Plur|PronType=Neg": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|PronType=Neg"}, - "DET__Case=Loc|Gender=Fem|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|PronType=Tot"}, - "DET__Case=Loc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Loc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Loc|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Loc|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|PronType=Dem"}, - "DET__Case=Loc|Gender=Fem|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|PronType=Ind"}, - "DET__Case=Loc|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Loc|Gender=Fem|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|PronType=Tot"}, - "DET__Case=Loc|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"}, - 
"DET__Case=Loc|Gender=Neut|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Plur|PronType=Dem"}, - "DET__Case=Loc|Gender=Neut|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Plur|PronType=Ind"}, - "DET__Case=Loc|Gender=Neut|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Plur|PronType=Int,Rel"}, - "DET__Case=Loc|Gender=Neut|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Plur|PronType=Tot"}, - "DET__Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Loc|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"}, - "DET__Case=Loc|Gender=Neut|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Dem"}, - "DET__Case=Loc|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Loc|Gender=Neut|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Tot"}, - "DET__Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Case=Nom|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|PronType=Dem"}, - "DET__Case=Nom|Gender=Fem|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|PronType=Ind"}, - "DET__Case=Nom|Gender=Fem|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|PronType=Int,Rel"}, - "DET__Case=Nom|Gender=Fem|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|PronType=Tot"}, - "DET__Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, 
"morph": "Case=Nom|Gender=Fem|Number=Sing|PronType=Dem"}, - "DET__Case=Nom|Gender=Fem|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|PronType=Ind"}, - "DET__Case=Nom|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Nom|Gender=Fem|Number=Sing|PronType=Neg": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|PronType=Neg"}, - "DET__Case=Nom|Gender=Fem|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|PronType=Tot"}, - "DET__Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind"}, - "DET__Case=Nom|Gender=Neut|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|PronType=Dem"}, - "DET__Case=Nom|Gender=Neut|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|PronType=Ind"}, - "DET__Case=Nom|Gender=Neut|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|PronType=Int,Rel"}, - "DET__Case=Nom|Gender=Neut|Number=Plur|PronType=Neg": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|PronType=Neg"}, - "DET__Case=Nom|Gender=Neut|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|PronType=Tot"}, - "DET__Case=Nom|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"}, - "DET__Case=Nom|Gender=Neut|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Dem"}, - "DET__Case=Nom|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "DET__Case=Nom|Gender=Neut|Number=Sing|PronType=Neg": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Neg"}, - "DET__Case=Nom|Gender=Neut|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Tot"}, - "NOUN__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing": {POS: NOUN, 
"morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Hum|Case=Voc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Nhum|Case=Dat|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Dat|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Nhum|Case=Dat|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Dat|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing": 
{POS: NOUN, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur"}, - "NOUN__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing"}, - "NOUN__Animacy=Nhum|Case=Voc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Voc|Gender=Masc|Number=Plur"}, - "NOUN__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun"}, - "NOUN__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Imp|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun"}, - "NOUN__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun"}, - "NOUN__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun"}, - "NOUN__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - 
"NOUN__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun"}, - "NOUN__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"}, - "NOUN__Case=Acc|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|Number=Plur"}, - "NOUN__Case=Acc|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|Number=Sing"}, - "NOUN__Case=Acc|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Acc|Gender=Neut|Number=Plur"}, - "NOUN__Case=Acc|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Acc|Gender=Neut|Number=Sing"}, - "NOUN__Case=Dat|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|Number=Plur"}, - "NOUN__Case=Dat|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|Number=Sing"}, - "NOUN__Case=Dat|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Dat|Gender=Neut|Number=Plur"}, - "NOUN__Case=Dat|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Dat|Gender=Neut|Number=Sing"}, - "NOUN__Case=Gen|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|Number=Plur"}, - "NOUN__Case=Gen|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|Number=Sing"}, - "NOUN__Case=Gen|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Gen|Gender=Neut|Number=Plur"}, - "NOUN__Case=Gen|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Gen|Gender=Neut|Number=Sing"}, - "NOUN__Case=Ins|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|Number=Plur"}, - "NOUN__Case=Ins|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|Number=Sing"}, - "NOUN__Case=Ins|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Ins|Gender=Neut|Number=Plur"}, - "NOUN__Case=Ins|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Ins|Gender=Neut|Number=Sing"}, - "NOUN__Case=Loc|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|Number=Plur"}, - "NOUN__Case=Loc|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|Number=Sing"}, - "NOUN__Case=Loc|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Loc|Gender=Neut|Number=Plur"}, - "NOUN__Case=Loc|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Loc|Gender=Neut|Number=Sing"}, - "NOUN__Case=Nom|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|Number=Plur"}, - "NOUN__Case=Nom|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|Number=Sing"}, - "NOUN__Case=Nom|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Nom|Gender=Neut|Number=Plur"}, - "NOUN__Case=Nom|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Nom|Gender=Neut|Number=Sing"}, - "NOUN__Case=Voc|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Voc|Gender=Fem|Number=Sing"}, - "NOUN__Case=Voc|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Voc|Gender=Neut|Number=Plur"}, - "NOUN__Case=Voc|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Voc|Gender=Neut|Number=Sing"}, - "NUM__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur": {POS: NUM, "morph": 
"Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing"}, - "NUM__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing"}, - "NUM__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur"}, - "NUM__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur"}, - "NUM__Case=Acc|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Acc|Gender=Fem|Number=Plur"}, - "NUM__Case=Acc|Gender=Fem|Number=Sing": {POS: NUM, "morph": "Case=Acc|Gender=Fem|Number=Sing"}, - "NUM__Case=Acc|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Acc|Gender=Neut|Number=Plur"}, - "NUM__Case=Dat|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Dat|Gender=Fem|Number=Plur"}, - "NUM__Case=Dat|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Dat|Gender=Neut|Number=Plur"}, - "NUM__Case=Gen|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Gen|Gender=Fem|Number=Plur"}, - "NUM__Case=Gen|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Gen|Gender=Neut|Number=Plur"}, - "NUM__Case=Ins|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Ins|Gender=Fem|Number=Plur"}, - "NUM__Case=Ins|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Ins|Gender=Neut|Number=Plur"}, - "NUM__Case=Loc|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Loc|Gender=Fem|Number=Plur"}, - "NUM__Case=Loc|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Loc|Gender=Neut|Number=Plur"}, - "NUM__Case=Nom|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Nom|Gender=Fem|Number=Plur"}, - "NUM__Case=Nom|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Nom|Gender=Neut|Number=Plur"}, - "NUM__Case=Nom|Number=Plur": {POS: NUM, "morph": "Case=Nom|Number=Plur"}, - "PART___": {POS: PART}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=1|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=2|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": 
"Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Neg"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=1|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=2|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Tot": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Tot"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Short"}, - 
"PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Ind"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Neg"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=1|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=2|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"}, - 
"PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg"}, - "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=1|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=2|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=1|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=2|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "PRON__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=1|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=2|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Ind"}, - "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=1|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=2|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot": {POS: PRON, 
"morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot"}, - "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=1|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=2|PronType=Prs"}, - "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind"}, - "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel"}, - "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg"}, - "PRON__Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing|Person=2|PronType=Prs"}, - "PRON__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"}, - 
"PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Nhum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Nhum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"}, - "PRON__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": 
"Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Acc|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Acc|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long"}, - "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Short"}, - "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Acc|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Acc|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Acc|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Acc|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Acc|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Dem"}, - "PRON__Case=Acc|Gender=Neut|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Ind"}, - "PRON__Case=Acc|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "PRON__Case=Acc|Gender=Neut|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Neg"}, - "PRON__Case=Acc|Gender=Neut|Number=Sing|PronType=Tot": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Tot"}, - "PRON__Case=Acc|PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "Case=Acc|PronType=Prs|Reflex=Yes"}, - "PRON__Case=Dat|Gender=Fem|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Plur|Person=1|PronType=Prs"}, - "PRON__Case=Dat|Gender=Fem|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Plur|Person=2|PronType=Prs"}, - "PRON__Case=Dat|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Dat|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Dat|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": 
"Case=Dat|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long"}, - "PRON__Case=Dat|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Short"}, - "PRON__Case=Dat|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Short"}, - "PRON__Case=Dat|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Dat|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Dat|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Dat|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Dat|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"}, - "PRON__Case=Dat|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Dat|Gender=Neut|Number=Sing|PronType=Dem"}, - "PRON__Case=Dat|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Dat|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "PRON__Case=Dat|PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "Case=Dat|PronType=Prs|Reflex=Yes"}, - "PRON__Case=Gen|Gender=Fem|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Plur|Person=1|PronType=Prs"}, - "PRON__Case=Gen|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Gen|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Gen|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long"}, - "PRON__Case=Gen|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Long"}, - "PRON__Case=Gen|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Gen|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Gen|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Gen|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"}, - "PRON__Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": 
"Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Dem"}, - "PRON__Case=Gen|Gender=Neut|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Ind"}, - "PRON__Case=Gen|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "PRON__Case=Gen|Gender=Neut|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Neg"}, - "PRON__Case=Gen|Gender=Neut|Number=Sing|PronType=Tot": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Tot"}, - "PRON__Case=Gen|PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "Case=Gen|PronType=Prs|Reflex=Yes"}, - "PRON__Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short"}, - "PRON__Case=Ins|Gender=Fem|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Case=Ins|Gender=Fem|Number=Sing|Person=1|PronType=Prs"}, - "PRON__Case=Ins|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Ins|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Ins|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Ins|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Ins|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Dem"}, - "PRON__Case=Ins|Gender=Neut|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Ind"}, - "PRON__Case=Ins|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "PRON__Case=Ins|Gender=Neut|Number=Sing|PronType=Tot": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Tot"}, - "PRON__Case=Ins|PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "Case=Ins|PronType=Prs|Reflex=Yes"}, - "PRON__Case=Loc|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Loc|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Loc|Gender=Fem|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Case=Loc|Gender=Fem|Number=Sing|Person=1|PronType=Prs"}, - "PRON__Case=Loc|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Loc|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Loc|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - 
"PRON__Case=Loc|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"}, - "PRON__Case=Loc|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Dem"}, - "PRON__Case=Loc|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "PRON__Case=Loc|Gender=Neut|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Neg"}, - "PRON__Case=Loc|Gender=Neut|Number=Sing|PronType=Tot": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Tot"}, - "PRON__Case=Loc|PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "Case=Loc|PronType=Prs|Reflex=Yes"}, - "PRON__Case=Nom|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Nom|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Nom|Gender=Fem|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Case=Nom|Gender=Fem|Number=Sing|Person=1|PronType=Prs"}, - "PRON__Case=Nom|Gender=Fem|Number=Sing|Person=2|PronType=Prs": {POS: PRON, "morph": "Case=Nom|Gender=Fem|Number=Sing|Person=2|PronType=Prs"}, - "PRON__Case=Nom|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Nom|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Nom|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Nom|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"}, - "PRON__Case=Nom|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Dem"}, - "PRON__Case=Nom|Gender=Neut|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Ind"}, - "PRON__Case=Nom|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Int,Rel"}, - "PRON__Case=Nom|Gender=Neut|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Neg"}, - "PRON__Case=Nom|Gender=Neut|Number=Sing|PronType=Tot": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Tot"}, - "PRON__PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "PronType=Prs|Reflex=Yes"}, - "PRON__PronType=Prs|Reflex=Yes|Typo=Yes": {POS: PRON, "morph": "PronType=Prs|Reflex=Yes|Typo=Yes"}, - "PROPN__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing": 
{POS: PROPN, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Hum|Case=Voc|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing"}, - "PROPN__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur"}, - "PROPN__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing"}, - "PROPN__Case=Acc|Gender=Fem|Number=Plur": {POS: PROPN, "morph": "Case=Acc|Gender=Fem|Number=Plur"}, - "PROPN__Case=Acc|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Acc|Gender=Fem|Number=Sing"}, - "PROPN__Case=Acc|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Acc|Gender=Neut|Number=Plur"}, - "PROPN__Case=Acc|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Acc|Gender=Neut|Number=Sing"}, - "PROPN__Case=Dat|Gender=Fem|Number=Plur": {POS: PROPN, "morph": "Case=Dat|Gender=Fem|Number=Plur"}, - "PROPN__Case=Dat|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Dat|Gender=Fem|Number=Sing"}, - 
"PROPN__Case=Dat|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Dat|Gender=Neut|Number=Sing"}, - "PROPN__Case=Gen|Gender=Fem|Number=Plur": {POS: PROPN, "morph": "Case=Gen|Gender=Fem|Number=Plur"}, - "PROPN__Case=Gen|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Gen|Gender=Fem|Number=Sing"}, - "PROPN__Case=Gen|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Gen|Gender=Neut|Number=Plur"}, - "PROPN__Case=Gen|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Gen|Gender=Neut|Number=Sing"}, - "PROPN__Case=Ins|Gender=Fem|Number=Plur": {POS: PROPN, "morph": "Case=Ins|Gender=Fem|Number=Plur"}, - "PROPN__Case=Ins|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Ins|Gender=Fem|Number=Sing"}, - "PROPN__Case=Ins|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Ins|Gender=Neut|Number=Plur"}, - "PROPN__Case=Ins|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Ins|Gender=Neut|Number=Sing"}, - "PROPN__Case=Loc|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Loc|Gender=Fem|Number=Sing"}, - "PROPN__Case=Loc|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Loc|Gender=Neut|Number=Plur"}, - "PROPN__Case=Loc|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Loc|Gender=Neut|Number=Sing"}, - "PROPN__Case=Nom|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Nom|Gender=Fem|Number=Sing"}, - "PROPN__Case=Nom|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Nom|Gender=Neut|Number=Plur"}, - "PROPN__Case=Nom|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Nom|Gender=Neut|Number=Sing"}, - "PROPN__Case=Voc|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Voc|Gender=Fem|Number=Sing"}, - "PROPN__Case=Voc|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Voc|Gender=Neut|Number=Plur"}, - "PUNCT___": {POS: PUNCT}, - "SCONJ___": {POS: SCONJ}, - "VERB___": {POS: VERB}, - "VERB__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: 
VERB, "morph": "Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Aspect=Imp|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Aspect=Imp|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Aspect=Imp|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Aspect=Imp|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Aspect=Imp|Mood=Imp|Number=Plur|Person=1|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Plur|Person=1|VerbForm=Fin"}, - "VERB__Aspect=Imp|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin"}, - "VERB__Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin"}, - "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Imp|Tense=Pres|VerbForm=Conv": {POS: VERB, "morph": "Aspect=Imp|Tense=Pres|VerbForm=Conv"}, - "VERB__Aspect=Imp|VerbForm=Inf": {POS: VERB, "morph": "Aspect=Imp|VerbForm=Inf"}, - "VERB__Aspect=Perf|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Aspect=Perf|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Aspect=Perf|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": 
"Aspect=Perf|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Aspect=Perf|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"}, - "VERB__Aspect=Perf|Mood=Imp|Number=Plur|Person=1|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Plur|Person=1|VerbForm=Fin"}, - "VERB__Aspect=Perf|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin"}, - "VERB__Aspect=Perf|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin"}, - "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin"}, - "VERB__Aspect=Perf|Tense=Past|VerbForm=Conv": {POS: VERB, "morph": "Aspect=Perf|Tense=Past|VerbForm=Conv"}, - "VERB__Aspect=Perf|VerbForm=Inf": {POS: VERB, "morph": "Aspect=Perf|VerbForm=Inf"}, - "X___": {POS: X}, - "X__Abbr=Yes": {POS: X, "morph": "Abbr=Yes"} -} diff --git a/spacy/lang/pl/tokenizer_exceptions.py b/spacy/lang/pl/tokenizer_exceptions.py index 269634671..aa3f55d22 100644 --- a/spacy/lang/pl/tokenizer_exceptions.py +++ b/spacy/lang/pl/tokenizer_exceptions.py @@ -1,7 +1,7 @@ # encoding: utf8 from __future__ import unicode_literals -from ...symbols import ORTH, LEMMA, POS, ADV, ADJ, NOUN +from ...symbols import ORTH, LEMMA, POS, ADV, ADJ, NOUN, ADP _exc = {} @@ -12,11 +12,24 @@ for exc_data in [ {ORTH: "mgr.", LEMMA: "magister", POS: NOUN}, {ORTH: "tzn.", LEMMA: "to znaczy", POS: ADV}, {ORTH: "tj.", LEMMA: "to jest", POS: ADV}, - {ORTH: "tzw.", LEMMA: "tak zwany", POS: ADJ}]: + {ORTH: "tzw.", LEMMA: "tak zwany", POS: ADJ}, + {ORTH: "adw.", LEMMA: "adwokat", POS: NOUN}, + {ORTH: "afr.", LEMMA: "afrykański", POS: ADJ}, + {ORTH: "c.b.d.o.", LEMMA: "co było do okazania", POS: ADV}, + {ORTH: "cbdu.", LEMMA: "co było do udowodnienia", POS: ADV}, + {ORTH: "mn.w.", LEMMA: "mniej więcej", POS: ADV}, + {ORTH: "nt.", LEMMA: "na temat", POS: ADP}, + {ORTH: "ok.", LEMMA: "około"}, + {ORTH: "n.p.u.", LEMMA: "na psa urok"}, + {ORTH: "ww.", LEMMA: "wyżej wymieniony", POS: ADV}]: _exc[exc_data[ORTH]] = [exc_data] for orth in [ - "w.", "r."]: + "w.", "r.", "br.", "bm.", "b.r.", "amer.", "am.", "bdb.", "św.", "p.", "lit.", + "wym.", "czyt.", "daw.", "d.", "zob.", "gw.", "dn.", "dyr.", "im.", "mł.", + "min.", "dot.", "muz.", "k.k.", "k.p.a.", "k.p.c.", "n.p.m.", "p.p.m.", "nb.", + "ob.", "n.e.", "p.n.e.", "zw.", "zool.", "zach.", "żarg.", "żart.", "wzgl.", + "wyj.", "xx.", "ks.", "x.", "wyd.", "wsch.", "o.o."]: _exc[orth] = 
diff --git a/spacy/lang/tag_map.py b/spacy/lang/tag_map.py
index f7c42a434..f2bde76bb 100644
--- a/spacy/lang/tag_map.py
+++ b/spacy/lang/tag_map.py
@@ -24,5 +24,5 @@ TAG_MAP = {
     "ADJ": {POS: ADJ},
     "VERB": {POS: VERB},
     "PART": {POS: PART},
-    "_SP": {POS: SPACE}
+    "SP": {POS: SPACE}
 }
diff --git a/spacy/lang/vi/__init__.py b/spacy/lang/vi/__init__.py
deleted file mode 100644
index 0055f6faf..000000000
--- a/spacy/lang/vi/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...attrs import LANG
-from ...language import Language
-from ...tokens import Doc
-
-
-class VietnameseDefaults(Language.Defaults):
-    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
-    lex_attr_getters[LANG] = lambda text: 'vi' # for pickling
-
-
-class Vietnamese(Language):
-    lang = 'vi'
-    Defaults = VietnameseDefaults # override defaults
-
-
-__all__ = ['Vietnamese']
diff --git a/spacy/lang/zh/__init__.py b/spacy/lang/zh/__init__.py
index bdf739fd7..a2a2dcacd 100644
--- a/spacy/lang/zh/__init__.py
+++ b/spacy/lang/zh/__init__.py
@@ -9,7 +9,6 @@ from ...tokens import Doc
 class ChineseDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters[LANG] = lambda text: 'zh' # for pickling
-    use_jieba = True
 
 
 class Chinese(Language):
@@ -17,25 +16,14 @@ class Chinese(Language):
     Defaults = ChineseDefaults # override defaults
 
     def make_doc(self, text):
-        if self.Defaults.use_jieba:
-            try:
-                import jieba
-            except ImportError:
-                msg = ("Jieba not installed. Either set Chinese.use_jieba = False, "
-                       "or install it https://github.com/fxsjy/jieba")
-                raise ImportError(msg)
-            words = list(jieba.cut(text, cut_all=False))
-            words = [x for x in words if x]
-            return Doc(self.vocab, words=words, spaces=[False]*len(words))
-        else:
-            words = []
-            spaces = []
-            doc = self.tokenizer(text)
-            for token in self.tokenizer(text):
-                words.extend(list(token.text))
-                spaces.extend([False]*len(token.text))
-                spaces[-1] = bool(token.whitespace_)
-            return Doc(self.vocab, words=words, spaces=spaces)
+        try:
+            import jieba
+        except ImportError:
+            raise ImportError("The Chinese tokenizer requires the Jieba library: "
+                              "https://github.com/fxsjy/jieba")
+        words = list(jieba.cut(text, cut_all=False))
+        words = [x for x in words if x]
+        return Doc(self.vocab, words=words, spaces=[False]*len(words))
 
 
 __all__ = ['Chinese']
diff --git a/spacy/language.py b/spacy/language.py
index 4e74327a3..f04da7d30 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -17,7 +17,7 @@ from .vocab import Vocab
 from .lemmatizer import Lemmatizer
 from .pipeline import DependencyParser, Tensorizer, Tagger, EntityRecognizer
 from .pipeline import SimilarityHook, TextCategorizer, SentenceSegmenter
-from .pipeline import merge_noun_chunks, merge_entities, merge_subtokens
+from .pipeline import merge_noun_chunks, merge_entities
 from .compat import json_dumps, izip, basestring_
 from .gold import GoldParse
 from .scorer import Scorer
@@ -108,8 +108,7 @@ class Language(object):
         'sbd': lambda nlp, **cfg: SentenceSegmenter(nlp.vocab, **cfg),
         'sentencizer': lambda nlp, **cfg: SentenceSegmenter(nlp.vocab, **cfg),
         'merge_noun_chunks': lambda nlp, **cfg: merge_noun_chunks,
-        'merge_entities': lambda nlp, **cfg: merge_entities,
-        'merge_subtokens': lambda nlp, **cfg: merge_subtokens,
+        'merge_entities': lambda nlp, **cfg: merge_entities
     }
 
     def __init__(self, vocab=True, make_doc=True, meta={}, **kwargs):
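[Editor's note: the `factories` hunk above drops the 'merge_subtokens' entry; the remaining built-ins are instantiated by name through `Language.create_pipe`. A hedged usage sketch of that mechanism — 'en_core_web_sm' is an assumed model name, any pipeline with an entity recognizer behaves the same way:]

# Sketch of how a factory entry like 'merge_entities' is used (spaCy 2.x API).
# 'en_core_web_sm' is an assumed model name; any pipeline with NER works.
import spacy

nlp = spacy.load('en_core_web_sm')
merge_ents = nlp.create_pipe('merge_entities')  # looks up Language.factories
nlp.add_pipe(merge_ents, after='ner')
doc = nlp(u"Angela Merkel visited New York.")
print([t.text for t in doc])  # multi-token entity spans come back as single tokens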
diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index b4323e424..e51795684 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -1,7 +1,7 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-from .symbols import POS, NOUN, VERB, ADJ, PUNCT, PROPN
+from .symbols import POS, NOUN, VERB, ADJ, PUNCT
 from .symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos
 
@@ -27,13 +27,11 @@ class Lemmatizer(object):
             univ_pos = 'adj'
         elif univ_pos in (PUNCT, 'PUNCT', 'punct'):
             univ_pos = 'punct'
-        elif univ_pos in (PROPN, 'PROPN'):
-            return [string]
         else:
-            return [string.lower()]
+            return list(set([string.lower()]))
         # See Issue #435 for example of where this logic is required.
         if self.is_base_form(univ_pos, morphology):
-            return [string.lower()]
+            return list(set([string.lower()]))
         lemmas = lemmatize(string, self.index.get(univ_pos, {}),
                            self.exc.get(univ_pos, {}),
                            self.rules.get(univ_pos, []))
@@ -90,7 +88,6 @@ class Lemmatizer(object):
 
 def lemmatize(string, index, exceptions, rules):
-    orig = string
     string = string.lower()
     forms = []
     forms.extend(exceptions.get(string, []))
@@ -108,5 +105,5 @@ def lemmatize(string, index, exceptions, rules):
     if not forms:
         forms.extend(oov_forms)
     if not forms:
-        forms.append(orig)
+        forms.append(string)
     return list(set(forms))
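[Editor's note: the hunks above only show the edges of `lemmatize()`. Here is a simplified pure-Python sketch of its control flow with toy data; the suffix-rule loop in the middle is an assumption reconstructed from the surrounding lines, not part of the diff:]

# Simplified sketch of the lemmatize() control flow (toy data, illustration only).
def lemmatize(string, index, exceptions, rules):
    string = string.lower()
    forms = []
    forms.extend(exceptions.get(string, []))
    oov_forms = []
    for old, new in rules:  # assumed rule loop; elided by the hunks above
        if string.endswith(old):
            form = string[:len(string) - len(old)] + new
            if form and form in index:
                forms.append(form)
            elif form:
                oov_forms.append(form)
    if not forms:
        forms.extend(oov_forms)
    if not forms:
        forms.append(string)  # falls back to the lowercased input, per the diff
    return list(set(forms))

print(lemmatize("Studies", {"study"}, {}, [("ies", "y"), ("s", "")]))
# -> ['study']: the out-of-vocabulary form 'studie' is kept only as a fallback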
+ cdef struct AttrValueC: attr_id_t attr attr_t value @@ -70,231 +80,10 @@ cdef struct TokenPatternC: AttrValueC* attrs int32_t nr_attr quantifier_t quantifier - hash_t key -cdef struct ActionC: - char emit_match - char next_state_next_token - char next_state_same_token - char same_state_next_token - - -cdef struct PatternStateC: - TokenPatternC* pattern - int32_t start - int32_t length - - -cdef struct MatchC: - attr_t pattern_id - int32_t start - int32_t length - - -cdef find_matches(TokenPatternC** patterns, int n, Doc doc): - cdef vector[PatternStateC] states - cdef vector[MatchC] matches - cdef PatternStateC state - cdef Pool mem = Pool() - # TODO: Prefill this with the extra attribute values. - extra_attrs = mem.alloc(len(doc), sizeof(attr_t*)) - # Main loop - cdef int i, j - for i in range(doc.length): - for j in range(n): - states.push_back(PatternStateC(patterns[j], i, 0)) - transition_states(states, matches, &doc.c[i], extra_attrs[i]) - # Handle matches that end in 0-width patterns - finish_states(matches, states) - return [(matches[i].pattern_id, matches[i].start, matches[i].start+matches[i].length) - for i in range(matches.size())] - - - -cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& matches, - const TokenC* token, const attr_t* extra_attrs) except *: - cdef int q = 0 - cdef vector[PatternStateC] new_states - for i in range(states.size()): - action = get_action(states[i], token, extra_attrs) - if action == REJECT: - continue - state = states[i] - states[q] = state - while action in (RETRY, RETRY_EXTEND): - if action == RETRY_EXTEND: - new_states.push_back( - PatternStateC(pattern=state.pattern, start=state.start, - length=state.length+1)) - states[q].pattern += 1 - action = get_action(states[q], token, extra_attrs) - if action == REJECT: - pass - elif action == ADVANCE: - states[q].pattern += 1 - states[q].length += 1 - q += 1 - else: - ent_id = state.pattern[1].attrs.value - if action == MATCH: - matches.push_back( - MatchC(pattern_id=ent_id, start=state.start, - length=state.length+1)) - elif action == MATCH_REJECT: - matches.push_back( - MatchC(pattern_id=ent_id, start=state.start, - length=state.length)) - elif action == MATCH_EXTEND: - matches.push_back( - MatchC(pattern_id=ent_id, start=state.start, - length=state.length)) - states[q].length += 1 - q += 1 - states.resize(q) - for i in range(new_states.size()): - states.push_back(new_states[i]) - - -cdef void finish_states(vector[MatchC]& matches, vector[PatternStateC]& states) except *: - '''Handle states that end in zero-width patterns.''' - cdef PatternStateC state - for i in range(states.size()): - state = states[i] - while get_quantifier(state) in (ZERO_PLUS, ZERO_ONE): - is_final = get_is_final(state) - if is_final: - ent_id = state.pattern[1].attrs.value - matches.push_back( - MatchC(pattern_id=ent_id, start=state.start, length=state.length)) - break - else: - state.pattern += 1 - - -cdef action_t get_action(PatternStateC state, const TokenC* token, const attr_t* extra_attrs) nogil: - '''We need to consider: - - a) Does the token match the specification? [Yes, No] - b) What's the quantifier? [1, 0+, ?] - c) Is this the last specification? [final, non-final] - - We can transition in the following ways: - - a) Do we emit a match? - b) Do we add a state with (next state, next token)? - c) Do we add a state with (next state, same token)? - d) Do we add a state with (same state, next token)? 
- - We'll code the actions as boolean strings, so 0000 means no to all 4, - 1000 means match but no states added, etc. - - 1: - Yes, final: - 1000 - Yes, non-final: - 0100 - No, final: - 0000 - No, non-final - 0000 - 0+: - Yes, final: - 1001 - Yes, non-final: - 0011 - No, final: - 1000 (note: Don't include last token!) - No, non-final: - 0010 - ?: - Yes, final: - 1000 - Yes, non-final: - 0100 - No, final: - 1000 (note: Don't include last token!) - No, non-final: - 0010 - - Possible combinations: 1000, 0100, 0000, 1001, 0011, 0010, - - We'll name the bits "match", "advance", "retry", "extend" - REJECT = 0000 - MATCH = 1000 - ADVANCE = 0100 - RETRY = 0010 - MATCH_EXTEND = 1001 - RETRY_EXTEND = 0011 - MATCH_REJECT = 2000 # Match, but don't include last token - - Problem: If a quantifier is matching, we're adding a lot of open partials - ''' - cdef char is_match - is_match = get_is_match(state, token, extra_attrs) - quantifier = get_quantifier(state) - is_final = get_is_final(state) - if quantifier == ZERO: - is_match = not is_match - quantifier = ONE - if quantifier == ONE: - if is_match and is_final: - # Yes, final: 1000 - return MATCH - elif is_match and not is_final: - # Yes, non-final: 0100 - return ADVANCE - elif not is_match and is_final: - # No, final: 0000 - return REJECT - else: - return REJECT - elif quantifier == ZERO_PLUS: - if is_match and is_final: - # Yes, final: 1001 - return MATCH_EXTEND - elif is_match and not is_final: - # Yes, non-final: 0011 - return RETRY_EXTEND - elif not is_match and is_final: - # No, final 2000 (note: Don't include last token!) - return MATCH_REJECT - else: - # No, non-final 0010 - return RETRY - elif quantifier == ZERO_ONE: - if is_match and is_final: - # Yes, final: 1000 - return MATCH - elif is_match and not is_final: - # Yes, non-final: 0100 - return ADVANCE - elif not is_match and is_final: - # No, final 2000 (note: Don't include last token!) 
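The decision table spelled out in this (removed) docstring maps the triple (does the token match?, which quantifier?, is this the final pattern slot?) onto one of the seven actions. A pure-Python rendering of the same table, with illustrative constants mirroring the enum:

```python
# Pure-Python rendering of the decision table in the docstring above;
# the string constants mirror the removed Cython enum, illustrative only.
REJECT, MATCH, ADVANCE, RETRY = "0000", "1000", "0100", "0010"
RETRY_EXTEND, MATCH_EXTEND, MATCH_REJECT = "0011", "1001", "2000"

def get_action_sketch(is_match, quantifier, is_final):
    if quantifier == "!":                       # ZERO: negated token
        is_match, quantifier = not is_match, "1"
    if quantifier == "1":                       # ONE
        if not is_match:
            return REJECT
        return MATCH if is_final else ADVANCE
    if quantifier == "*":                       # ZERO_PLUS
        if is_match:
            return MATCH_EXTEND if is_final else RETRY_EXTEND
        # no match: emit without the current token, or retry the next slot
        return MATCH_REJECT if is_final else RETRY
    if quantifier == "?":                       # ZERO_ONE
        if is_match:
            return MATCH if is_final else ADVANCE
        return MATCH_REJECT if is_final else RETRY
```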
- return MATCH_REJECT - else: - # No, non-final 0010 - return RETRY - - -cdef char get_is_match(PatternStateC state, const TokenC* token, const attr_t* extra_attrs) nogil: - spec = state.pattern - for attr in spec.attrs[:spec.nr_attr]: - if get_token_attr(token, attr.attr) != attr.value: - return 0 - else: - return 1 - - -cdef char get_is_final(PatternStateC state) nogil: - if state.pattern[1].attrs[0].attr == ID and state.pattern[1].nr_attr == 0: - return 1 - else: - return 0 - - -cdef char get_quantifier(PatternStateC state) nogil: - return state.pattern.quantifier +ctypedef TokenPatternC* TokenPatternC_ptr +ctypedef pair[int, TokenPatternC_ptr] StateC cdef TokenPatternC* init_pattern(Pool mem, attr_t entity_id, @@ -308,7 +97,6 @@ cdef TokenPatternC* init_pattern(Pool mem, attr_t entity_id, for j, (attr, value) in enumerate(spec): pattern[i].attrs[j].attr = attr pattern[i].attrs[j].value = value - pattern[i].key = hash64(pattern[i].attrs, pattern[i].nr_attr * sizeof(AttrValueC), 0) i = len(token_specs) pattern[i].attrs = mem.alloc(2, sizeof(AttrValueC)) pattern[i].attrs[0].attr = ID @@ -317,16 +105,48 @@ cdef TokenPatternC* init_pattern(Pool mem, attr_t entity_id, return pattern -cdef attr_t get_pattern_key(const TokenPatternC* pattern) nogil: +cdef attr_t get_pattern_key(const TokenPatternC* pattern) except 0: while pattern.nr_attr != 0: pattern += 1 id_attr = pattern[0].attrs[0] + assert id_attr.attr == ID return id_attr.value + +cdef int get_action(const TokenPatternC* pattern, const TokenC* token) nogil: + lookahead = &pattern[1] + for attr in pattern.attrs[:pattern.nr_attr]: + if get_token_attr(token, attr.attr) != attr.value: + if pattern.quantifier == ONE: + return REJECT + elif pattern.quantifier == ZERO: + return ACCEPT if lookahead.nr_attr == 0 else ADVANCE + elif pattern.quantifier in (ZERO_ONE, ZERO_PLUS): + return ACCEPT_PREV if lookahead.nr_attr == 0 else ADVANCE_ZERO + else: + return PANIC + if pattern.quantifier == ZERO: + return REJECT + elif lookahead.nr_attr == 0: + return ACCEPT + elif pattern.quantifier in (ONE, ZERO_ONE): + return ADVANCE + elif pattern.quantifier == ZERO_PLUS: + # This is a bandaid over the 'shadowing' problem described here: + # https://github.com/explosion/spaCy/issues/864 + next_action = get_action(lookahead, token) + if next_action is REJECT: + return REPEAT + else: + return ADVANCE_ZERO + else: + return PANIC + + def _convert_strings(token_specs, string_store): # Support 'syntactic sugar' operator '+', as combination of ONE, ZERO_PLUS - operators = {'*': (ZERO_PLUS,), '+': (ONE, ZERO_PLUS), - '?': (ZERO_ONE,), '1': (ONE,), '!': (ZERO,)} + operators = {'!': (ZERO,), '*': (ZERO_PLUS,), '+': (ONE, ZERO_PLUS), + '?': (ZERO_ONE,), '1': (ONE,)} tokens = [] op = ONE for spec in token_specs: @@ -356,6 +176,21 @@ def _convert_strings(token_specs, string_store): return tokens +def merge_phrase(matcher, doc, i, matches): + """Callback to merge a phrase on match.""" + ent_id, label, start, end = matches[i] + span = doc[start:end] + span.merge(ent_type=label, ent_id=ent_id) + + +def unpickle_matcher(vocab, patterns, callbacks): + matcher = Matcher(vocab) + for key, specs in patterns.items(): + callback = callbacks.get(key, None) + matcher.add(key, callback, *specs) + return matcher + + cdef class Matcher: """Match sequences of tokens, based on pattern rules.""" cdef Pool mem @@ -476,7 +311,7 @@ cdef class Matcher: if key not in self._patterns: return default return (self._callbacks[key], self._patterns[key]) - + def pipe(self, docs, batch_size=1000, 
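`_convert_strings()` above maps the public `'OP'` values onto these quantifiers, with `'+'` desugared into ONE followed by ZERO_PLUS. A hedged usage example against the public API (the pipeline and pattern are illustrative, not from this diff):

```python
# Matcher quantifier usage sketch; English() gives a tokenizer-only pipeline.
from spacy.lang.en import English
from spacy.matcher import Matcher

nlp = English()
matcher = Matcher(nlp.vocab)
# 'OP': '+' desugars to one required token plus a ZERO_PLUS tail
matcher.add('VERY_GOOD', None,
            [{'LOWER': 'very', 'OP': '+'}, {'LOWER': 'good'}])
doc = nlp('This is very very good.')
for match_id, start, end in matcher(doc):
    print(doc[start:end].text)
```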
n_threads=2): """Match a stream of documents, yielding them in turn. @@ -498,9 +333,85 @@ cdef class Matcher: describing the matches. A match tuple describes a span `doc[start:end]`. The `label_id` and `key` are both integers. """ - matches = find_matches(&self.patterns[0], self.patterns.size(), doc) - for i, (key, start, end) in enumerate(matches): - on_match = self._callbacks.get(key, None) + cdef vector[StateC] partials + cdef int n_partials = 0 + cdef int q = 0 + cdef int i, token_i + cdef const TokenC* token + cdef StateC state + matches = [] + for token_i in range(doc.length): + token = &doc.c[token_i] + q = 0 + # Go over the open matches, extending or finalizing if able. + # Otherwise, we over-write them (q doesn't advance) + for state in partials: + action = get_action(state.second, token) + if action == PANIC: + raise Exception("Error selecting action in matcher") + while action == ADVANCE_ZERO: + state.second += 1 + action = get_action(state.second, token) + if action == PANIC: + raise Exception("Error selecting action in matcher") + + if action == REPEAT: + # Leave the state in the queue, and advance to next slot + # (i.e. we don't overwrite -- we want to greedily match + # more pattern. + q += 1 + elif action == REJECT: + pass + elif action == ADVANCE: + partials[q] = state + partials[q].second += 1 + q += 1 + elif action in (ACCEPT, ACCEPT_PREV): + # TODO: What to do about patterns starting with ZERO? Need + # to adjust the start position. + start = state.first + end = token_i+1 if action == ACCEPT else token_i + ent_id = state.second[1].attrs[0].value + label = state.second[1].attrs[1].value + matches.append((ent_id, start, end)) + + partials.resize(q) + # Check whether we open any new patterns on this token + for pattern in self.patterns: + action = get_action(pattern, token) + if action == PANIC: + raise Exception("Error selecting action in matcher") + while action == ADVANCE_ZERO: + pattern += 1 + action = get_action(pattern, token) + if action == REPEAT: + state.first = token_i + state.second = pattern + partials.push_back(state) + elif action == ADVANCE: + # TODO: What to do about patterns starting with ZERO? Need + # to adjust the start position. 
+ state.first = token_i + state.second = pattern + 1 + partials.push_back(state) + elif action in (ACCEPT, ACCEPT_PREV): + start = token_i + end = token_i+1 if action == ACCEPT else token_i + ent_id = pattern[1].attrs[0].value + label = pattern[1].attrs[1].value + matches.append((ent_id, start, end)) + # Look for open patterns that are actually satisfied + for state in partials: + while state.second.quantifier in (ZERO, ZERO_ONE, ZERO_PLUS): + state.second += 1 + if state.second.nr_attr == 0: + start = state.first + end = len(doc) + ent_id = state.second.attrs[0].value + label = state.second.attrs[0].value + matches.append((ent_id, start, end)) + for i, (ent_id, start, end) in enumerate(matches): + on_match = self._callbacks.get(ent_id) if on_match is not None: on_match(self, doc, i, matches) return matches @@ -512,37 +423,31 @@ cdef class Matcher: return key -def unpickle_matcher(vocab, patterns, callbacks): - matcher = Matcher(vocab) - for key, specs in patterns.items(): - callback = callbacks.get(key, None) - matcher.add(key, callback, *specs) - return matcher - - -def _get_longest_matches(matches): - '''Filter out matches that have a longer equivalent.''' - longest_matches = {} - for pattern_id, start, end in matches: - key = (pattern_id, start) - length = end-start - if key not in longest_matches or length > longest_matches[key]: - longest_matches[key] = length - return [(pattern_id, start, start+length) - for (pattern_id, start), length in longest_matches.items()] - - def get_bilou(length): - if length == 0: - raise ValueError("Length must be >= 1") - elif length == 1: + if length == 1: return [U_ENT] elif length == 2: return [B2_ENT, L2_ENT] elif length == 3: return [B3_ENT, I3_ENT, L3_ENT] + elif length == 4: + return [B4_ENT, I4_ENT, I4_ENT, L4_ENT] + elif length == 5: + return [B5_ENT, I5_ENT, I5_ENT, I5_ENT, L5_ENT] + elif length == 6: + return [B6_ENT, I6_ENT, I6_ENT, I6_ENT, I6_ENT, L6_ENT] + elif length == 7: + return [B7_ENT, I7_ENT, I7_ENT, I7_ENT, I7_ENT, I7_ENT, L7_ENT] + elif length == 8: + return [B8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, L8_ENT] + elif length == 9: + return [B9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, + L9_ENT] + elif length == 10: + return [B10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, + I10_ENT, I10_ENT, L10_ENT] else: - return [B4_ENT, I4_ENT] + [I4_ENT] * (length-3) + [L4_ENT] + raise ValueError("Max length currently 10 for phrase matching") cdef class PhraseMatcher: @@ -551,21 +456,21 @@ cdef class PhraseMatcher: cdef Matcher matcher cdef PreshMap phrase_ids cdef int max_length + cdef attr_t* _phrase_key cdef public object _callbacks cdef public object _patterns def __init__(self, Vocab vocab, max_length=10): self.mem = Pool() + self._phrase_key = self.mem.alloc(max_length, sizeof(attr_t)) self.max_length = max_length self.vocab = vocab self.matcher = Matcher(self.vocab) self.phrase_ids = PreshMap() - abstract_patterns = [ - [{U_ENT: True}], - [{B2_ENT: True}, {L2_ENT: True}], - [{B3_ENT: True}, {I3_ENT: True}, {L3_ENT: True}], - [{B4_ENT: True}, {I4_ENT: True}, {I4_ENT: True, "OP": "+"}, {L4_ENT: True}], - ] + abstract_patterns = [] + for length in range(1, max_length): + abstract_patterns.append([{tag: True} + for tag in get_bilou(length)]) self.matcher.add('Candidate', None, *abstract_patterns) self._callbacks = {} @@ -599,24 +504,29 @@ cdef class PhraseMatcher: *docs (Doc): `Doc` objects representing match patterns. 
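The reinstated `get_bilou()` above hard-codes one flag sequence per phrase length up to 10, but every sequence follows the same begin/inside/last rule with a unit case. A sketch with string tags standing in for the FLAG constants:

```python
# The per-length sequences hard-coded in get_bilou() above follow one rule;
# string tags here stand in for the FLAG constants.
def bilou_sketch(length):
    if length < 1 or length > 10:
        raise ValueError('Max length currently 10 for phrase matching')
    if length == 1:
        return ['U']                                   # unit entity
    return ['B'] + ['I'] * (length - 2) + ['L']        # begin, inside..., last

assert bilou_sketch(4) == ['B', 'I', 'I', 'L']
```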
""" cdef Doc doc + for doc in docs: + if len(doc) >= self.max_length: + msg = ( + "Pattern length (%d) >= phrase_matcher.max_length (%d). " + "Length can be set on initialization, up to 10." + ) + raise ValueError(msg % (len(doc), self.max_length)) cdef hash_t ent_id = self.matcher._normalize_key(key) self._callbacks[ent_id] = on_match cdef int length cdef int i cdef hash_t phrase_hash - cdef Pool mem = Pool() for doc in docs: length = doc.length - if length == 0: - continue tags = get_bilou(length) - phrase_key = mem.alloc(length, sizeof(attr_t)) + for i in range(self.max_length): + self._phrase_key[i] = 0 for i, tag in enumerate(tags): lexeme = self.vocab[doc.c[i].lex.orth] lexeme.set_flag(tag, True) - phrase_key[i] = lexeme.orth - phrase_hash = hash64(phrase_key, - length * sizeof(attr_t), 0) + self._phrase_key[i] = lexeme.orth + phrase_hash = hash64(self._phrase_key, + self.max_length * sizeof(attr_t), 0) self.phrase_ids.set(phrase_hash, ent_id) def __call__(self, Doc doc): @@ -638,45 +548,28 @@ cdef class PhraseMatcher: on_match(self, doc, i, matches) return matches - def pipe(self, stream, batch_size=1000, n_threads=2, return_matches=False, - as_tuples=False): + def pipe(self, stream, batch_size=1000, n_threads=2): """Match a stream of documents, yielding them in turn. docs (iterable): A stream of documents. batch_size (int): Number of documents to accumulate into a working set. n_threads (int): The number of threads with which to work on the buffer in parallel, if the implementation supports multi-threading. - return_matches (bool): Yield the match lists along with the docs, making - results (doc, matches) tuples. - as_tuples (bool): Interpret the input stream as (doc, context) tuples, - and yield (result, context) tuples out. - If both return_matches and as_tuples are True, the output will - be a sequence of ((doc, matches), context) tuples. YIELDS (Doc): Documents, in order. 
""" - if as_tuples: - for doc, context in stream: - matches = self(doc) - if return_matches: - yield ((doc, matches), context) - else: - yield (doc, context) - else: - for doc in stream: - matches = self(doc) - if return_matches: - yield (doc, matches) - else: - yield doc + for doc in stream: + self(doc) + yield doc def accept_match(self, Doc doc, int start, int end): + assert (end - start) < self.max_length cdef int i, j - cdef Pool mem = Pool() - phrase_key = mem.alloc(end-start, sizeof(attr_t)) + for i in range(self.max_length): + self._phrase_key[i] = 0 for i, j in enumerate(range(start, end)): - phrase_key[i] = doc.c[j].lex.orth - cdef hash_t key = hash64(phrase_key, - (end-start) * sizeof(attr_t), 0) + self._phrase_key[i] = doc.c[j].lex.orth + cdef hash_t key = hash64(self._phrase_key, + self.max_length * sizeof(attr_t), 0) ent_id = self.phrase_ids.get(key) if ent_id == 0: return None diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd index d0110b300..9192f351f 100644 --- a/spacy/morphology.pxd +++ b/spacy/morphology.pxd @@ -47,9 +47,7 @@ cdef class Morphology: cdef enum univ_morph_t: NIL = 0 Animacy_anim = symbols.Animacy_anim - Animacy_inan - Animacy_hum - Animacy_nhum + Animacy_inam Aspect_freq Aspect_imp Aspect_mod diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index ab48427ce..a5c5c0fbe 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -184,9 +184,7 @@ cdef class Morphology: IDS = { "Animacy_anim": Animacy_anim, - "Animacy_inan": Animacy_inan, - "Animacy_hum": Animacy_hum, # U20 - "Animacy_nhum": Animacy_nhum, + "Animacy_inam": Animacy_inam, "Aspect_freq": Aspect_freq, "Aspect_imp": Aspect_imp, "Aspect_mod": Aspect_mod, diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index bcf42b724..743f6ac85 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -25,7 +25,6 @@ from .morphology cimport Morphology from .vocab cimport Vocab from .syntax import nonproj from .compat import json_dumps -from .matcher import Matcher from .attrs import POS from .parts_of_speech import X @@ -98,17 +97,6 @@ def merge_entities(doc): return doc -def merge_subtokens(doc, label='subtok'): - merger = Matcher(doc.vocab) - merger.add('SUBTOK', None, [{'DEP': label, 'op': '+'}]) - matches = merger(doc) - spans = [doc[start:end+1] for _, start, end in matches] - offsets = [(span.start_char, span.end_char) for span in spans] - for start_char, end_char in offsets: - doc.merge(start_char, end_char) - return doc - - class Pipe(object): """This class is not instantiated directly. Components inherit from it, and it defines the interface that components should follow to function as @@ -179,7 +167,7 @@ class Pipe(object): problem. 
""" raise NotImplementedError - + def create_optimizer(self): return create_default_optimizer(self.model.ops, **self.cfg.get('optimizer', {})) @@ -664,13 +652,11 @@ class MultitaskObjective(Tagger): self.make_label = self.make_dep_tag_offset elif target == 'ent_tag': self.make_label = self.make_ent_tag - elif target == 'sent_start': - self.make_label = self.make_sent_start elif hasattr(target, '__call__'): self.make_label = target else: raise ValueError("MultitaskObjective target should be function or " - "one of: dep, tag, ent, sent_start, dep_tag_offset, ent_tag.") + "one of: dep, tag, ent, dep_tag_offset, ent_tag.") self.cfg = dict(cfg) self.cfg.setdefault('cnn_maxout_pieces', 2) self.cfg.setdefault('pretrained_dims', @@ -730,7 +716,11 @@ class MultitaskObjective(Tagger): for i, gold in enumerate(golds): for j in range(len(docs[i])): # Handes alignment for tokenization differences - label = self.make_label(j, gold.words, gold.tags, + gold_idx = gold.cand_to_gold[j] + if gold_idx is None: + idx += 1 + continue + label = self.make_label(gold_idx, gold.words, gold.tags, gold.heads, gold.labels, gold.ents) if label is None or label not in self.labels: correct[idx] = guesses[idx] @@ -775,51 +765,6 @@ class MultitaskObjective(Tagger): else: return '%s-%s' % (tags[i], ents[i]) - @staticmethod - def make_sent_start(target, words, tags, heads, deps, ents, cache=True, _cache={}): - '''A multi-task objective for representing sentence boundaries, - using BILU scheme. (O is impossible) - - The implementation of this method uses an internal cache that relies - on the identity of the heads array, to avoid requiring a new piece - of gold data. You can pass cache=False if you know the cache will - do the wrong thing. - ''' - assert len(words) == len(heads) - assert target < len(words), (target, len(words)) - if cache: - if id(heads) in _cache: - return _cache[id(heads)][target] - else: - for key in list(_cache.keys()): - _cache.pop(key) - sent_tags = ['I-SENT'] * len(words) - _cache[id(heads)] = sent_tags - else: - sent_tags = ['I-SENT'] * len(words) - - def _find_root(child): - seen = set([child]) - while child is not None and heads[child] != child: - seen.add(child) - child = heads[child] - return child - - sentences = {} - for i in range(len(words)): - root = _find_root(i) - if root is None: - sent_tags[i] = None - else: - sentences.setdefault(root, []).append(i) - for root, span in sorted(sentences.items()): - if len(span) == 1: - sent_tags[span[0]] = 'U-SENT' - else: - sent_tags[span[0]] = 'B-SENT' - sent_tags[span[-1]] = 'L-SENT' - return sent_tags[target] - class SimilarityHook(Pipe): """ @@ -878,8 +823,8 @@ class TextCategorizer(Pipe): name = 'textcat' @classmethod - def Model(cls, **cfg): - return build_text_classifier(**cfg) + def Model(cls, nr_class=1, width=64, **cfg): + return build_text_classifier(nr_class, width, **cfg) def __init__(self, vocab, model=True, **cfg): self.vocab = vocab @@ -945,15 +890,6 @@ class TextCategorizer(Pipe): if label in self.labels: return 0 if self.model not in (None, True, False): - # This functionality was available previously, but was broken. - # The problem is that we resize the last layer, but the last layer - # is actually just an ensemble. We're not resizing the child layers - # -- a huge problem. - raise ValueError( - "Cannot currently add labels to pre-trained text classifier. " - "Add labels before training begins. 
This functionality was " - "available in previous versions, but had significant bugs that - "led to poor performance") smaller = self.model._layers[-1] larger = Affine(len(self.labels)+1, smaller.nI) copy_array(larger.W[:smaller.nO], smaller.W) @@ -969,9 +905,8 @@ class TextCategorizer(Pipe): token_vector_width = 64 if self.model is True: self.cfg['pretrained_dims'] = self.vocab.vectors_length - self.cfg['nr_class'] = len(self.labels) - self.cfg['width'] = token_vector_width - self.model = self.Model(**self.cfg) + self.model = self.Model(len(self.labels), token_vector_width, + **self.cfg) link_vectors_to_models(self.vocab) if sgd is None: sgd = self.create_optimizer() @@ -985,7 +920,7 @@ cdef class DependencyParser(Parser): @property def postprocesses(self): return [nonproj.deprojectivize] - + def add_multitask_objective(self, target): labeller = MultitaskObjective(self.vocab, target=target) self._multitasks.append(labeller) @@ -1006,7 +941,7 @@ cdef class EntityRecognizer(Parser): TransitionSystem = BiluoPushDown nr_feature = 6 - + def add_multitask_objective(self, target): labeller = MultitaskObjective(self.vocab, target=target) self._multitasks.append(labeller) diff --git a/spacy/scorer.py b/spacy/scorer.py index 1dc600e6e..673df132c 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -1,7 +1,7 @@ # coding: utf8 from __future__ import division, print_function, unicode_literals -from .gold import tags_to_entities, GoldParse +from .gold import tags_to_entities class PRFScore(object): @@ -84,8 +84,6 @@ class Scorer(object): } def score(self, tokens, gold, verbose=False, punct_labels=('p', 'punct')): - if len(tokens) != len(gold): - gold = GoldParse.from_annot_tuples(tokens, zip(*gold.orig_annot)) assert len(tokens) == len(gold) gold_deps = set() gold_tags = set() @@ -102,7 +100,8 @@ class Scorer(object): continue gold_i = gold.cand_to_gold[token.i] if gold_i is None: - self.tokens.fp += 1 + if token.dep_.lower() not in punct_labels: + self.tokens.fp += 1 else: self.tokens.tp += 1 cand_tags.add((gold_i, token.tag_)) diff --git a/spacy/symbols.pxd b/spacy/symbols.pxd index 051b92edb..511e0e51a 100644 --- a/spacy/symbols.pxd +++ b/spacy/symbols.pxd @@ -85,7 +85,6 @@ cdef enum symbol_t: SENT_START SPACY PROB - LANG ADJ ADP @@ -109,9 +108,8 @@ cdef enum symbol_t: SPACE Animacy_anim - Animacy_inan + Animacy_inam Animacy_hum # U20 - Animacy_nhum Aspect_freq Aspect_imp Aspect_mod @@ -395,7 +393,6 @@ cdef enum symbol_t: EVENT WORK_OF_ART LANGUAGE - LAW DATE TIME @@ -454,9 +451,10 @@ cdef enum symbol_t: prt punct quantmod - relcl rcmod root xcomp acl + LAW + LANG diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx index 949621820..3bf413f43 100644 --- a/spacy/symbols.pyx +++ b/spacy/symbols.pyx @@ -114,9 +114,8 @@ IDS = { "SPACE": SPACE, "Animacy_anim": Animacy_anim, - "Animacy_inam": Animacy_inan, + "Animacy_inam": Animacy_inam, "Animacy_hum": Animacy_hum, # U20 - "Animacy_nhum": Animacy_nhum, "Aspect_freq": Aspect_freq, "Aspect_imp": Aspect_imp, "Aspect_mod": Aspect_mod, @@ -459,7 +458,6 @@ IDS = { "punct": punct, "quantmod": quantmod, "rcmod": rcmod, - "relcl": relcl, "root": root, "xcomp": xcomp, diff --git a/spacy/syntax/_state.pxd b/spacy/syntax/_state.pxd index a95a1910f..5470df470 100644 --- a/spacy/syntax/_state.pxd +++ b/spacy/syntax/_state.pxd @@ -108,7 +108,7 @@ cdef cppclass StateC: ids[1] = this.B(1) ids[2] = this.S(0) ids[3] = this.S(1) - ids[4] = this.S(2) + ids[4] = this.H(this.S(0)) ids[5] = this.L(this.B(0), 1) ids[6] = this.L(this.S(0), 1) ids[7] = this.R(this.S(0), 1) diff 
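On the TextCategorizer change above: the removed error message documents a real ordering constraint, since the output layer is sized from the labels known when training begins. A hedged sketch of the supported order, using the classes shown in this file (construction details are illustrative):

```python
# Sketch of the label-ordering constraint discussed above; direct
# construction of TextCategorizer is illustrative.
from spacy.lang.en import English
from spacy.pipeline import TextCategorizer

nlp = English()
textcat = TextCategorizer(nlp.vocab)
textcat.add_label('POSITIVE')   # fine: self.model is still True (unallocated)
textcat.add_label('NEGATIVE')
textcat.begin_training()        # output layer sized from the labels above
# Calling add_label() after this point resizes only the last layer, which
# the removed comment calls out as unsafe for the ensemble model.
```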
--git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx index 28e1a0292..190155269 100644 --- a/spacy/syntax/arc_eager.pyx +++ b/spacy/syntax/arc_eager.pyx @@ -6,19 +6,16 @@ from __future__ import unicode_literals from cpython.ref cimport Py_INCREF from cymem.cymem cimport Pool -from collections import OrderedDict, defaultdict, Counter +from collections import OrderedDict from thinc.extra.search cimport Beam -import json from .stateclass cimport StateClass from ._state cimport StateC -from . import nonproj +from .nonproj import is_nonproj_tree from .transition_system cimport move_cost_func_t, label_cost_func_t from ..gold cimport GoldParse, GoldParseC from ..structs cimport TokenC -# Calculate cost as gold/not gold. We don't use scalar value anyway. -cdef int BINARY_COSTS = 1 DEF NON_MONOTONIC = True DEF USE_BREAK = True @@ -57,8 +54,6 @@ cdef weight_t push_cost(StateClass stcls, const GoldParseC* gold, int target) no cost += 1 if gold.heads[S_i] == target and (NON_MONOTONIC or not stcls.has_head(S_i)): cost += 1 - if BINARY_COSTS and cost >= 1: - return cost cost += Break.is_valid(stcls.c, 0) and Break.move_cost(stcls, gold) == 0 return cost @@ -72,8 +67,6 @@ cdef weight_t pop_cost(StateClass stcls, const GoldParseC* gold, int target) nog cost += gold.heads[target] == B_i if gold.heads[B_i] == B_i or gold.heads[B_i] < target: break - if BINARY_COSTS and cost >= 1: - return cost if Break.is_valid(stcls.c, 0) and Break.move_cost(stcls, gold) == 0: cost += 1 return cost @@ -117,8 +110,7 @@ cdef bint _is_gold_root(const GoldParseC* gold, int word) nogil: cdef class Shift: @staticmethod cdef bint is_valid(const StateC* st, attr_t label) nogil: - sent_start = st._sent[st.B_(0).l_edge].sent_start - return st.buffer_length() >= 2 and not st.shifted[st.B(0)] and sent_start != 1 + return st.buffer_length() >= 2 and not st.shifted[st.B(0)] and st.B_(0).sent_start != 1 @staticmethod cdef int transition(StateC* st, attr_t label) nogil: @@ -178,8 +170,7 @@ cdef class Reduce: cdef class LeftArc: @staticmethod cdef bint is_valid(const StateC* st, attr_t label) nogil: - sent_start = st._sent[st.B_(0).l_edge].sent_start - return sent_start != 1 + return st.B_(0).sent_start != 1 @staticmethod cdef int transition(StateC* st, attr_t label) nogil: @@ -214,8 +205,7 @@ cdef class RightArc: @staticmethod cdef bint is_valid(const StateC* st, attr_t label) nogil: # If there's (perhaps partial) parse pre-set, don't allow cycle. 
- sent_start = st._sent[st.B_(0).l_edge].sent_start - return sent_start != 1 and st.H(st.S(0)) != st.B(0) + return st.B_(0).sent_start != 1 and st.H(st.S(0)) != st.B(0) @staticmethod cdef int transition(StateC* st, attr_t label) nogil: @@ -322,42 +312,39 @@ cdef class ArcEager(TransitionSystem): @classmethod def get_actions(cls, **kwargs): - min_freq = kwargs.get('min_freq', None) - actions = defaultdict(lambda: Counter()) - actions[SHIFT][''] = 1 - actions[REDUCE][''] = 1 + actions = kwargs.get('actions', OrderedDict(( + (SHIFT, ['']), + (REDUCE, ['']), + (RIGHT, []), + (LEFT, []), + (BREAK, ['ROOT'])) + )) + seen_actions = set() for label in kwargs.get('left_labels', []): - actions[LEFT][label] = 1 - actions[SHIFT][label] = 1 + if label.upper() != 'ROOT': + if (LEFT, label) not in seen_actions: + actions[LEFT].append(label) + seen_actions.add((LEFT, label)) for label in kwargs.get('right_labels', []): - actions[RIGHT][label] = 1 - actions[REDUCE][label] = 1 + if label.upper() != 'ROOT': + if (RIGHT, label) not in seen_actions: + actions[RIGHT].append(label) + seen_actions.add((RIGHT, label)) + for raw_text, sents in kwargs.get('gold_parses', []): for (ids, words, tags, heads, labels, iob), ctnts in sents: - heads, labels = nonproj.projectivize(heads, labels) for child, head, label in zip(ids, heads, labels): - if label.upper() == 'ROOT' : + if label.upper() == 'ROOT': label = 'ROOT' - if head == child: - actions[BREAK][label] += 1 - elif head < child: - actions[RIGHT][label] += 1 - actions[REDUCE][''] += 1 - elif head > child: - actions[LEFT][label] += 1 - actions[SHIFT][''] += 1 - if min_freq is not None: - for action, label_freqs in actions.items(): - for label, freq in list(label_freqs.items()): - if freq < min_freq: - label_freqs.pop(label) - # Ensure these actions are present - actions[BREAK].setdefault('ROOT', 0) - actions[RIGHT].setdefault('subtok', 0) - actions[LEFT].setdefault('subtok', 0) - # Used for backoff - actions[RIGHT].setdefault('dep', 0) - actions[LEFT].setdefault('dep', 0) + if label != 'ROOT': + if head < child: + if (RIGHT, label) not in seen_actions: + actions[RIGHT].append(label) + seen_actions.add((RIGHT, label)) + elif head > child: + if (LEFT, label) not in seen_actions: + actions[LEFT].append(label) + seen_actions.add((LEFT, label)) return actions property action_types: @@ -389,34 +376,18 @@ cdef class ArcEager(TransitionSystem): def preprocess_gold(self, GoldParse gold): if not self.has_gold(gold): return None - for i, (head, dep) in enumerate(zip(gold.heads, gold.labels)): + for i in range(gold.length): # Missing values - if head is None or dep is None: + if gold.heads[i] is None or gold.labels[i] is None: gold.c.heads[i] = i gold.c.has_dep[i] = False else: - if head > i: - action = LEFT - elif head < i: - action = RIGHT - else: - action = BREAK - if dep not in self.labels[action]: - if action == BREAK: - dep = 'ROOT' - elif nonproj.is_decorated(dep): - backoff = nonproj.decompose(dep)[0] - if backoff in self.labels[action]: - dep = backoff - else: - dep = 'dep' - else: - dep = 'dep' + label = gold.labels[i] gold.c.has_dep[i] = True - if dep.upper() == 'ROOT': - dep = 'ROOT' - gold.c.heads[i] = head - gold.c.labels[i] = self.strings.add(dep) + if label.upper() == 'ROOT': + label = 'ROOT' + gold.c.heads[i] = gold.heads[i] + gold.c.labels[i] = self.strings.add(label) return gold def get_beam_parses(self, Beam beam): @@ -556,13 +527,8 @@ cdef class ArcEager(TransitionSystem): is_valid[i] = False costs[i] = 9000 if n_gold < 1: - # Check label set --- leading 
cause - label_set = set([self.strings[self.c[i].label] for i in range(self.n_moves)]) - for label_str in gold.labels: - if label_str is not None and label_str not in label_set: - raise ValueError("Cannot get gold parser action: unknown label: %s" % label_str) - # Check projectivity --- other leading cause - if nonproj.is_nonproj_tree(gold.heads): + # Check projectivity --- leading cause + if is_nonproj_tree(gold.heads): raise ValueError( "Could not find a gold-standard action to supervise the " "dependency parser. Likely cause: the tree is " diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx index d56008ca0..73ef17534 100644 --- a/spacy/syntax/ner.pyx +++ b/spacy/syntax/ner.pyx @@ -3,7 +3,7 @@ from __future__ import unicode_literals from thinc.typedefs cimport weight_t from thinc.extra.search cimport Beam -from collections import OrderedDict, Counter +from collections import OrderedDict from .stateclass cimport StateClass from ._state cimport StateC @@ -64,18 +64,21 @@ cdef class BiluoPushDown(TransitionSystem): @classmethod def get_actions(cls, **kwargs): - actions = { - MISSING: Counter(), - BEGIN: Counter(), - IN: Counter(), - LAST: Counter(), - UNIT: Counter(), - OUT: Counter() - } - actions[OUT][''] = 1 + actions = kwargs.get('actions', OrderedDict(( + (MISSING, ['']), + (BEGIN, []), + (IN, []), + (LAST, []), + (UNIT, []), + (OUT, ['']) + ))) + seen_entities = set() for entity_type in kwargs.get('entity_types', []): + if entity_type in seen_entities: + continue + seen_entities.add(entity_type) for action in (BEGIN, IN, LAST, UNIT): - actions[action][entity_type] = 1 + actions[action].append(entity_type) moves = ('M', 'B', 'I', 'L', 'U') for raw_text, sents in kwargs.get('gold_parses', []): for (ids, words, tags, heads, labels, biluo), _ in sents: @@ -84,8 +87,10 @@ cdef class BiluoPushDown(TransitionSystem): if ner_tag.count('-') != 1: raise ValueError(ner_tag) _, label = ner_tag.split('-') - for action in (BEGIN, IN, LAST, UNIT): - actions[action][label] += 1 + if label not in seen_entities: + seen_entities.add(label) + for move_str in ('B', 'I', 'L', 'U'): + actions[moves.index(move_str)].append(label) return actions property action_types: @@ -208,7 +213,7 @@ cdef class BiluoPushDown(TransitionSystem): raise Exception(move) return t - def add_action(self, int action, label_name, freq=None): + def add_action(self, int action, label_name): cdef attr_t label_id if not isinstance(label_name, (int, long)): label_id = self.strings.add(label_name) @@ -229,12 +234,6 @@ cdef class BiluoPushDown(TransitionSystem): self.c[self.n_moves] = self.init_transition(self.n_moves, action, label_id) assert self.c[self.n_moves].label == label_id self.n_moves += 1 - if self.labels.get(action, []): - freq = min(0, min(self.labels[action].values())) - self.labels[action][label_name] = freq-1 - else: - self.labels[action] = Counter() - self.labels[action][label_name] = -1 return 1 cdef int initialize_state(self, StateC* st) nogil: diff --git a/spacy/syntax/nn_parser.pxd b/spacy/syntax/nn_parser.pxd index 9a1734d1c..56615c6f1 100644 --- a/spacy/syntax/nn_parser.pxd +++ b/spacy/syntax/nn_parser.pxd @@ -15,7 +15,7 @@ cdef class Parser: cdef readonly object cfg cdef public object _multitasks - cdef void _parseC(self, StateC** states, int nr_task, + cdef void _parseC(self, StateC* state, const float* feat_weights, const float* bias, const float* hW, const float* hb, int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 
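The `ArcEager.get_actions()` rewrite above derives arc labels from gold parses with a purely positional rule: a head to the left of its child contributes a RIGHT-arc label, a head to the right a LEFT-arc label, and ROOT attachments are handled separately. A pure-Python sketch (constants illustrative):

```python
# Pure-Python sketch of the label-collection rule in the rewritten
# ArcEager.get_actions() above; LEFT/RIGHT names are illustrative.
LEFT, RIGHT = 'LEFT', 'RIGHT'

def collect_arc_labels(ids, heads, labels):
    actions = {LEFT: [], RIGHT: []}
    for child, head, label in zip(ids, heads, labels):
        if label.upper() == 'ROOT' or head == child:
            continue   # root attachments are added separately
        side = RIGHT if head < child else LEFT
        if label not in actions[side]:
            actions[side].append(label)
    return actions

print(collect_arc_labels([0, 1, 2], [1, 1, 1], ['nsubj', 'ROOT', 'dobj']))
# {'LEFT': ['nsubj'], 'RIGHT': ['dobj']}
```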
759ccacab..f8cd964ef 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -1,6 +1,7 @@ # cython: infer_types=True # cython: cdivision=True # cython: boundscheck=False +# cython: profile=True # coding: utf-8 from __future__ import unicode_literals, print_function @@ -27,8 +28,6 @@ from thinc.misc import LayerNorm from thinc.neural.ops import CupyOps from thinc.neural.util import get_array_module from thinc.linalg cimport Vec, VecVec -from thinc cimport openblas - from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten from .._ml import link_vectors_to_models, create_default_optimizer @@ -172,8 +171,8 @@ cdef void sum_state_features(float* output, else: idx = token_ids[f] * F * O + f*O feature = &cached[idx] - VecVec.add_i(output, - feature, 1., O) + for i in range(O): + output[i] += feature[i] output += O token_ids += F @@ -266,7 +265,7 @@ cdef class Parser: with Model.use_device('cpu'): upper = chain( - clone(Maxout(hidden_width, hidden_width), depth-1), + clone(LayerNorm(Maxout(hidden_width, hidden_width)), depth-1), zero_init(Affine(nr_class, hidden_width, drop_factor=0.0)) ) @@ -302,7 +301,7 @@ cdef class Parser: """ self.vocab = vocab if moves is True: - self.moves = self.TransitionSystem(self.vocab.strings) + self.moves = self.TransitionSystem(self.vocab.strings, {}) else: self.moves = moves if 'beam_width' not in cfg: @@ -311,7 +310,12 @@ cdef class Parser: cfg['beam_density'] = util.env_opt('beam_density', 0.0) if 'pretrained_dims' not in cfg: cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1] + cfg.setdefault('cnn_maxout_pieces', 3) self.cfg = cfg + if 'actions' in self.cfg: + for action, labels in self.cfg.get('actions', {}).items(): + for label in labels: + self.moves.add_action(action, label) self.model = model self._multitasks = [] @@ -418,81 +422,69 @@ cdef class Parser: cdef int nr_hidden = hidden_weights.shape[0] cdef int nr_task = states.size() with nogil: - self._parseC(&states[0], nr_task, feat_weights, bias, hW, hb, - nr_class, nr_hidden, nr_feat, nr_piece) + for i in range(nr_task): + self._parseC(states[i], + feat_weights, bias, hW, hb, + nr_class, nr_hidden, nr_feat, nr_piece) PyErr_CheckSignals() tokvecs = self.model[0].ops.unflatten(tokvecs, [len(doc) for doc in docs]) return state_objs, tokvecs - cdef void _parseC(self, StateC** states, int nr_task, + cdef void _parseC(self, StateC* state, const float* feat_weights, const float* bias, const float* hW, const float* hb, int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil: token_ids = calloc(nr_feat, sizeof(int)) is_valid = calloc(nr_class, sizeof(int)) - vectors = calloc(nr_hidden * nr_task, sizeof(float)) - unmaxed = calloc(nr_hidden * nr_piece, sizeof(float)) - scores = calloc(nr_class*nr_task, sizeof(float)) + vectors = calloc(nr_hidden * nr_piece, sizeof(float)) + scores = calloc(nr_class, sizeof(float)) if not (token_ids and is_valid and vectors and scores): with gil: PyErr_SetFromErrno(MemoryError) PyErr_CheckSignals() - cdef int nr_todo = nr_task - cdef int i, j - cdef vector[StateC*] unfinished - while nr_todo >= 1: - memset(vectors, 0, nr_todo * nr_hidden * sizeof(float)) - memset(scores, 0, nr_todo * nr_class * sizeof(float)) - for i in range(nr_todo): - state = states[i] - state.set_context_tokens(token_ids, nr_feat) - memset(unmaxed, 0, nr_hidden * nr_piece * sizeof(float)) - sum_state_features(unmaxed, - feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece) - VecVec.add_i(unmaxed, - bias, 1., nr_hidden*nr_piece) - state_vector = 
&vectors[i*nr_hidden] - for j in range(nr_hidden): - index = j * nr_piece - which = Vec.arg_max(&unmaxed[index], nr_piece) - state_vector[j] = unmaxed[index + which] - # Compute hidden-to-output - openblas.simple_gemm(scores, nr_todo, nr_class, - vectors, nr_todo, nr_hidden, hW, nr_hidden, nr_class, 0, 0) - # Add bias - for i in range(nr_todo): - VecVec.add_i(&scores[i*nr_class], - hb, 1., nr_class) - # Validate actions, argmax, take action. - for i in range(nr_todo): - state = states[i] - self.moves.set_valid(is_valid, state) - guess = arg_max_if_valid(&scores[i*nr_class], is_valid, nr_class) - action = self.moves.c[guess] - action.do(state, action.label) - state.push_hist(guess) - if not state.is_final(): - unfinished.push_back(state) - for i in range(unfinished.size()): - states[i] = unfinished[i] - nr_todo = unfinished.size() - unfinished.clear() + cdef float feature + while not state.is_final(): + state.set_context_tokens(token_ids, nr_feat) + memset(vectors, 0, nr_hidden * nr_piece * sizeof(float)) + memset(scores, 0, nr_class * sizeof(float)) + sum_state_features(vectors, + feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece) + for i in range(nr_hidden * nr_piece): + vectors[i] += bias[i] + V = vectors + W = hW + for i in range(nr_hidden): + if nr_piece == 1: + feature = V[0] if V[0] >= 0. else 0. + elif nr_piece == 2: + feature = V[0] if V[0] >= V[1] else V[1] + else: + feature = Vec.max(V, nr_piece) + for j in range(nr_class): + scores[j] += feature * W[j] + W += nr_class + V += nr_piece + for i in range(nr_class): + scores[i] += hb[i] + self.moves.set_valid(is_valid, state) + guess = arg_max_if_valid(scores, is_valid, nr_class) + action = self.moves.c[guess] + action.do(state, action.label) + state.push_hist(guess) free(token_ids) free(is_valid) free(vectors) - free(unmaxed) free(scores) - def beam_parse(self, docs, int beam_width=3, float beam_density=0.001, - float drop=0.): + def beam_parse(self, docs, int beam_width=3, float beam_density=0.001): cdef Beam beam cdef np.ndarray scores cdef Doc doc cdef int nr_class = self.moves.n_moves cuda_stream = util.get_cuda_stream() (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model( - docs, cuda_stream, drop) + docs, cuda_stream, 0.0) cdef int offset = 0 cdef int j = 0 cdef int k @@ -531,8 +523,8 @@ cdef class Parser: n_states += 1 if n_states == 0: break - vectors, _ = state2vec.begin_update(token_ids[:n_states], drop) - scores, _ = vec2scores.begin_update(vectors, drop=drop) + vectors = state2vec(token_ids[:n_states]) + scores = vec2scores(vectors) c_scores = scores.data for beam in todo: for i in range(beam.size): @@ -563,10 +555,7 @@ cdef class Parser: for multitask in self._multitasks: multitask.update(docs, golds, drop=drop, sgd=sgd) cuda_stream = util.get_cuda_stream() - # Chop sequences into lengths of this many transitions, to make the - # batch uniform length. - cut_gold = numpy.random.choice(range(20, 100)) - states, golds, max_steps = self._init_gold_batch(docs, golds, max_length=cut_gold) + states, golds, max_steps = self._init_gold_batch(docs, golds) (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream, drop) todo = [(s, g) for (s, g) in zip(states, golds) @@ -669,7 +658,8 @@ cdef class Parser: for beam in beams: _cleanup(beam) - def _init_gold_batch(self, whole_docs, whole_golds, min_length=5, max_length=500): + + def _init_gold_batch(self, whole_docs, whole_golds): """Make a square batch, of length equal to the shortest doc. A long doc will get multiple states. 
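The rewritten scoring loop further above computes a maxout hidden layer one state at a time: each hidden unit keeps the maximum over its `nr_piece` pieces (degenerating to a ReLU when `nr_piece == 1`) before the hidden-to-output product. A numpy sketch of one state's score computation (shapes illustrative):

```python
# numpy sketch of the per-state maxout scoring in _parseC above;
# shapes are illustrative, not spaCy's actual dimensions.
import numpy as np

nr_hidden, nr_piece, nr_class = 64, 2, 10
pre = np.random.randn(nr_hidden, nr_piece)   # summed features plus bias
hidden = pre.max(axis=1)                     # maxout over the pieces
hW = np.random.randn(nr_hidden, nr_class)    # hidden-to-output weights
hb = np.random.randn(nr_class)               # output bias
scores = hidden @ hW + hb                    # class scores for one state
```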
Let's say we have a doc of length 2*N, where N is the shortest doc. We'll make two states, one representing @@ -678,7 +668,7 @@ cdef class Parser: StateClass state Transition action whole_states = self.moves.init_batch(whole_docs) - max_length = max(min_length, min(max_length, min([len(doc) for doc in whole_docs]))) + max_length = max(5, min(50, min([len(doc) for doc in whole_docs]))) max_moves = 0 states = [] golds = [] @@ -800,11 +790,6 @@ cdef class Parser: for doc in docs: hook(doc) - @property - def labels(self): - class_names = [self.moves.get_class_name(i) for i in range(self.moves.n_moves)] - return class_names - @property def tok2vec(self): '''Return the embedding and convolutional layer of the model.''' @@ -823,6 +808,9 @@ cdef class Parser: for action in self.moves.action_types: added = self.moves.add_action(action, label) if added: + # Important that the labels be stored as a list! We need the + # order, or the model goes out of synch + self.cfg.setdefault('extra_labels', []).append(label) resized = True if self.model not in (True, False, None) and resized: # Weights are stored in (nr_out, nr_in) format, so we're basically @@ -836,10 +824,12 @@ cdef class Parser: def begin_training(self, gold_tuples, pipeline=None, sgd=None, **cfg): if 'model' in cfg: self.model = cfg['model'] - cfg.setdefault('min_action_freq', 30) - actions = self.moves.get_actions(gold_parses=gold_tuples, - min_freq=cfg.get('min_action_freq', 30)) - self.moves.initialize_actions(actions) + gold_tuples = nonproj.preprocess_training_data(gold_tuples, + label_freq_cutoff=100) + actions = self.moves.get_actions(gold_parses=gold_tuples) + for action, labels in actions.items(): + for label in labels: + self.moves.add_action(action, label) cfg.setdefault('token_vector_width', 128) if self.model is True: cfg['pretrained_dims'] = self.vocab.vectors_length @@ -847,7 +837,7 @@ cdef class Parser: if sgd is None: sgd = self.create_optimizer() self.model[1].begin_training( - self.model[1].ops.allocate((5, cfg['token_vector_width']))) + self.model[1].ops.allocate((5, cfg['token_vector_width']))) if pipeline is not None: self.init_multitask_objectives(gold_tuples, pipeline, sgd=sgd, **cfg) link_vectors_to_models(self.vocab) diff --git a/spacy/syntax/nonproj.pyx b/spacy/syntax/nonproj.pyx index b6e1e80ef..cace1a832 100644 --- a/spacy/syntax/nonproj.pyx +++ b/spacy/syntax/nonproj.pyx @@ -9,7 +9,7 @@ from __future__ import unicode_literals from copy import copy -from ..tokens.doc cimport Doc, set_children_from_heads +from ..tokens.doc cimport Doc DELIMITER = '||' @@ -74,21 +74,7 @@ def decompose(label): def is_decorated(label): - return DELIMITER in label - -def count_decorated_labels(gold_tuples): - freqs = {} - for raw_text, sents in gold_tuples: - for (ids, words, tags, heads, labels, iob), ctnts in sents: - proj_heads, deco_labels = projectivize(heads, labels) - # set the label to ROOT for each root dependent - deco_labels = ['ROOT' if head == i else deco_labels[i] - for i, head in enumerate(proj_heads)] - # count label frequencies - for label in deco_labels: - if is_decorated(label): - freqs[label] = freqs.get(label, 0) + 1 - return freqs + return label.find(DELIMITER) != -1 def preprocess_training_data(gold_tuples, label_freq_cutoff=30): @@ -138,9 +124,8 @@ cpdef deprojectivize(Doc doc): if DELIMITER in label: new_label, head_label = label.split(DELIMITER) new_head = _find_new_head(doc[i], head_label) - doc.c[i].head = new_head.i - i + doc[i].head = new_head doc.c[i].dep = doc.vocab.strings.add(new_label) - 
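The docstring above describes chopping every doc into segments no longer than the shortest doc, clamped to between 5 and 50 transitions by the rewritten line, so a long doc contributes several start states. A sketch of the cut-point arithmetic:

```python
# Sketch of the segment-length rule in _init_gold_batch() above; the
# clamp to [5, 50] matches the rewritten line, doc lengths illustrative.
def cut_points(doc_lengths, lo=5, hi=50):
    max_length = max(lo, min(hi, min(doc_lengths)))
    return [list(range(0, n, max_length)) for n in doc_lengths]

print(cut_points([12, 40], hi=10))   # [[0, 10], [0, 10, 20, 30]]
```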
set_children_from_heads(doc.c, doc.length) return doc @@ -206,12 +191,9 @@ def _filter_labels(gold_tuples, cutoff, freqs): for raw_text, sents in gold_tuples: filtered_sents = [] for (ids, words, tags, heads, labels, iob), ctnts in sents: - filtered_labels = [] - for label in labels: - if is_decorated(label) and freqs.get(label, 0) < cutoff: - filtered_labels.append(decompose(label)[0]) - else: - filtered_labels.append(label) + filtered_labels = [decompose(label)[0] + if freqs.get(label, cutoff) < cutoff + else label for label in labels] filtered_sents.append( ((ids, words, tags, heads, filtered_labels, iob), ctnts)) filtered.append((raw_text, filtered_sents)) diff --git a/spacy/syntax/transition_system.pxd b/spacy/syntax/transition_system.pxd index 45d9a787f..bea58e9c3 100644 --- a/spacy/syntax/transition_system.pxd +++ b/spacy/syntax/transition_system.pxd @@ -42,7 +42,6 @@ cdef class TransitionSystem: cdef public attr_t root_label cdef public freqs cdef init_state_t init_beam_state - cdef public object labels cdef int initialize_state(self, StateC* state) nogil cdef int finalize_state(self, StateC* state) nogil diff --git a/spacy/syntax/transition_system.pyx b/spacy/syntax/transition_system.pyx index 959e8169f..94b1ef2b1 100644 --- a/spacy/syntax/transition_system.pyx +++ b/spacy/syntax/transition_system.pyx @@ -5,7 +5,7 @@ from __future__ import unicode_literals from cpython.ref cimport Py_INCREF from cymem.cymem cimport Pool from thinc.typedefs cimport weight_t -from collections import OrderedDict, Counter +from collections import OrderedDict import ujson from ..structs cimport TokenC @@ -28,7 +28,7 @@ cdef void* _init_state(Pool mem, int length, void* tokens) except NULL: cdef class TransitionSystem: - def __init__(self, StringStore string_table, labels_by_action=None, min_freq=None): + def __init__(self, StringStore string_table, labels_by_action): self.mem = Pool() self.strings = string_table self.n_moves = 0 @@ -36,14 +36,21 @@ cdef class TransitionSystem: self.c = self.mem.alloc(self._size, sizeof(Transition)) - self.labels = {} - if labels_by_action: - self.initialize_actions(labels_by_action, min_freq=min_freq) + for action, label_strs in labels_by_action.items(): + for label_str in label_strs: + self.add_action(int(action), label_str) self.root_label = self.strings.add('ROOT') self.init_beam_state = _init_state def __reduce__(self): - return (self.__class__, (self.strings, self.labels), None, None) + labels_by_action = OrderedDict() + cdef Transition t + for trans in self.c[:self.n_moves]: + label_str = self.strings[trans.label] + labels_by_action.setdefault(trans.move, []).append(label_str) + return (self.__class__, + (self.strings, labels_by_action), + None, None) def init_batch(self, docs): cdef StateClass state @@ -139,22 +146,6 @@ cdef class TransitionSystem: act = self.c[clas] return self.move_name(act.move, act.label) - def initialize_actions(self, labels_by_action, min_freq=None): - self.labels = {} - self.n_moves = 0 - for action, label_freqs in sorted(labels_by_action.items()): - action = int(action) - # Make sure we take a copy here, and that we get a Counter - self.labels[action] = Counter() - # Have to be careful here: Sorting must be stable, or our model - # won't be read back in correctly. 
- sorted_labels = [(f, L) for L, f in label_freqs.items()] - sorted_labels.sort() - sorted_labels.reverse() - for freq, label_str in sorted_labels: - self.add_action(int(action), label_str) - self.labels[action][label_str] = freq - def add_action(self, int action, label_name): cdef attr_t label_id if not isinstance(label_name, int) and \ @@ -173,14 +164,6 @@ cdef class TransitionSystem: self.c[self.n_moves] = self.init_transition(self.n_moves, action, label_id) assert self.c[self.n_moves].label == label_id self.n_moves += 1 - if self.labels.get(action, []): - new_freq = min(self.labels[action].values()) - else: - self.labels[action] = Counter() - new_freq = -1 - if new_freq > 0: - new_freq = 0 - self.labels[action][label_name] = new_freq-1 return 1 def to_disk(self, path, **exclude): @@ -195,18 +178,26 @@ cdef class TransitionSystem: def to_bytes(self, **exclude): transitions = [] + for trans in self.c[:self.n_moves]: + transitions.append({ + 'clas': trans.clas, + 'move': trans.move, + 'label': self.strings[trans.label], + 'name': self.move_name(trans.move, trans.label) + }) serializers = { - 'moves': lambda: json_dumps(self.labels), + 'transitions': lambda: json_dumps(transitions), 'strings': lambda: self.strings.to_bytes() } return util.to_bytes(serializers, exclude) def from_bytes(self, bytes_data, **exclude): - labels = {} + transitions = [] deserializers = { - 'moves': lambda b: labels.update(ujson.loads(b)), + 'transitions': lambda b: transitions.extend(ujson.loads(b)), 'strings': lambda b: self.strings.from_bytes(b) } msg = util.from_bytes(bytes_data, deserializers, exclude) - self.initialize_actions(labels) + for trans in transitions: + self.add_action(trans['move'], trans['label']) return self diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py index 4cbb8ed94..8cd4347c2 100644 --- a/spacy/tests/doc/test_span.py +++ b/spacy/tests/doc/test_span.py @@ -19,15 +19,6 @@ def doc(en_tokenizer): return get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps) -@pytest.fixture -def doc_not_parsed(en_tokenizer): - text = "This is a sentence. This is another sentence. And a third." - tokens = en_tokenizer(text) - d = get_doc(tokens.vocab, [t.text for t in tokens]) - d.is_parsed = False - return d - - def test_spans_sent_spans(doc): sents = list(doc.sents) assert sents[0].start == 0 @@ -43,7 +34,6 @@ def test_spans_root(doc): assert span.root.text == 'sentence' assert span.root.head.text == 'is' - def test_spans_string_fn(doc): span = doc[0:4] assert len(span) == 4 @@ -51,7 +41,6 @@ def test_spans_string_fn(doc): assert span.upper_ == 'THIS IS A SENTENCE' assert span.lower_ == 'this is a sentence' - def test_spans_root2(en_tokenizer): text = "through North and South Carolina" heads = [0, 3, -1, -2, -4] @@ -60,17 +49,12 @@ def test_spans_root2(en_tokenizer): assert doc[-2:].root.text == 'Carolina' -def test_spans_span_sent(doc, doc_not_parsed): +def test_spans_span_sent(doc): """Test span.sent property""" assert len(list(doc.sents)) assert doc[:2].sent.root.text == 'is' assert doc[:2].sent.text == 'This is a sentence .' 
assert doc[6:7].sent.root.left_edge.text == 'This' - # test on manual sbd - doc_not_parsed[0].is_sent_start = True - doc_not_parsed[5].is_sent_start = True - assert doc_not_parsed[1:3].sent == doc_not_parsed[0:5] - assert doc_not_parsed[10:14].sent == doc_not_parsed[5:] def test_spans_lca_matrix(en_tokenizer): @@ -145,7 +129,7 @@ def test_span_to_array(doc): assert arr[0, 1] == len(span[0]) -#def test_span_as_doc(doc): -# span = doc[4:10] -# span_doc = span.as_doc() -# assert span.text == span_doc.text.strip() +def test_span_as_doc(doc): + span = doc[4:10] + span_doc = span.as_doc() + assert span.text == span_doc.text.strip() diff --git a/spacy/tests/gold/test_lev_align.py b/spacy/tests/gold/test_lev_align.py new file mode 100644 index 000000000..29f58a156 --- /dev/null +++ b/spacy/tests/gold/test_lev_align.py @@ -0,0 +1,36 @@ +# coding: utf-8 +"""Find the min-cost alignment between two tokenizations""" + +from __future__ import unicode_literals + +from ...gold import _min_edit_path as min_edit_path +from ...gold import align + +import pytest + + +@pytest.mark.parametrize('cand,gold,path', [ + (["U.S", ".", "policy"], ["U.S.", "policy"], (0, 'MDM')), + (["U.N", ".", "policy"], ["U.S.", "policy"], (1, 'SDM')), + (["The", "cat", "sat", "down"], ["The", "cat", "sat", "down"], (0, 'MMMM')), + (["cat", "sat", "down"], ["The", "cat", "sat", "down"], (1, 'IMMM')), + (["The", "cat", "down"], ["The", "cat", "sat", "down"], (1, 'MMIM')), + (["The", "cat", "sag", "down"], ["The", "cat", "sat", "down"], (1, 'MMSM'))]) +def test_gold_lev_align_edit_path(cand, gold, path): + assert min_edit_path(cand, gold) == path + + +def test_gold_lev_align_edit_path2(): + cand = ["your", "stuff"] + gold = ["you", "r", "stuff"] + assert min_edit_path(cand, gold) in [(2, 'ISM'), (2, 'SIM')] + + +@pytest.mark.parametrize('cand,gold,result', [ + (["U.S", ".", "policy"], ["U.S.", "policy"], [0, None, 1]), + (["your", "stuff"], ["you", "r", "stuff"], [None, 2]), + (["i", "like", "2", "guys", " ", "well", "id", "just", "come", "straight", "out"], + ["i", "like", "2", "guys", "well", "i", "d", "just", "come", "straight", "out"], + [0, 1, 2, 3, None, 4, None, 7, 8, 9, 10])]) +def test_gold_lev_align(cand, gold, result): + assert align(cand, gold) == result diff --git a/spacy/tests/lang/en/test_tagger.py b/spacy/tests/lang/en/test_tagger.py index 0959ba7c7..a77e6d636 100644 --- a/spacy/tests/lang/en/test_tagger.py +++ b/spacy/tests/lang/en/test_tagger.py @@ -2,9 +2,9 @@ from __future__ import unicode_literals from ....parts_of_speech import SPACE -from ....compat import unicode_ from ...util import get_doc +import six import pytest @@ -24,8 +24,8 @@ def test_tag_names(EN): text = "I ate pizzas with anchovies." 
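The new test file above exercises the Levenshtein-style alignment helpers in `spacy.gold`, where the path string uses M for match, S for substitution, I for insertion, and D for deletion. A usage sketch following the conventions the tests assert (both helpers are internal to `spacy.gold` at this point):

```python
# Usage sketch of the internal helpers exercised by test_lev_align.py above.
from spacy.gold import _min_edit_path, align

cost, path = _min_edit_path(['U.S', '.', 'policy'], ['U.S.', 'policy'])
print(cost, path)   # 0 'MDM': match, delete the stray '.', match
print(align(['U.S', '.', 'policy'], ['U.S.', 'policy']))   # [0, None, 1]
```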
diff --git a/spacy/tests/lang/en/test_tagger.py b/spacy/tests/lang/en/test_tagger.py
index 0959ba7c7..a77e6d636 100644
--- a/spacy/tests/lang/en/test_tagger.py
+++ b/spacy/tests/lang/en/test_tagger.py
@@ -2,9 +2,9 @@ from __future__ import unicode_literals

 from ....parts_of_speech import SPACE
-from ....compat import unicode_
 from ...util import get_doc

+import six
 import pytest

@@ -24,8 +24,8 @@ def test_tag_names(EN):
     text = "I ate pizzas with anchovies."
     doc = EN(text, disable=['parser'])
     assert type(doc[2].pos) == int
-    assert isinstance(doc[2].pos_, unicode_)
-    assert isinstance(doc[2].dep_, unicode_)
+    assert isinstance(doc[2].pos_, six.text_type)
+    assert isinstance(doc[2].dep_, six.text_type)
     assert doc[2].tag_ == u'NNS'
diff --git a/spacy/tests/parser/test_arc_eager_oracle.py b/spacy/tests/parser/test_arc_eager_oracle.py
deleted file mode 100644
index 5f3a553e2..000000000
--- a/spacy/tests/parser/test_arc_eager_oracle.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from __future__ import unicode_literals
-from ...vocab import Vocab
-from ...pipeline import DependencyParser
-from ...tokens import Doc
-from ...gold import GoldParse
-from ...syntax.nonproj import projectivize
-
-annot_tuples = [
-    (0, 'When', 'WRB', 11, 'advmod', 'O'),
-    (1, 'Walter', 'NNP', 2, 'compound', 'B-PERSON'),
-    (2, 'Rodgers', 'NNP', 11, 'nsubj', 'L-PERSON'),
-    (3, ',', ',', 2, 'punct', 'O'),
-    (4, 'our', 'PRP$', 6, 'poss', 'O'),
-    (5, 'embedded', 'VBN', 6, 'amod', 'O'),
-    (6, 'reporter', 'NN', 2, 'appos', 'O'),
-    (7, 'with', 'IN', 6, 'prep', 'O'),
-    (8, 'the', 'DT', 10, 'det', 'B-ORG'),
-    (9, '3rd', 'NNP', 10, 'compound', 'I-ORG'),
-    (10, 'Cavalry', 'NNP', 7, 'pobj', 'L-ORG'),
-    (11, 'says', 'VBZ', 44, 'advcl', 'O'),
-    (12, 'three', 'CD', 13, 'nummod', 'U-CARDINAL'),
-    (13, 'battalions', 'NNS', 16, 'nsubj', 'O'),
-    (14, 'of', 'IN', 13, 'prep', 'O'),
-    (15, 'troops', 'NNS', 14, 'pobj', 'O'),
-    (16, 'are', 'VBP', 11, 'ccomp', 'O'),
-    (17, 'on', 'IN', 16, 'prep', 'O'),
-    (18, 'the', 'DT', 19, 'det', 'O'),
-    (19, 'ground', 'NN', 17, 'pobj', 'O'),
-    (20, ',', ',', 17, 'punct', 'O'),
-    (21, 'inside', 'IN', 17, 'prep', 'O'),
-    (22, 'Baghdad', 'NNP', 21, 'pobj', 'U-GPE'),
-    (23, 'itself', 'PRP', 22, 'appos', 'O'),
-    (24, ',', ',', 16, 'punct', 'O'),
-    (25, 'have', 'VBP', 26, 'aux', 'O'),
-    (26, 'taken', 'VBN', 16, 'dep', 'O'),
-    (27, 'up', 'RP', 26, 'prt', 'O'),
-    (28, 'positions', 'NNS', 26, 'dobj', 'O'),
-    (29, 'they', 'PRP', 31, 'nsubj', 'O'),
-    (30, "'re", 'VBP', 31, 'aux', 'O'),
-    (31, 'going', 'VBG', 26, 'parataxis', 'O'),
-    (32, 'to', 'TO', 33, 'aux', 'O'),
-    (33, 'spend', 'VB', 31, 'xcomp', 'O'),
-    (34, 'the', 'DT', 35, 'det', 'B-TIME'),
-    (35, 'night', 'NN', 33, 'dobj', 'L-TIME'),
-    (36, 'there', 'RB', 33, 'advmod', 'O'),
-    (37, 'presumably', 'RB', 33, 'advmod', 'O'),
-    (38, ',', ',', 44, 'punct', 'O'),
-    (39, 'how', 'WRB', 40, 'advmod', 'O'),
-    (40, 'many', 'JJ', 41, 'amod', 'O'),
-    (41, 'soldiers', 'NNS', 44, 'pobj', 'O'),
-    (42, 'are', 'VBP', 44, 'aux', 'O'),
-    (43, 'we', 'PRP', 44, 'nsubj', 'O'),
-    (44, 'talking', 'VBG', 44, 'ROOT', 'O'),
-    (45, 'about', 'IN', 44, 'prep', 'O'),
-    (46, 'right', 'RB', 47, 'advmod', 'O'),
-    (47, 'now', 'RB', 44, 'advmod', 'O'),
-    (48, '?', '.', 44, 'punct', 'O')]
-
-def test_get_oracle_actions():
-    doc = Doc(Vocab(), words=[t[1] for t in annot_tuples])
-    parser = DependencyParser(doc.vocab)
-    parser.moves.add_action(0, '')
-    parser.moves.add_action(1, '')
-    parser.moves.add_action(1, '')
-    parser.moves.add_action(4, 'ROOT')
-    for i, (id_, word, tag, head, dep, ent) in enumerate(annot_tuples):
-        if head > i:
-            parser.moves.add_action(2, dep)
-        elif head < i:
-            parser.moves.add_action(3, dep)
-    ids, words, tags, heads, deps, ents = zip(*annot_tuples)
-    heads, deps = projectivize(heads, deps)
-    gold = GoldParse(doc, words=words, tags=tags, heads=heads, deps=deps)
-    parser.moves.preprocess_gold(gold)
-    actions = parser.moves.get_oracle_sequence(doc, gold)
diff --git a/spacy/tests/regression/test_issue1450.py b/spacy/tests/regression/test_issue1450.py
index 3cfec349f..6f1d4f568 100644
--- a/spacy/tests/regression/test_issue1450.py
+++ b/spacy/tests/regression/test_issue1450.py
@@ -13,8 +13,8 @@ from ...vocab import Vocab
         ('a b', 0, 2),
         ('a c', 0, 1),
         ('a b c', 0, 2),
-        ('a b b c', 0, 3),
-        ('a b b', 0, 3),
+        ('a b b c', 0, 2),
+        ('a b b', 0, 2),
     ]
 )
 def test_issue1450_matcher_end_zero_plus(string, start, end):
@@ -54,6 +54,5 @@ def test_issue1450_matcher_end_zero_plus(string, start, end):

     if start is None or end is None:
         assert matches == []
-    print(matches)
-    assert matches[-1][1] == start
-    assert matches[-1][2] == end
+    assert matches[0][1] == start
+    assert matches[0][2] == end
diff --git a/spacy/tests/regression/test_issue1855.py b/spacy/tests/regression/test_issue1855.py
deleted file mode 100644
index b12b5c251..000000000
--- a/spacy/tests/regression/test_issue1855.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-import re
-
-from ...matcher import Matcher
-
-import pytest
-
-pattern1 = [{'ORTH':'A','OP':'1'},{'ORTH':'A','OP':'*'}]
-pattern2 = [{'ORTH':'A','OP':'*'},{'ORTH':'A','OP':'1'}]
-pattern3 = [{'ORTH':'A','OP':'1'},{'ORTH':'A','OP':'1'}]
-pattern4 = [{'ORTH':'B','OP':'1'},{'ORTH':'A','OP':'*'},{'ORTH':'B','OP':'1'}]
-pattern5 = [{'ORTH':'B','OP':'*'},{'ORTH':'A','OP':'*'},{'ORTH':'B','OP':'1'}]
-
-re_pattern1 = 'AA*'
-re_pattern2 = 'A*A'
-re_pattern3 = 'AA'
-re_pattern4 = 'BA*B'
-re_pattern5 = 'B*A*B'
-
-@pytest.fixture
-def text():
-    return "(ABBAAAAAB)."
-
-@pytest.fixture
-def doc(en_tokenizer,text):
-    doc = en_tokenizer(' '.join(text))
-    return doc
-
-@pytest.mark.xfail
-@pytest.mark.parametrize('pattern,re_pattern',[
-    (pattern1,re_pattern1),
-    (pattern2,re_pattern2),
-    (pattern3,re_pattern3),
-    (pattern4,re_pattern4),
-    (pattern5,re_pattern5)])
-def test_greedy_matching(doc,text,pattern,re_pattern):
-    """
-    Test that the greedy matching behavior of the * op
-    is consistant with other re implementations
-    """
-    matcher = Matcher(doc.vocab)
-    matcher.add(re_pattern,None,pattern)
-    matches = matcher(doc)
-    re_matches = [m.span() for m in re.finditer(re_pattern,text)]
-    for match,re_match in zip(matches,re_matches):
-        assert match[1:]==re_match
-
-@pytest.mark.xfail
-@pytest.mark.parametrize('pattern,re_pattern',[
-    (pattern1,re_pattern1),
-    (pattern2,re_pattern2),
-    (pattern3,re_pattern3),
-    (pattern4,re_pattern4),
-    (pattern5,re_pattern5)])
-def test_match_consuming(doc,text,pattern,re_pattern):
-    """
-    Test that matcher.__call__ consumes tokens on a match
-    similar to re.findall
-    """
-    matcher = Matcher(doc.vocab)
-    matcher.add(re_pattern,None,pattern)
-    matches = matcher(doc)
-    re_matches = [m.span() for m in re.finditer(re_pattern,text)]
-    assert len(matches)==len(re_matches)
diff --git a/spacy/tests/regression/test_issue1889.py b/spacy/tests/regression/test_issue1889.py
deleted file mode 100644
index a0e20abcf..000000000
--- a/spacy/tests/regression/test_issue1889.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-from ...lang.lex_attrs import is_stop
-from ...lang.en.stop_words import STOP_WORDS
-
-import pytest
-
-
-@pytest.mark.parametrize('word', ['the'])
-def test_lex_attrs_stop_words_case_sensitivity(word):
-    assert is_stop(word, STOP_WORDS) == is_stop(word.upper(), STOP_WORDS)
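The expectation changes in test_issue1450.py and the deletion of test_issue1855.py above both turn on the same question: whether a token pattern with 'OP': '*' should be greedy the way a regex quantifier is. For reference, this sketch shows the stdlib `re` behaviour that test_issue1855.py compared the Matcher against (pure stdlib, no spaCy involved):

import re

# A greedy quantifier consumes as much as it can: on "abb", the single
# match for 'ab*' covers all three characters.
spans = [m.span() for m in re.finditer('ab*', 'abb')]
assert spans == [(0, 3)]

# The updated assertions above instead check matches[0] with end == 2 for
# the tokens 'a b b', so the first match the Matcher reports is no longer
# assumed to be the longest one.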
diff --git a/spacy/tests/regression/test_issue1945.py b/spacy/tests/regression/test_issue1945.py
index 052f699fb..3b3179f64 100644
--- a/spacy/tests/regression/test_issue1945.py
+++ b/spacy/tests/regression/test_issue1945.py
@@ -6,6 +6,7 @@ from ...vocab import Vocab
 from ...tokens import Doc
 from ...matcher import Matcher

+@pytest.mark.xfail
 def test_issue1945():
     text = "a a a"
     matcher = Matcher(Vocab())
diff --git a/spacy/tests/regression/test_issue850.py b/spacy/tests/regression/test_issue850.py
index e83b4d8af..01bc19fb9 100644
--- a/spacy/tests/regression/test_issue850.py
+++ b/spacy/tests/regression/test_issue850.py
@@ -22,9 +22,10 @@ def test_basic_case():
     assert end == 4


+@pytest.mark.xfail
 def test_issue850():
-    """The variable-length pattern matches the
-    succeeding token. Check we handle the ambiguity correctly."""
+    """The problem here is that the variable-length pattern matches the
+    succeeding token. We then don't handle the ambiguity correctly."""
     matcher = Matcher(Vocab(
         lex_attr_getters={LOWER: lambda string: string.lower()}))
     IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)
diff --git a/spacy/tests/test_align.py b/spacy/tests/test_align.py
deleted file mode 100644
index 758808f6a..000000000
--- a/spacy/tests/test_align.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from __future__ import unicode_literals
-import pytest
-from .._align import align, multi_align
-
-
-@pytest.mark.parametrize('string1,string2,cost', [
-    ('hello', 'hell', 1),
-    ('rat', 'cat', 1),
-    ('rat', 'rat', 0),
-    ('rat', 'catsie', 4),
-    ('t', 'catsie', 5),
-])
-def test_align_costs(string1, string2, cost):
-    output_cost, i2j, j2i, matrix = align(string1, string2)
-    assert output_cost == cost
-
-
-@pytest.mark.parametrize('string1,string2,i2j', [
-    ('hello', 'hell', [0,1,2,3,-1]),
-    ('rat', 'cat', [0,1,2]),
-    ('rat', 'rat', [0,1,2]),
-    ('rat', 'catsie', [0,1,2]),
-    ('t', 'catsie', [2]),
-])
-def test_align_i2j(string1, string2, i2j):
-    output_cost, output_i2j, j2i, matrix = align(string1, string2)
-    assert list(output_i2j) == i2j
-
-
-@pytest.mark.parametrize('string1,string2,j2i', [
-    ('hello', 'hell', [0,1,2,3]),
-    ('rat', 'cat', [0,1,2]),
-    ('rat', 'rat', [0,1,2]),
-    ('rat', 'catsie', [0,1,2, -1, -1, -1]),
-    ('t', 'catsie', [-1, -1, 0, -1, -1, -1]),
-])
-def test_align_i2j(string1, string2, j2i):
-    output_cost, output_i2j, output_j2i, matrix = align(string1, string2)
-    assert list(output_j2i) == j2i
-
-def test_align_strings():
-    words1 = ['hello', 'this', 'is', 'test!']
-    words2 = ['hellothis', 'is', 'test', '!']
-    cost, i2j, j2i, matrix = align(words1, words2)
-    assert cost == 4
-    assert list(i2j) == [-1, -1, 1, -1]
-    assert list(j2i) == [-1, 2, -1, -1]
-
-def test_align_many_to_one():
-    words1 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
-    words2 = ['ab', 'bc', 'e', 'fg', 'h']
-    cost, i2j, j2i, matrix = align(words1, words2)
-    assert list(i2j) == [-1, -1, -1, -1, 2, -1, -1, 4]
-    lengths1 = [len(w) for w in words1]
-    lengths2 = [len(w) for w in words2]
-    i2j_multi, j2i_multi = multi_align(i2j, j2i, lengths1, lengths2)
-    assert i2j_multi[0] == 0
-    assert i2j_multi[1] == 0
-    assert i2j_multi[2] == 1
-    assert i2j_multi[3] == 1
-    assert i2j_multi[3] == 1
-    assert i2j_multi[5] == 3
-    assert i2j_multi[6] == 3
-
-    assert j2i_multi[0] == 1
-    assert j2i_multi[1] == 3
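The deleted test_align_many_to_one above exercises a second output, `i2j_multi`, which maps several fine-grained tokens onto one coarser token. Here is a sketch of that idea for the clean case where both tokenizations spell the same character string; the real `multi_align` works from the Levenshtein arrays and token lengths and also handles unequal strings, so the helper name and approach below are hypothetical:

def offset_multi_align(words1, words2):
    """Map each token of words1 to the words2 token covering the same
    characters. Several words1 tokens may share one words2 token.
    Hypothetical helper; assumes both tokenizations spell one string."""
    assert ''.join(words1) == ''.join(words2)
    # owner[k] = index of the words2 token that character k belongs to.
    owner = []
    for j, word in enumerate(words2):
        owner.extend([j] * len(word))
    i2j_multi = {}
    offset = 0
    for i, word in enumerate(words1):
        covering = set(owner[offset:offset + len(word)])
        if len(covering) == 1:  # token falls inside exactly one gold token
            i2j_multi[i] = covering.pop()
        offset += len(word)
    return i2j_multi


words1 = ['hello', 'this', 'is', 'test', '!']
words2 = ['hellothis', 'is', 'test!']
assert offset_multi_align(words1, words2) == {0: 0, 1: 0, 2: 1, 3: 2, 4: 2}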
diff --git a/spacy/tests/test_matcher.py b/spacy/tests/test_matcher.py
index 816243e13..8210467ea 100644
--- a/spacy/tests/test_matcher.py
+++ b/spacy/tests/test_matcher.py
@@ -3,17 +3,12 @@ from __future__ import unicode_literals

 from ..matcher import Matcher, PhraseMatcher
 from .util import get_doc
-from ..util import get_lang_class
 from ..tokens import Doc

 import pytest


-@pytest.fixture(scope="session")
-def en_vocab():
-    return get_lang_class('en').Defaults.create_vocab()
-
-@pytest.fixture(scope="session")
+@pytest.fixture
 def matcher(en_vocab):
     rules = {
         'JS': [[{'ORTH': 'JavaScript'}]],
@@ -26,196 +21,187 @@ def matcher(en_vocab):
     return matcher


-#def test_matcher_from_api_docs(en_vocab):
-#    matcher = Matcher(en_vocab)
-#    pattern = [{'ORTH': 'test'}]
-#    assert len(matcher) == 0
-#    matcher.add('Rule', None, pattern)
-#    assert len(matcher) == 1
-#    matcher.remove('Rule')
-#    assert 'Rule' not in matcher
-#    matcher.add('Rule', None, pattern)
-#    assert 'Rule' in matcher
-#    on_match, patterns = matcher.get('Rule')
-#    assert len(patterns[0])
-#
-#
-#def test_matcher_from_usage_docs(en_vocab):
-#    text = "Wow 😀 This is really cool! 😂 😂"
-#    doc = get_doc(en_vocab, words=text.split(' '))
-#    pos_emoji = [u'😀', u'😃', u'😂', u'🤣', u'😊', u'😍']
-#    pos_patterns = [[{'ORTH': emoji}] for emoji in pos_emoji]
-#
-#    def label_sentiment(matcher, doc, i, matches):
-#        match_id, start, end = matches[i]
-#        if doc.vocab.strings[match_id] == 'HAPPY':
-#            doc.sentiment += 0.1
-#        span = doc[start : end]
-#        token = span.merge()
-#        token.vocab[token.text].norm_ = 'happy emoji'
-#
-#    matcher = Matcher(en_vocab)
-#    matcher.add('HAPPY', label_sentiment, *pos_patterns)
-#    matches = matcher(doc)
-#    assert doc.sentiment != 0
-#    assert doc[1].norm_ == 'happy emoji'
+def test_matcher_from_api_docs(en_vocab):
+    matcher = Matcher(en_vocab)
+    pattern = [{'ORTH': 'test'}]
+    assert len(matcher) == 0
+    matcher.add('Rule', None, pattern)
+    assert len(matcher) == 1
+    matcher.remove('Rule')
+    assert 'Rule' not in matcher
+    matcher.add('Rule', None, pattern)
+    assert 'Rule' in matcher
+    on_match, patterns = matcher.get('Rule')
+    assert len(patterns[0])
-#@pytest.mark.parametrize('words', [["Some", "words"]])
-#def test_matcher_init(en_vocab, words):
-#    matcher = Matcher(en_vocab)
-#    doc = get_doc(en_vocab, words)
-#    assert len(matcher) == 0
-#    assert matcher(doc) == []
-#
-#
-#def test_matcher_contains(matcher):
-#    matcher.add('TEST', None, [{'ORTH': 'test'}])
-#    assert 'TEST' in matcher
-#    assert 'TEST2' not in matcher
-#
-#
-#def test_matcher_no_match(matcher):
-#    words = ["I", "like", "cheese", "."]
-#    doc = get_doc(matcher.vocab, words)
-#    assert matcher(doc) == []
-#
-#
-#def test_matcher_compile(en_vocab):
-#    rules = {
-#        'JS': [[{'ORTH': 'JavaScript'}]],
-#        'GoogleNow': [[{'ORTH': 'Google'}, {'ORTH': 'Now'}]],
-#        'Java': [[{'LOWER': 'java'}]]
-#    }
-#    matcher = Matcher(en_vocab)
-#    for key, patterns in rules.items():
-#        matcher.add(key, None, *patterns)
-#    assert len(matcher) == 3
-#
-#
-#def test_matcher_match_start(matcher):
-#    words = ["JavaScript", "is", "good"]
-#    doc = get_doc(matcher.vocab, words)
-#    assert matcher(doc) == [(matcher.vocab.strings['JS'], 0, 1)]
-#
-#
-#def test_matcher_match_end(matcher):
-#    words = ["I", "like", "java"]
-#    doc = get_doc(matcher.vocab, words)
-#    assert matcher(doc) == [(doc.vocab.strings['Java'], 2, 3)]
-#
-#
-#def test_matcher_match_middle(matcher):
-#    words = ["I", "like", "Google", "Now", "best"]
-#    doc = get_doc(matcher.vocab, words)
-#    assert matcher(doc) == [(doc.vocab.strings['GoogleNow'], 2, 4)]
-#
-#
-#def test_matcher_match_multi(matcher):
-#    words = ["I", "like", "Google", "Now", "and", "java", "best"]
-#    doc = get_doc(matcher.vocab, words)
-#    assert matcher(doc) == [(doc.vocab.strings['GoogleNow'], 2, 4),
-#                            (doc.vocab.strings['Java'], 5, 6)]
-#
-#
-#def test_matcher_empty_dict(en_vocab):
-#    '''Test matcher allows empty token specs, meaning match on any token.'''
-#    matcher = Matcher(en_vocab)
-#    abc = ["a", "b", "c"]
-#    doc = get_doc(matcher.vocab, abc)
-#    matcher.add('A.C', None, [{'ORTH': 'a'}, {}, {'ORTH': 'c'}])
-#    matches = matcher(doc)
-#    assert len(matches) == 1
-#    assert matches[0][1:] == (0, 3)
-#    matcher = Matcher(en_vocab)
-#    matcher.add('A.', None, [{'ORTH': 'a'}, {}])
-#    matches = matcher(doc)
-#    assert matches[0][1:] == (0, 2)
-#
-#
-#def test_matcher_operator_shadow(en_vocab):
-#    matcher = Matcher(en_vocab)
-#    abc = ["a", "b", "c"]
-#    doc = get_doc(matcher.vocab, abc)
-#    matcher.add('A.C', None, [{'ORTH': 'a'},
-#                              {"IS_ALPHA": True, "OP": "+"},
-#                              {'ORTH': 'c'}])
-#    matches = matcher(doc)
-#    assert len(matches) == 1
-#    assert matches[0][1:] == (0, 3)
-#
-#
-#def test_matcher_phrase_matcher(en_vocab):
-#    words = ["Google", "Now"]
-#    doc = get_doc(en_vocab, words)
-#    matcher = PhraseMatcher(en_vocab)
-#    matcher.add('COMPANY', None, doc)
-#    words = ["I", "like", "Google", "Now", "best"]
-#    doc = get_doc(en_vocab, words)
-#    assert len(matcher(doc)) == 1
-#
-#
-#def test_phrase_matcher_length(en_vocab):
-#    matcher = PhraseMatcher(en_vocab)
-#    assert len(matcher) == 0
-#    matcher.add('TEST', None, get_doc(en_vocab, ['test']))
-#    assert len(matcher) == 1
-#    matcher.add('TEST2', None, get_doc(en_vocab, ['test2']))
-#    assert len(matcher) == 2
-#
-#
-#def test_phrase_matcher_contains(en_vocab):
-#    matcher = PhraseMatcher(en_vocab)
-#    matcher.add('TEST', None, get_doc(en_vocab, ['test']))
-#    assert 'TEST' in matcher
-#    assert 'TEST2' not in matcher
-#
-#
-#def test_matcher_match_zero(matcher):
-#    words1 = 'He said , " some words " ...'.split()
-#    words2 = 'He said , " some three words " ...'.split()
-#    pattern1 = [{'ORTH': '"'},
-#                {'OP': '!', 'IS_PUNCT': True},
-#                {'OP': '!', 'IS_PUNCT': True},
-#                {'ORTH': '"'}]
-#    pattern2 = [{'ORTH': '"'},
-#                {'IS_PUNCT': True},
-#                {'IS_PUNCT': True},
-#                {'IS_PUNCT': True},
-#                {'ORTH': '"'}]
-#
-#    matcher.add('Quote', None, pattern1)
-#    doc = get_doc(matcher.vocab, words1)
-#    assert len(matcher(doc)) == 1
-#
-#    doc = get_doc(matcher.vocab, words2)
-#    assert len(matcher(doc)) == 0
-#    matcher.add('Quote', None, pattern2)
-#    assert len(matcher(doc)) == 0
-#
-#
-#def test_matcher_match_zero_plus(matcher):
-#    words = 'He said , " some words " ...'.split()
-#    pattern = [{'ORTH': '"'},
-#               {'OP': '*', 'IS_PUNCT': False},
-#               {'ORTH': '"'}]
-#    matcher = Matcher(matcher.vocab)
-#    matcher.add('Quote', None, pattern)
-#    doc = get_doc(matcher.vocab, words)
-#    assert len(matcher(doc)) == 1
-#
-#
-#def test_matcher_match_one_plus(matcher):
-#    control = Matcher(matcher.vocab)
-#    control.add('BasicPhilippe', None, [{'ORTH': 'Philippe'}])
-#    doc = get_doc(control.vocab, ['Philippe', 'Philippe'])
-#    m = control(doc)
-#    assert len(m) == 2
-#    matcher.add('KleenePhilippe', None, [{'ORTH': 'Philippe', 'OP': '1'},
-#                                         {'ORTH': 'Philippe', 'OP': '+'}])
-#    m = matcher(doc)
-#    assert len(m) == 1
-#
+def test_matcher_from_usage_docs(en_vocab):
+    text = "Wow 😀 This is really cool! 😂 😂"
+    doc = get_doc(en_vocab, words=text.split(' '))
+    pos_emoji = [u'😀', u'😃', u'😂', u'🤣', u'😊', u'😍']
+    pos_patterns = [[{'ORTH': emoji}] for emoji in pos_emoji]
+
+    def label_sentiment(matcher, doc, i, matches):
+        match_id, start, end = matches[i]
+        if doc.vocab.strings[match_id] == 'HAPPY':
+            doc.sentiment += 0.1
+        span = doc[start : end]
+        token = span.merge()
+        token.vocab[token.text].norm_ = 'happy emoji'
+
+    matcher = Matcher(en_vocab)
+    matcher.add('HAPPY', label_sentiment, *pos_patterns)
+    matches = matcher(doc)
+    assert doc.sentiment != 0
+    assert doc[1].norm_ == 'happy emoji'
+
+
+@pytest.mark.parametrize('words', [["Some", "words"]])
+def test_matcher_init(en_vocab, words):
+    matcher = Matcher(en_vocab)
+    doc = get_doc(en_vocab, words)
+    assert len(matcher) == 0
+    assert matcher(doc) == []
+
+
+def test_matcher_contains(matcher):
+    matcher.add('TEST', None, [{'ORTH': 'test'}])
+    assert 'TEST' in matcher
+    assert 'TEST2' not in matcher
+
+
+def test_matcher_no_match(matcher):
+    words = ["I", "like", "cheese", "."]
+    doc = get_doc(matcher.vocab, words)
+    assert matcher(doc) == []
+
+
+def test_matcher_compile(matcher):
+    assert len(matcher) == 3
+
+
+def test_matcher_match_start(matcher):
+    words = ["JavaScript", "is", "good"]
+    doc = get_doc(matcher.vocab, words)
+    assert matcher(doc) == [(matcher.vocab.strings['JS'], 0, 1)]
+
+
+def test_matcher_match_end(matcher):
+    words = ["I", "like", "java"]
+    doc = get_doc(matcher.vocab, words)
+    assert matcher(doc) == [(doc.vocab.strings['Java'], 2, 3)]
+
+
+def test_matcher_match_middle(matcher):
+    words = ["I", "like", "Google", "Now", "best"]
+    doc = get_doc(matcher.vocab, words)
+    assert matcher(doc) == [(doc.vocab.strings['GoogleNow'], 2, 4)]
+
+
+def test_matcher_match_multi(matcher):
+    words = ["I", "like", "Google", "Now", "and", "java", "best"]
+    doc = get_doc(matcher.vocab, words)
+    assert matcher(doc) == [(doc.vocab.strings['GoogleNow'], 2, 4),
+                            (doc.vocab.strings['Java'], 5, 6)]
+
+
+def test_matcher_empty_dict(en_vocab):
+    '''Test matcher allows empty token specs, meaning match on any token.'''
+    matcher = Matcher(en_vocab)
+    abc = ["a", "b", "c"]
+    doc = get_doc(matcher.vocab, abc)
+    matcher.add('A.C', None, [{'ORTH': 'a'}, {}, {'ORTH': 'c'}])
+    matches = matcher(doc)
+    assert len(matches) == 1
+    assert matches[0][1:] == (0, 3)
+    matcher = Matcher(en_vocab)
+    matcher.add('A.', None, [{'ORTH': 'a'}, {}])
+    matches = matcher(doc)
+    assert matches[0][1:] == (0, 2)
+
+
+def test_matcher_operator_shadow(en_vocab):
+    matcher = Matcher(en_vocab)
+    abc = ["a", "b", "c"]
+    doc = get_doc(matcher.vocab, abc)
+    matcher.add('A.C', None, [{'ORTH': 'a'},
+                              {"IS_ALPHA": True, "OP": "+"},
+                              {'ORTH': 'c'}])
+    matches = matcher(doc)
+    assert len(matches) == 1
+    assert matches[0][1:] == (0, 3)
+
+
+def test_matcher_phrase_matcher(en_vocab):
+    words = ["Google", "Now"]
+    doc = get_doc(en_vocab, words)
+    matcher = PhraseMatcher(en_vocab)
+    matcher.add('COMPANY', None, doc)
+    words = ["I", "like", "Google", "Now", "best"]
+    doc = get_doc(en_vocab, words)
+    assert len(matcher(doc)) == 1
+
+
+def test_phrase_matcher_length(en_vocab):
+    matcher = PhraseMatcher(en_vocab)
+    assert len(matcher) == 0
+    matcher.add('TEST', None, get_doc(en_vocab, ['test']))
+    assert len(matcher) == 1
+    matcher.add('TEST2', None, get_doc(en_vocab, ['test2']))
+    assert len(matcher) == 2
+
+
+def test_phrase_matcher_contains(en_vocab):
+    matcher = PhraseMatcher(en_vocab)
+    matcher.add('TEST', None, get_doc(en_vocab, ['test']))
+    assert 'TEST' in matcher
+    assert 'TEST2' not in matcher
+
+
+def test_matcher_match_zero(matcher):
+    words1 = 'He said , " some words " ...'.split()
+    words2 = 'He said , " some three words " ...'.split()
+    pattern1 = [{'ORTH': '"'},
+                {'OP': '!', 'IS_PUNCT': True},
+                {'OP': '!', 'IS_PUNCT': True},
+                {'ORTH': '"'}]
+    pattern2 = [{'ORTH': '"'},
+                {'IS_PUNCT': True},
+                {'IS_PUNCT': True},
+                {'IS_PUNCT': True},
+                {'ORTH': '"'}]
+
+    matcher.add('Quote', None, pattern1)
+    doc = get_doc(matcher.vocab, words1)
+    assert len(matcher(doc)) == 1
+
+    doc = get_doc(matcher.vocab, words2)
+    assert len(matcher(doc)) == 0
+    matcher.add('Quote', None, pattern2)
+    assert len(matcher(doc)) == 0
+
+
+def test_matcher_match_zero_plus(matcher):
+    words = 'He said , " some words " ...'.split()
+    pattern = [{'ORTH': '"'},
+               {'OP': '*', 'IS_PUNCT': False},
+               {'ORTH': '"'}]
+    matcher.add('Quote', None, pattern)
+    doc = get_doc(matcher.vocab, words)
+    assert len(matcher(doc)) == 1
+
+
+def test_matcher_match_one_plus(matcher):
+    control = Matcher(matcher.vocab)
+    control.add('BasicPhilippe', None, [{'ORTH': 'Philippe'}])
+    doc = get_doc(control.vocab, ['Philippe', 'Philippe'])
+    m = control(doc)
+    assert len(m) == 2
+    matcher.add('KleenePhilippe', None, [{'ORTH': 'Philippe', 'OP': '1'},
+                                         {'ORTH': 'Philippe', 'OP': '+'}])
+    m = matcher(doc)
+    assert len(m) == 1
+

 def test_operator_combos(matcher):
     cases = [
@@ -266,8 +252,9 @@ def test_matcher_end_zero_plus(matcher):
     )
     nlp = lambda string: Doc(matcher.vocab, words=string.split())
     assert len(matcher(nlp(u'a'))) == 1
-    assert len(matcher(nlp(u'a b'))) == 2
+    assert len(matcher(nlp(u'a b'))) == 1
+    assert len(matcher(nlp(u'a b'))) == 1
     assert len(matcher(nlp(u'a c'))) == 1
-    assert len(matcher(nlp(u'a b c'))) == 2
-    assert len(matcher(nlp(u'a b b c'))) == 3
-    assert len(matcher(nlp(u'a b b'))) == 3
+    assert len(matcher(nlp(u'a b c'))) == 1
+    assert len(matcher(nlp(u'a b b c'))) == 1
+    assert len(matcher(nlp(u'a b b'))) == 1
diff --git a/spacy/tests/test_textcat.py b/spacy/tests/test_textcat.py
deleted file mode 100644
index b6c9d820f..000000000
--- a/spacy/tests/test_textcat.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from __future__ import unicode_literals
-import random
-import numpy.random
-
-from ..pipeline import TextCategorizer
-from ..lang.en import English
-from ..vocab import Vocab
-from ..tokens import Doc
-from ..gold import GoldParse
-
-
-def test_textcat_learns_multilabel():
-    random.seed(0)
-    numpy.random.seed(0)
-    docs = []
-    nlp = English()
-    vocab = nlp.vocab
-    letters = ['a', 'b', 'c']
-    for w1 in letters:
-        for w2 in letters:
-            cats = {letter: float(w2==letter) for letter in letters}
-            docs.append((Doc(vocab, words=['d']*3 + [w1, w2] + ['d']*3), cats))
-    random.shuffle(docs)
-    model = TextCategorizer(vocab, width=8)
-    for letter in letters:
-        model.add_label(letter)
-    optimizer = model.begin_training()
-    for i in range(30):
-        losses = {}
-        Ys = [GoldParse(doc, cats=cats) for doc, cats in docs]
-        Xs = [doc for doc, cats in docs]
-        model.update(Xs, Ys, sgd=optimizer, losses=losses)
-        random.shuffle(docs)
-    for w1 in letters:
-        for w2 in letters:
-            doc = Doc(vocab, words=['d']*3 + [w1, w2] + ['d']*3)
-            truth = {letter: w2==letter for letter in letters}
-            model(doc)
-            for cat, score in doc.cats.items():
-                if not truth[cat]:
-                    assert score < 0.5
-                else:
-                    assert score > 0.5
-
diff --git a/spacy/tokens/doc.pxd b/spacy/tokens/doc.pxd
index 28b4a4e10..f34c455c6 100644
--- a/spacy/tokens/doc.pxd
+++ b/spacy/tokens/doc.pxd
@@ -19,9 +19,6 @@ ctypedef fused LexemeOrToken:
     const_TokenC_ptr


-cdef int set_children_from_heads(TokenC* tokens, int length) except -1
-
-
 cdef int token_by_start(const TokenC* tokens, int length, int start_char) except -2


diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index e3fbb4552..098800470 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -186,20 +186,6 @@ cdef class Doc:
     def _(self):
        return Underscore(Underscore.doc_extensions, self)

-    @property
-    def is_sentenced(self):
-        # Check if the document has sentence boundaries,
-        # i.e at least one tok has the sent_start in (-1, 1)
-        if 'sents' in self.user_hooks:
-            return True
-        if self.is_parsed:
-            return True
-        for i in range(self.length):
-            if self.c[i].sent_start == -1 or self.c[i].sent_start == 1:
-                return True
-        else:
-            return False
-
     def __getitem__(self, object i):
         """Get a `Token` or `Span` object.

@@ -531,23 +517,29 @@ cdef class Doc:
            >>> assert [s.root.text for s in doc.sents] == ["is", "'s"]
            """
        def __get__(self):
-            if not self.is_sentenced:
-                raise ValueError(
-                    "Sentence boundaries unset. You can add the 'sentencizer' "
-                    "component to the pipeline with: "
-                    "nlp.add_pipe(nlp.create_pipe('sentencizer')) "
-                    "Alternatively, add the dependency parser, or set "
-                    "sentence boundaries by setting doc[i].sent_start")
            if 'sents' in self.user_hooks:
                yield from self.user_hooks['sents'](self)
-            else:
-                start = 0
+                return
+
+            cdef int i
+            if not self.is_parsed:
                for i in range(1, self.length):
-                    if self.c[i].sent_start == 1:
-                        yield Span(self, start, i)
-                        start = i
-                if start != self.length:
-                    yield Span(self, start, self.length)
+                    if self.c[i].sent_start != 0:
+                        break
+                else:
+                    raise ValueError(
+                        "Sentence boundaries unset. You can add the 'sentencizer' "
+                        "component to the pipeline with: "
+                        "nlp.add_pipe(nlp.create_pipe('sentencizer')) "
+                        "Alternatively, add the dependency parser, or set "
+                        "sentence boundaries by setting doc[i].sent_start")
+            start = 0
+            for i in range(1, self.length):
+                if self.c[i].sent_start == 1:
+                    yield Span(self, start, i)
+                    start = i
+            if start != self.length:
+                yield Span(self, start, self.length)

    cdef int push_back(self, LexemeOrToken lex_or_tok, bint has_space) except -1:
        if self.length == 0:
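The rewritten `sents` getter above does its segmentation in one pass over the `sent_start` flags: every token flagged 1 opens a new sentence, and whatever remains at the end becomes the final sentence. The same loop over a plain list of flags, with `Span` creation reduced to (start, end) tuples, purely as an illustration:

def sentence_slices(sent_start_flags):
    """Yield (start, end) token slices; a flag of 1 opens a new sentence
    and position 0 always starts one, mirroring the loop above."""
    start = 0
    n = len(sent_start_flags)
    for i in range(1, n):
        if sent_start_flags[i] == 1:
            yield (start, i)
            start = i
    if start != n:
        yield (start, n)


# Flags for "This is one . This is two ." with openers at tokens 0 and 4;
# 0 or -1 would mean unset / not an opener.
flags = [1, 0, 0, 0, 1, 0, 0, 0]
assert list(sentence_slices(flags)) == [(0, 4), (4, 8)]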
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index aa085b59f..10d9660e7 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -285,42 +285,16 @@ cdef class Span:
        def __get__(self):
            if 'sent' in self.doc.user_span_hooks:
                return self.doc.user_span_hooks['sent'](self)
-            # This should raise if we're not parsed
-            # or doesen't have any sbd component :)
+            # This should raise if we're not parsed.
            self.doc.sents
-            # if doc is parsed we can use the deps to find the sentence
-            # otherwise we use the `sent_start` token attribute
            cdef int n = 0
-            cdef int i
-            if self.doc.is_parsed:
-                root = &self.doc.c[self.start]
-                n = 0
-                while root.head != 0:
-                    root += root.head
-                    n += 1
-                    if n >= self.doc.length:
-                        raise RuntimeError
-                return self.doc[root.l_edge:root.r_edge + 1]
-            elif self.doc.is_sentenced:
-                # find start of the sentence
-                start = self.start
-                while self.doc.c[start].sent_start != 1 and start > 0:
-                    start += -1
-                # find end of the sentence
-                end = self.end
-                n = 0
-                while end < self.doc.length and self.doc.c[end].sent_start != 1:
-                    end += 1
-                    n += 1
-                    if n >= self.doc.length:
-                        break
-                #
-                return self.doc[start:end]
-            else:
-                raise ValueError(
-                    "Access to sentence requires either the dependency parse "
-                    "or sentence boundaries to be set by setting " +
-                    "doc[i].is_sent_start = True")
+            root = &self.doc.c[self.start]
+            while root.head != 0:
+                root += root.head
+                n += 1
+                if n >= self.doc.length:
+                    raise RuntimeError
+            return self.doc[root.l_edge:root.r_edge + 1]

    property has_vector:
        """RETURNS (bool): Whether a word vector is associated with the object.
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index cdd07ebc0..038f60954 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -34,11 +34,11 @@ cdef class Token:

    @classmethod
    def get_extension(cls, name):
-        return Underscore.span_extensions.get(name)
+        return Underscore.token_extensions.get(name)

    @classmethod
    def has_extension(cls, name):
-        return name in Underscore.span_extensions
+        return name in Underscore.token_extensions

    def __cinit__(self, Vocab vocab, Doc doc, int offset):
        """Construct a `Token` object.
diff --git a/spacy/util.py b/spacy/util.py
index 73d314e27..dc51e467d 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -436,29 +436,6 @@ def decaying(start, stop, decay):
        nr_upd += 1


-def minibatch_by_words(items, size, count_words=len):
-    '''Create minibatches of a given number of words.'''
-    if isinstance(size, int):
-        size_ = itertools.repeat(size)
-    else:
-        size_ = size
-    items = iter(items)
-    while True:
-        batch_size = next(size_)
-        batch = []
-        while batch_size >= 0:
-            try:
-                doc, gold = next(items)
-            except StopIteration:
-                if batch:
-                    yield batch
-                return
-            batch_size -= count_words(doc)
-            batch.append((doc, gold))
-        if batch:
-            yield batch
-
-
 def itershuffle(iterable, bufsize=1000):
    """Shuffle an iterator. This works by holding `bufsize` items back
    and yielding them sometime later. Obviously, this is not unbiased –
@@ -474,7 +451,7 @@ def itershuffle(iterable, bufsize=1000):
    try:
        while True:
            for i in range(random.randint(1, bufsize-len(buf))):
-                buf.append(next(iterable))
+                buf.append(iterable.next())
            random.shuffle(buf)
            for i in range(random.randint(1, bufsize)):
                if buf:
diff --git a/website/usage/resources.jade b/website/usage/resources.jade
index 8766d3864..4b29a7831 100644
--- a/website/usage/resources.jade
+++ b/website/usage/resources.jade
@@ -120,6 +120,9 @@ include ../_includes/_mixins
                |  A Practical Real-World Approach to Gaining Actionable Insights
                |  from your Data

+            +card("Practical Machine Learning with Python", "", "Dipanjan Sarkar et al. (Apress, 2017)", "book")
+                |  A Problem-Solver's Guide to Building Real-World Intelligent Systems
+
    +section("notebooks")
        +h(2, "notebooks") Jupyter notebooks
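The sentence lookup that span.pyx keeps above works by hopping from the span's first token along head offsets, where each token stores its head as a relative offset (0 for the root), then slicing out the root's left and right edges. A self-contained sketch of that walk over plain arrays; `sentence_bounds` and the three arrays are hypothetical stand-ins for the `TokenC` fields used in the Cython code:

def sentence_bounds(heads, l_edges, r_edges, start):
    """heads[i] is the offset from token i to its head, 0 for the root.
    Hop head-to-head from `start` until the root, then return the root's
    subtree as a (start, end) slice."""
    i = start
    n = 0
    while heads[i] != 0:
        i += heads[i]
        n += 1
        if n >= len(heads):  # same runaway guard as the RuntimeError above
            raise RuntimeError("head chain does not terminate")
    return l_edges[i], r_edges[i] + 1


# "I like cheese ." with 'like' as root; each head is a relative offset.
heads = [1, 0, -1, -2]
l_edges = [0, 0, 2, 3]
r_edges = [0, 3, 2, 3]
assert sentence_bounds(heads, l_edges, r_edges, 2) == (0, 4)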
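Since util.py drops `minibatch_by_words` above, here is the deleted helper reproduced from the hunk, with a small usage example to show its batching behaviour. Note that the word budget is only checked before each pull, so a batch may overshoot by one document:

import itertools


def minibatch_by_words(items, size, count_words=len):
    '''Create minibatches of a given number of words.'''
    if isinstance(size, int):
        size_ = itertools.repeat(size)
    else:
        size_ = size
    items = iter(items)
    while True:
        batch_size = next(size_)
        batch = []
        while batch_size >= 0:
            try:
                doc, gold = next(items)
            except StopIteration:
                if batch:
                    yield batch
                return
            batch_size -= count_words(doc)
            batch.append((doc, gold))
        if batch:
            yield batch


# Three "docs" of 3, 2 and 1 words, with a 3-word budget per batch.
pairs = [("a b c".split(), None), ("d e".split(), None), ("f".split(), None)]
batches = list(minibatch_by_words(pairs, size=3))
# The first batch takes the 3-word doc and, because the budget is still
# exactly 0, pulls the 2-word doc as well before stopping.
assert [len(b) for b in batches] == [2, 1]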